You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

399 lines
12 KiB

4 years ago
  1. #if MYSQL_6_9
  2. // Copyright (c) 2009-2010 Sun Microsystems, Inc.
  3. //
  4. // MySQL Connector/NET is licensed under the terms of the GPLv2
  5. // <http://www.gnu.org/licenses/old-licenses/gpl-2.0.html>, like most
  6. // MySQL Connectors. There are special exceptions to the terms and
  7. // conditions of the GPLv2 as it is applied to this software, see the
  8. // FLOSS License Exception
  9. // <http://www.mysql.com/about/legal/licensing/foss-exception.html>.
  10. //
  11. // This program is free software; you can redistribute it and/or modify
  12. // it under the terms of the GNU General Public License as published
  13. // by the Free Software Foundation; version 2 of the License.
  14. //
  15. // This program is distributed in the hope that it will be useful, but
  16. // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  17. // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  18. // for more details.
  19. //
  20. // You should have received a copy of the GNU General Public License along
  21. // with this program; if not, write to the Free Software Foundation, Inc.,
  22. // 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  23. using System;
  24. using System.Collections.Generic;
  25. using System.Text;
  26. using System.IO;
  27. using System.Diagnostics;
  28. using System.Collections;
  29. using Externals.MySql.Data.MySqlClient.Properties;
  30. namespace Externals.MySql.Data.Common
  31. {
  32. internal class QueryNormalizer
  33. {
  34. private static List<string> keywords = new List<string>();
  35. private List<Token> tokens = new List<Token>();
  36. private int pos;
  37. private string fullSql;
  38. private string queryType;
  /// <summary>
  /// Type initializer: copies the entries of Resources.Keywords into the
  /// shared keyword list that IsKeyword consults.
  /// </summary>
  static QueryNormalizer()
  {
      // Inserting at the current end of the list is the same as appending.
      keywords.InsertRange(keywords.Count, Resources.Keywords);
  }
  /// <summary>
  /// Gets the statement type detected while normalizing: the upper-cased
  /// text of the first keyword token of the query. Null until a keyword
  /// has been detected.
  /// </summary>
  public string QueryType
  {
      get
      {
          return this.queryType;
      }
  }
  /// <summary>
  /// Produces a normalized form of a SQL statement: the text is tokenized,
  /// the rewriting passes are run over the token list, and the tokens still
  /// marked for output are concatenated.
  /// </summary>
  /// <param name="sql">The SQL text to normalize.</param>
  /// <returns>The normalized SQL text.</returns>
  public string Normalize(string sql)
  {
      tokens.Clear();
      fullSql = sql;

      // Lex first, then run the passes in their fixed order.
      TokenizeSql(sql);
      DetermineStatementType(tokens);
      ProcessMathSymbols(tokens);
      CollapseValueLists(tokens);
      CollapseInLists(tokens);
      CollapseWhitespace(tokens);

      StringBuilder normalized = new StringBuilder();
      for (int i = 0; i < tokens.Count; i++)
      {
          Token current = tokens[i];
          if (!current.Output)
          {
              continue;
          }
          normalized.Append(current.Text);
      }
      return normalized.ToString();
  }
  /// <summary>
  /// Records the statement type of the query as the upper-cased text of the
  /// first keyword token. If the token list contains no keyword the
  /// statement type is cleared (null).
  /// </summary>
  /// <param name="tok">The token list to inspect.</param>
  private void DetermineStatementType(List<Token> tok)
  {
      // Forget the result of any previous run so that a keyword-less query
      // does not keep reporting the type of an earlier statement.
      queryType = null;

      foreach (Token t in tok)
      {
          if (t.Type != TokenType.Keyword)
          {
              continue;
          }

          queryType = t.Text.ToUpperInvariant();
          break;
      }
  }
  /// <summary>
  /// Hides "+" and "-" symbols that act as unary operators. A sign is
  /// treated as unary when the preceding real token is something other than
  /// a number, an identifier or a closing parenthesis. A sign with no
  /// preceding real token is left untouched.
  /// </summary>
  /// <param name="tok">The token list to process in place.</param>
  private void ProcessMathSymbols(List<Token> tok)
  {
      Token previous = null;
      for (int i = 0; i < tok.Count; i++)
      {
          Token current = tok[i];
          bool isSign = current.Type == TokenType.Symbol &&
                        (current.Text == "+" || current.Text == "-");

          if (isSign && previous != null)
          {
              // After an operand the sign is a binary operator and stays.
              bool followsOperand =
                  previous.Type == TokenType.Number ||
                  previous.Type == TokenType.Identifier ||
                  (previous.Type == TokenType.Symbol && previous.Text == ")");

              if (!followsOperand)
              {
                  current.Output = false;
              }
          }

          // A sign that was just hidden is no longer a real token, so it
          // does not become the "previous" token.
          if (current.IsRealToken)
          {
              previous = current;
          }
      }
  }
  /// <summary>
  /// Hides every whitespace token that directly follows another whitespace
  /// token in the output, so runs of whitespace are emitted once. Tokens
  /// already hidden are ignored when deciding what "directly follows".
  /// </summary>
  /// <param name="tok">The token list to process in place.</param>
  private void CollapseWhitespace(List<Token> tok)
  {
      bool previousWasWhitespace = false;
      foreach (Token current in tok)
      {
          if (!current.Output)
          {
              continue;
          }

          bool isWhitespace = current.Type == TokenType.Whitespace;
          if (isWhitespace && previousWasWhitespace)
          {
              // Duplicate blank: hide it and keep the remembered state.
              current.Output = false;
              continue;
          }

          previousWasWhitespace = isWhitespace;
      }
  }
  /// <summary>
  /// Scans the token list for keywords that start with "VALUE"
  /// (case-insensitive) and hands each one to CollapseValueList.
  /// </summary>
  /// <param name="tok">The token list to process in place.</param>
  private void CollapseValueLists(List<Token> tok)
  {
      for (int i = 0; i < tok.Count; i++)
      {
          Token candidate = tok[i];
          bool isValueKeyword =
              candidate.Type == TokenType.Keyword &&
              candidate.Text.StartsWith("VALUE", StringComparison.OrdinalIgnoreCase);

          if (isValueKeyword)
          {
              // The helper advances the index past what it consumed.
              CollapseValueList(tok, ref i);
          }
      }
  }
  /// <summary>
  /// Collapses a multi-row value list such as <c>VALUES (..), (..), (..)</c>
  /// so that only the first group is emitted, followed by the placeholder
  /// comment <c>/* , ... */</c>. A list with a single group is left as is.
  /// </summary>
  /// <param name="tok">The token list to process in place.</param>
  /// <param name="pos">
  /// On entry the index of the VALUE(S) keyword; on exit the index at which
  /// the caller should resume scanning (it pre-increments before reading).
  /// </param>
  private void CollapseValueList(List<Token> tok, ref int pos)
  {
      List<int> parenIndices = new List<int>();

      // Collect the index of the token that ends each group of the list.
      while (true)
      {
          // Find the closing ')'. When the list is cut short the last token
          // of the stream stands in for it.
          while (++pos < tok.Count)
          {
              if (tok[pos].Type == TokenType.Symbol && tok[pos].Text == ")")
              {
                  break;
              }
              if (pos == tok.Count - 1)
              {
                  break;
              }
          }

          // Nothing followed the keyword or the trailing comma. Recording
          // this out-of-range index would make the indexing below throw.
          if (pos >= tok.Count)
          {
              break;
          }
          parenIndices.Add(pos);

          // Now find the next "real" token.
          while (++pos < tok.Count)
          {
              if (tok[pos].IsRealToken)
              {
                  break;
              }
          }
          if (pos >= tok.Count)
          {
              break;
          }

          if (tok[pos].Text != ",")
          {
              // Not part of the list: step back so the caller sees it again.
              pos--;
              break;
          }
      }

      // If we only have one group then there is nothing to collapse.
      if (parenIndices.Count < 2)
      {
          return;
      }

      // A second group exists, so at least a comma and one more token
      // follow the first group; the two slots overwritten here are in range.
      int index = parenIndices[0];
      tok[++index] = new Token(TokenType.Whitespace, " ");
      tok[++index] = new Token(TokenType.Comment, "/* , ... */");
      index++;

      // Hide everything else up to the end of the last group.
      int last = parenIndices[parenIndices.Count - 1];
      while (index <= last)
      {
          tok[index++].Output = false;
      }
  }
  /// <summary>
  /// Scans the token list for the keyword "IN" and hands each occurrence to
  /// CollapseInList.
  /// </summary>
  /// <param name="tok">The token list to process in place.</param>
  private void CollapseInLists(List<Token> tok)
  {
      // tok.Count is re-read on every pass because the helper may add or
      // remove tokens.
      for (int i = 0; i < tok.Count; i++)
      {
          Token candidate = tok[i];
          if (candidate.Type == TokenType.Keyword && candidate.Text == "IN")
          {
              CollapseInList(tok, ref i);
          }
      }
  }
  /// <summary>
  /// Advances <paramref name="pos"/> to the next token after it whose
  /// IsRealToken is true and returns that token.
  /// </summary>
  /// <param name="tok">The token list to search.</param>
  /// <param name="pos">
  /// Index to start after; on return the index of the token found, or an
  /// index at or beyond the end of the list when none was found.
  /// </param>
  /// <returns>The token found, or null when the list is exhausted.</returns>
  private Token GetNextRealToken(List<Token> tok, ref int pos)
  {
      for (pos = pos + 1; pos < tok.Count; pos++)
      {
          Token candidate = tok[pos];
          if (candidate.IsRealToken)
          {
              return candidate;
          }
      }
      return null;
  }
  /// <summary>
  /// Collapses a literal IN list such as <c>IN (1, 2, 3)</c> so that only
  /// the first element is kept, followed by the placeholder comment
  /// <c>/* , ... */</c> and the closing parenthesis. The list is left
  /// untouched when it is not a plain literal list: no opening parenthesis,
  /// a keyword as first element (likely a sub-select), a nested
  /// parenthesis, a command comment inside, or no closing parenthesis.
  /// </summary>
  /// <param name="tok">The token list to process in place.</param>
  /// <param name="pos">
  /// On entry the index of the IN keyword; on exit the index at which the
  /// caller should resume scanning (it pre-increments before reading).
  /// </param>
  private void CollapseInList(List<Token> tok, ref int pos)
  {
      // IN must be followed by "(" to introduce a list; other uses such as
      // "IN BOOLEAN MODE" are not lists and must not be rewritten.
      Token t = GetNextRealToken(tok, ref pos);
      if (t == null || t.Type != TokenType.Symbol || t.Text != "(")
      {
          return;
      }

      // If the first element is a keyword then we likely have a
      // SELECT .. IN (SELECT ...).
      t = GetNextRealToken(tok, ref pos);
      if (t == null || t.Type == TokenType.Keyword)
      {
          return;
      }

      // An empty list or one that opens with a nested group has no single
      // first element to keep; this matches the nested-paren bailout below.
      if (t.Type == TokenType.Symbol && (t.Text == "(" || t.Text == ")"))
      {
          return;
      }

      int start = pos;

      // Find the parenthesis that closes the list.
      bool closed = false;
      while (++pos < tok.Count)
      {
          t = tok[pos];
          if (t.Type == TokenType.CommandComment)
          {
              return;
          }
          if (!t.IsRealToken)
          {
              continue;
          }
          if (t.Text == "(")
          {
              return;
          }
          if (t.Text == ")")
          {
              closed = true;
              break;
          }
      }

      // Unterminated list (e.g. truncated SQL): removing up to an index past
      // the end would throw, so leave the tokens alone.
      if (!closed)
      {
          return;
      }

      // Drop everything after the first element up to and including ")".
      int stop = pos;
      tok.RemoveRange(start + 1, stop - start);

      tok.Insert(++start, new Token(TokenType.Whitespace, " "));
      tok.Insert(++start, new Token(TokenType.Comment, "/* , ... */"));
      tok.Insert(++start, new Token(TokenType.Whitespace, " "));
      tok.Insert(++start, new Token(TokenType.Symbol, ")"));

      // The list changed length; point at the new ")" so the caller resumes
      // with the token right after it and cannot skip a later IN list.
      pos = start;
  }
  /// <summary>
  /// Splits the SQL text into tokens by dispatching on the character at the
  /// current position: comments, whitespace, quoted tokens, unquoted words
  /// and single-character symbols. Each consumer advances the position.
  /// </summary>
  /// <param name="sql">The SQL text to tokenize.</param>
  private void TokenizeSql(string sql)
  {
      for (pos = 0; pos < sql.Length; )
      {
          char current = sql[pos];

          // A comment lead-in only counts when a comment was really consumed.
          if (LetterStartsComment(current) && ConsumeComment())
          {
              continue;
          }

          if (Char.IsWhiteSpace(current))
          {
              ConsumeWhitespace();
              continue;
          }

          switch (current)
          {
              case '\'':
              case '\"':
              case '`':
                  ConsumeQuotedToken(current);
                  break;

              default:
                  if (IsSpecialCharacter(current))
                  {
                      ConsumeSymbol();
                  }
                  else
                  {
                      ConsumeUnquotedToken();
                  }
                  break;
          }
      }
  }
  /// <summary>
  /// Tells whether a character can be the first character of a comment:
  /// '#', '/' or '-'.
  /// </summary>
  /// <param name="c">The character to test.</param>
  /// <returns>True when the character may begin a comment.</returns>
  private bool LetterStartsComment(char c)
  {
      switch (c)
      {
          case '#':
          case '/':
          case '-':
              return true;

          default:
              return false;
      }
  }
  /// <summary>
  /// Tells whether a character ends an unquoted word. Letters, digits,
  /// '$', '_' and '.' are word characters; everything else is special.
  /// </summary>
  /// <param name="c">The character to test.</param>
  /// <returns>True when the character is not a word character.</returns>
  private bool IsSpecialCharacter(char c)
  {
      bool partOfWord = Char.IsLetterOrDigit(c) ||
                        c == '$' ||
                        c == '_' ||
                        c == '.';
      return !partOfWord;
  }
  /// <summary>
  /// Tries to consume a comment starting at the current position. Three
  /// forms are recognised: <c>#</c> to end of line, <c>--</c> followed by a
  /// whitespace or control character to end of line, and
  /// <c>/* ... */</c>. A comment without a terminator runs to the end of
  /// the text. Only comments beginning with <c>/*!</c> produce a token
  /// (CommandComment); all other comments are dropped.
  /// </summary>
  /// <returns>
  /// True when a comment was consumed and the position advanced; false when
  /// the text at the current position is not a comment (position unchanged).
  /// </returns>
  private bool ConsumeComment()
  {
      char c = fullSql[pos];

      // Make sure the comment starts correctly.
      if (c == '/' && ((pos + 1) >= fullSql.Length || fullSql[pos + 1] != '*'))
      {
          return false;
      }
      if (c == '-')
      {
          if ((pos + 2) >= fullSql.Length || fullSql[pos + 1] != '-')
          {
              return false;
          }

          // MySQL requires the second dash to be followed by whitespace or
          // a control character (space, tab, newline, ...), not only ' '.
          char after = fullSql[pos + 2];
          if (!Char.IsWhiteSpace(after) && !Char.IsControl(after))
          {
              return false;
          }
      }

      bool isBlockComment = c == '/';
      string endingPattern = isBlockComment ? "*/" : "\n";

      // For a block comment skip the opening "/*" so that "/*/" is not
      // mistaken for an already closed comment.
      int searchFrom = isBlockComment ? pos + 2 : pos;
      int index = fullSql.IndexOf(endingPattern, searchFrom, StringComparison.Ordinal);
      if (index == -1)
      {
          // Unterminated: the comment takes the rest of the text. Ending one
          // character early would re-tokenize the last character and, for a
          // one-character comment, never advance the position at all.
          index = fullSql.Length;
      }
      else
      {
          index += endingPattern.Length;
      }

      string comment = fullSql.Substring(pos, index - pos);
      if (comment.StartsWith("/*!", StringComparison.Ordinal))
      {
          tokens.Add(new Token(TokenType.CommandComment, comment));
      }

      pos = index;
      return true;
  }
  /// <summary>
  /// Emits the character at the current position as a one-character Symbol
  /// token and moves the position past it.
  /// </summary>
  private void ConsumeSymbol()
  {
      char symbol = fullSql[pos];
      pos += 1;

      Token token = new Token(TokenType.Symbol, symbol.ToString());
      tokens.Add(token);
  }
  /// <summary>
  /// Consumes a quoted token that starts at the current position and runs
  /// to the matching unescaped quote character (a backslash escapes the
  /// character after it). A single-quoted token becomes a String token with
  /// the text "?"; any other quote character yields an Identifier token
  /// carrying the original text including its quotes. A token with no
  /// closing quote extends to the end of the SQL text.
  /// </summary>
  /// <param name="c">The quote character that opened the token.</param>
  private void ConsumeQuotedToken(char c)
  {
      bool escaped = false;
      int start = pos;
      pos++;

      while (pos < fullSql.Length)
      {
          char x = fullSql[pos];
          if (x == c && !escaped)
          {
              break;
          }

          if (escaped)
          {
              escaped = false;
          }
          else if (x == '\\')
          {
              escaped = true;
          }
          pos++;
      }

      // Step over the closing quote only when there is one. Advancing
      // unconditionally would push the position past the end of the text
      // and make the Substring call below throw for unterminated tokens.
      if (pos < fullSql.Length)
      {
          pos++;
      }

      if (c == '\'')
      {
          tokens.Add(new Token(TokenType.String, "?"));
      }
      else
      {
          tokens.Add(new Token(TokenType.Identifier, fullSql.Substring(start, pos - start)));
      }
  }
  /// <summary>
  /// Consumes a run of word characters starting at the current position
  /// and classifies it: a numeric literal becomes a Number token with the
  /// text "?", a word found in the keyword list becomes an upper-cased
  /// Keyword token, and anything else becomes an Identifier token.
  /// </summary>
  private void ConsumeUnquotedToken()
  {
      int startPos = pos;
      while (pos < fullSql.Length && !IsSpecialCharacter(fullSql[pos]))
      {
          pos++;
      }

      string word = fullSql.Substring(startPos, pos - startPos);

      // A numeric literal starts with a digit or a decimal point. Without
      // this check words such as "NaN" or "Infinity" parse as doubles and
      // an identifier of that name would be replaced by "?".
      bool looksNumeric = word.Length > 0 &&
                          (Char.IsDigit(word[0]) || word[0] == '.');

      // SQL literals always use '.' as the decimal separator, so parse with
      // the invariant culture instead of the current thread culture.
      double v;
      if (looksNumeric &&
          Double.TryParse(
              word,
              System.Globalization.NumberStyles.Float,
              System.Globalization.CultureInfo.InvariantCulture,
              out v))
      {
          tokens.Add(new Token(TokenType.Number, "?"));
          return;
      }

      Token t = new Token(TokenType.Identifier, word);
      if (IsKeyword(word))
      {
          t.Type = TokenType.Keyword;
          t.Text = t.Text.ToUpperInvariant();
      }
      tokens.Add(t);
  }
  /// <summary>
  /// Emits a single Whitespace token with the text " " and moves the
  /// position past the whole run of whitespace characters.
  /// </summary>
  private void ConsumeWhitespace()
  {
      Token blank = new Token(TokenType.Whitespace, " ");
      tokens.Add(blank);

      for (; pos < fullSql.Length; pos++)
      {
          if (!Char.IsWhiteSpace(fullSql[pos]))
          {
              break;
          }
      }
  }
  /// <summary>
  /// Tells whether a word, compared in upper case, is in the keyword list.
  /// </summary>
  /// <param name="word">The word to look up.</param>
  /// <returns>True when the upper-cased word is a known keyword.</returns>
  private bool IsKeyword(string word)
  {
      string upper = word.ToUpperInvariant();
      return keywords.IndexOf(upper) >= 0;
  }
  317. }
  /// <summary>
  /// One lexical unit of a SQL statement: its kind, its text and a flag
  /// telling whether it is still part of the output.
  /// </summary>
  internal class Token
  {
      /// <summary>The kind of token.</summary>
      public TokenType Type;

      /// <summary>The text emitted for this token.</summary>
      public string Text;

      /// <summary>False once the token has been hidden from the output.</summary>
      public bool Output;

      /// <summary>
      /// Creates a token of the given kind and text that is marked for output.
      /// </summary>
      /// <param name="type">The kind of token.</param>
      /// <param name="text">The text of the token.</param>
      public Token(TokenType type, string text)
      {
          this.Type = type;
          this.Text = text;
          this.Output = true;
      }

      /// <summary>
      /// Gets whether the token is still marked for output and is neither a
      /// comment, a command comment nor whitespace.
      /// </summary>
      public bool IsRealToken
      {
          get
          {
              if (!Output)
              {
                  return false;
              }

              switch (Type)
              {
                  case TokenType.Comment:
                  case TokenType.CommandComment:
                  case TokenType.Whitespace:
                      return false;

                  default:
                      return true;
              }
          }
      }
  }
  /// <summary>
  /// The kinds of token produced while normalizing a query.
  /// </summary>
  internal enum TokenType
  {
      /// <summary>An unquoted word found in the keyword list.</summary>
      Keyword = 0,

      /// <summary>A single-quoted literal; its text is replaced by "?".</summary>
      String = 1,

      /// <summary>An unquoted word that parses as a number; its text is replaced by "?".</summary>
      Number = 2,

      /// <summary>A single special character.</summary>
      Symbol = 3,

      /// <summary>An unquoted non-keyword word, or a token quoted with double quotes or backticks.</summary>
      Identifier = 4,

      /// <summary>A placeholder comment inserted when a list is collapsed.</summary>
      Comment = 5,

      /// <summary>A comment from the query that begins with "/*!".</summary>
      CommandComment = 6,

      /// <summary>A run of whitespace, represented by a single space.</summary>
      Whitespace = 7
  }
  351. }
  352. #endif