/* Copyright 2010-2011 10gen Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Xml; using MongoDB.Bson; namespace MongoDB.Bson.IO { /// /// A static class that represents a JSON scanner. /// public static class JsonScanner { #region public static methods /// /// Gets the next JsonToken from a JsonBuffer. /// /// The buffer. /// The next token. public static JsonToken GetNextToken( JsonBuffer buffer ) { // skip leading whitespace var c = buffer.Read(); while (c != -1 && char.IsWhiteSpace((char) c)) { c = buffer.Read(); } if (c == -1) { return new JsonToken(JsonTokenType.EndOfFile, ""); } // leading character determines token type switch (c) { case '{': return new JsonToken(JsonTokenType.BeginObject, "{"); case '}': return new JsonToken(JsonTokenType.EndObject, "}"); case '[': return new JsonToken(JsonTokenType.BeginArray, "["); case ']': return new JsonToken(JsonTokenType.EndArray, "]"); case '(': return new JsonToken(JsonTokenType.LeftParen, "("); case ')': return new JsonToken(JsonTokenType.RightParen, ")"); case ':': return new JsonToken(JsonTokenType.Colon, ":"); case ',': return new JsonToken(JsonTokenType.Comma, ","); case '\'': case '"': return GetStringToken(buffer, (char) c); case '/': return GetRegularExpressionToken(buffer); default: if (c == '-' || char.IsDigit((char) c)) { return GetNumberToken(buffer, c); } else if (c == '$' || char.IsLetter((char) c)) { return GetUnquotedStringToken(buffer); } else { buffer.UnRead(c); throw new FileFormatException(FormatMessage("Invalid JSON input", buffer, buffer.Position)); } } } #endregion #region private methods private static string FormatMessage( string message, JsonBuffer buffer, int start ) { var length = 20; string snippet; if (buffer.Position + length >= buffer.Length) { snippet = buffer.Substring(start); } else { snippet = buffer.Substring(start, length) + "..."; } return string.Format("{0} '{1}'.", message, snippet); } private static JsonToken GetNumberToken( JsonBuffer buffer, int c // first character ) { // leading digit or '-' has already been read var start = buffer.Position - 1; NumberState state; switch (c) { case '-': state = NumberState.SawLeadingMinus; break; case '0': state = NumberState.SawLeadingZero; break; default: state = NumberState.SawIntegerDigits; break; } var type = JsonTokenType.Int64; // assume integer until proved otherwise while (true) { c = buffer.Read(); switch (state) { case NumberState.SawLeadingMinus: switch (c) { case '0': state = NumberState.SawLeadingZero; break; case 'I': state = NumberState.SawMinusI; break; default: if (char.IsDigit((char) c)) { state = NumberState.SawIntegerDigits; } else { state = NumberState.Invalid; } break; } break; case NumberState.SawLeadingZero: switch (c) { case '.': state = NumberState.SawDecimalPoint; break; case 'e': case 'E': state = NumberState.SawExponentLetter; break; case ',': case '}': case ']': case ')': case -1: state = NumberState.Done; break; default: if (char.IsWhiteSpace((char) c)) { state = NumberState.Done; } else { state = NumberState.Invalid; } break; } break; case NumberState.SawIntegerDigits: switch (c) { case '.': state = NumberState.SawDecimalPoint; break; case 'e': case 'E': state = NumberState.SawExponentLetter; break; case ',': case '}': case ']': case ')': case -1: state = NumberState.Done; break; default: if (char.IsDigit((char) c)) { state = NumberState.SawIntegerDigits; } else if (char.IsWhiteSpace((char) c)) { state = NumberState.Done; } else { state = NumberState.Invalid; } break; } break; case NumberState.SawDecimalPoint: type = JsonTokenType.Double; if (char.IsDigit((char) c)) { state = NumberState.SawFractionDigits; } else { state = NumberState.Invalid; } break; case NumberState.SawFractionDigits: switch (c) { case 'e': case 'E': state = NumberState.SawExponentLetter; break; case ',': case '}': case ']': case ')': case -1: state = NumberState.Done; break; default: if (char.IsDigit((char) c)) { state = NumberState.SawFractionDigits; } else if (char.IsWhiteSpace((char) c)) { state = NumberState.Done; } else { state = NumberState.Invalid; } break; } break; case NumberState.SawExponentLetter: type = JsonTokenType.Double; switch (c) { case '+': case '-': state = NumberState.SawExponentSign; break; default: if (char.IsDigit((char) c)) { state = NumberState.SawExponentDigits; } else { state = NumberState.Invalid; } break; } break; case NumberState.SawExponentSign: if (char.IsDigit((char) c)) { state = NumberState.SawExponentDigits; } else { state = NumberState.Invalid; } break; case NumberState.SawExponentDigits: switch (c) { case ',': case '}': case ']': case ')': case -1: state = NumberState.Done; break; default: if (char.IsDigit((char) c)) { state = NumberState.SawExponentDigits; } else if (char.IsWhiteSpace((char) c)) { state = NumberState.Done; } else { state = NumberState.Invalid; } break; } break; case NumberState.SawMinusI: var sawMinusInfinity = true; var nfinity = new char[] { 'n', 'f', 'i', 'n', 'i', 't', 'y' }; for (var i = 0; i < nfinity.Length; i++) { if (c != nfinity[i]) { sawMinusInfinity = false; break; } c = buffer.Read(); } if (sawMinusInfinity) { type = JsonTokenType.Double; switch (c) { case ',': case '}': case ']': case ')': case -1: state = NumberState.Done; break; default: if (char.IsWhiteSpace((char) c)) { state = NumberState.Done; } else { state = NumberState.Invalid; } break; } } else { state = NumberState.Invalid; } break; } switch (state) { case NumberState.Done: buffer.UnRead(c); var lexeme = buffer.Substring(start, buffer.Position - start); if (type == JsonTokenType.Double) { var value = XmlConvert.ToDouble(lexeme); return new DoubleJsonToken(lexeme, value); } else { var value = XmlConvert.ToInt64(lexeme); if (value < int.MinValue || value > int.MaxValue) { return new Int64JsonToken(lexeme, value); } else { return new Int32JsonToken(lexeme, (int) value); } } case NumberState.Invalid: throw new FileFormatException(FormatMessage("Invalid JSON number", buffer, start)); } } } private static JsonToken GetRegularExpressionToken( JsonBuffer buffer ) { // opening slash has already been read var start = buffer.Position - 1; var state = RegularExpressionState.InPattern; while (true) { var c = buffer.Read(); switch (state) { case RegularExpressionState.InPattern: switch (c) { case '/': state = RegularExpressionState.InOptions; break; case '\\': state = RegularExpressionState.InEscapeSequence; break; default: state = RegularExpressionState.InPattern; break; } break; case RegularExpressionState.InEscapeSequence: state = RegularExpressionState.InPattern; break; case RegularExpressionState.InOptions: switch (c) { case 'i': case 'm': case 'x': case 's': state = RegularExpressionState.InOptions; break; case ',': case '}': case ']': case ')': case -1: state = RegularExpressionState.Done; break; default: if (char.IsWhiteSpace((char) c)) { state = RegularExpressionState.Done; } else { state = RegularExpressionState.Invalid; } break; } break; } switch (state) { case RegularExpressionState.Done: buffer.UnRead(c); var lexeme = buffer.Substring(start, buffer.Position - start); var regex = BsonRegularExpression.Create(lexeme); return new RegularExpressionJsonToken(lexeme, regex); case RegularExpressionState.Invalid: throw new FileFormatException(FormatMessage("Invalid JSON regular expression", buffer, start)); } } } private static JsonToken GetStringToken( JsonBuffer buffer, char quoteCharacter // either single or double quote ) { // opening quote has already been read var start = buffer.Position - 1; var sb = new StringBuilder(); while (true) { var c = buffer.Read(); switch (c) { case '\\': c = buffer.Read(); switch (c) { case '\'': sb.Append('\''); break; case '"': sb.Append('"'); break; case '\\': sb.Append('\\'); break; case '/': sb.Append('/'); break; case 'b': sb.Append('\b'); break; case 'f': sb.Append('\f'); break; case 'n': sb.Append('\n'); break; case 'r': sb.Append('\r'); break; case 't': sb.Append('\t'); break; case 'u': var u1 = buffer.Read(); var u2 = buffer.Read(); var u3 = buffer.Read(); var u4 = buffer.Read(); if (u4 != -1) { var hex = new string(new char[] { (char) u1, (char) u2, (char) u3, (char) u4 }); var n = Convert.ToInt32(hex, 16); sb.Append((char) n); } break; default: if (c != -1) { var message = string.Format("Invalid escape sequence in JSON string '\\{0}'.", (char) c); throw new FileFormatException(message); } break; } break; default: if (c == quoteCharacter) { var lexeme = buffer.Substring(start, buffer.Position - start); return new StringJsonToken(JsonTokenType.String, lexeme, sb.ToString()); } if (c != -1) { sb.Append((char) c); } break; } if (c == -1) { throw new FileFormatException(FormatMessage("End of file in JSON string.", buffer, start)); } } } private static JsonToken GetUnquotedStringToken( JsonBuffer buffer ) { // opening letter or $ has already been read var start = buffer.Position - 1; var c = buffer.Read(); while (c == '$' || char.IsLetterOrDigit((char) c)) { c = buffer.Read(); } buffer.UnRead(c); var lexeme = buffer.Substring(start, buffer.Position - start); return new StringJsonToken(JsonTokenType.UnquotedString, lexeme, lexeme); } #endregion #region nested types private enum NumberState { SawLeadingMinus, SawLeadingZero, SawIntegerDigits, SawDecimalPoint, SawFractionDigits, SawExponentLetter, SawExponentSign, SawExponentDigits, SawMinusI, Done, Invalid } private enum RegularExpressionState { InPattern, InEscapeSequence, InOptions, Done, Invalid } #endregion } }