You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

461 lines
19 KiB

/* Copyright 2010-2011 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using MongoDB.Bson;
namespace MongoDB.Bson.IO {
/// <summary>
/// A static class that represents a JSON scanner.
/// </summary>
public static class JsonScanner {
#region public static methods
/// <summary>
/// Gets the next JsonToken from a JsonBuffer.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <returns>The next token.</returns>
public static JsonToken GetNextToken(
JsonBuffer buffer
) {
// skip leading whitespace
var c = buffer.Read();
while (c != -1 && char.IsWhiteSpace((char) c)) {
c = buffer.Read();
}
if (c == -1) {
return new JsonToken(JsonTokenType.EndOfFile, "<eof>");
}
// leading character determines token type
switch (c) {
case '{': return new JsonToken(JsonTokenType.BeginObject, "{");
case '}': return new JsonToken(JsonTokenType.EndObject, "}");
case '[': return new JsonToken(JsonTokenType.BeginArray, "[");
case ']': return new JsonToken(JsonTokenType.EndArray, "]");
case '(': return new JsonToken(JsonTokenType.LeftParen, "(");
case ')': return new JsonToken(JsonTokenType.RightParen, ")");
case ':': return new JsonToken(JsonTokenType.Colon, ":");
case ',': return new JsonToken(JsonTokenType.Comma, ",");
case '\'':
case '"':
return GetStringToken(buffer, (char) c);
case '/': return GetRegularExpressionToken(buffer);
default:
if (c == '-' || char.IsDigit((char) c)) {
return GetNumberToken(buffer, c);
} else if (c == '$' || char.IsLetter((char) c)) {
return GetUnquotedStringToken(buffer);
} else {
buffer.UnRead(c);
throw new FileFormatException(FormatMessage("Invalid JSON input", buffer, buffer.Position));
}
}
}
#endregion
#region private methods
private static string FormatMessage(
string message,
JsonBuffer buffer,
int start
) {
var length = 20;
string snippet;
if (buffer.Position + length >= buffer.Length) {
snippet = buffer.Substring(start);
} else {
snippet = buffer.Substring(start, length) + "...";
}
return string.Format("{0} '{1}'.", message, snippet);
}
private static JsonToken GetNumberToken(
JsonBuffer buffer,
int c // first character
) {
// leading digit or '-' has already been read
var start = buffer.Position - 1;
NumberState state;
switch (c) {
case '-': state = NumberState.SawLeadingMinus; break;
case '0': state = NumberState.SawLeadingZero; break;
default: state = NumberState.SawIntegerDigits; break;
}
var type = JsonTokenType.Int64; // assume integer until proved otherwise
while (true) {
c = buffer.Read();
switch (state) {
case NumberState.SawLeadingMinus:
switch (c) {
case '0':
state = NumberState.SawLeadingZero;
break;
case 'I':
state = NumberState.SawMinusI;
break;
default:
if (char.IsDigit((char) c)) {
state = NumberState.SawIntegerDigits;
} else {
state = NumberState.Invalid;
}
break;
}
break;
case NumberState.SawLeadingZero:
switch (c) {
case '.':
state = NumberState.SawDecimalPoint;
break;
case 'e':
case 'E':
state = NumberState.SawExponentLetter;
break;
case ',':
case '}':
case ']':
case ')':
case -1:
state = NumberState.Done;
break;
default:
if (char.IsWhiteSpace((char) c)) {
state = NumberState.Done;
} else {
state = NumberState.Invalid;
}
break;
}
break;
case NumberState.SawIntegerDigits:
switch (c) {
case '.':
state = NumberState.SawDecimalPoint;
break;
case 'e':
case 'E':
state = NumberState.SawExponentLetter;
break;
case ',':
case '}':
case ']':
case ')':
case -1:
state = NumberState.Done;
break;
default:
if (char.IsDigit((char) c)) {
state = NumberState.SawIntegerDigits;
} else if (char.IsWhiteSpace((char) c)) {
state = NumberState.Done;
} else {
state = NumberState.Invalid;
}
break;
}
break;
case NumberState.SawDecimalPoint:
type = JsonTokenType.Double;
if (char.IsDigit((char) c)) {
state = NumberState.SawFractionDigits;
} else {
state = NumberState.Invalid;
}
break;
case NumberState.SawFractionDigits:
switch (c) {
case 'e':
case 'E':
state = NumberState.SawExponentLetter;
break;
case ',':
case '}':
case ']':
case ')':
case -1:
state = NumberState.Done;
break;
default:
if (char.IsDigit((char) c)) {
state = NumberState.SawFractionDigits;
} else if (char.IsWhiteSpace((char) c)) {
state = NumberState.Done;
} else {
state = NumberState.Invalid;
}
break;
}
break;
case NumberState.SawExponentLetter:
type = JsonTokenType.Double;
switch (c) {
case '+':
case '-':
state = NumberState.SawExponentSign;
break;
default:
if (char.IsDigit((char) c)) {
state = NumberState.SawExponentDigits;
} else {
state = NumberState.Invalid;
}
break;
}
break;
case NumberState.SawExponentSign:
if (char.IsDigit((char) c)) {
state = NumberState.SawExponentDigits;
} else {
state = NumberState.Invalid;
}
break;
case NumberState.SawExponentDigits:
switch (c) {
case ',':
case '}':
case ']':
case ')':
case -1:
state = NumberState.Done;
break;
default:
if (char.IsDigit((char) c)) {
state = NumberState.SawExponentDigits;
} else if (char.IsWhiteSpace((char) c)) {
state = NumberState.Done;
} else {
state = NumberState.Invalid;
}
break;
}
break;
case NumberState.SawMinusI:
var sawMinusInfinity = true;
var nfinity = new char[] { 'n', 'f', 'i', 'n', 'i', 't', 'y' };
for (var i = 0; i < nfinity.Length; i++) {
if (c != nfinity[i]) {
sawMinusInfinity = false;
break;
}
c = buffer.Read();
}
if (sawMinusInfinity) {
type = JsonTokenType.Double;
switch (c) {
case ',':
case '}':
case ']':
case ')':
case -1:
state = NumberState.Done;
break;
default:
if (char.IsWhiteSpace((char) c)) {
state = NumberState.Done;
} else {
state = NumberState.Invalid;
}
break;
}
} else {
state = NumberState.Invalid;
}
break;
}
switch (state) {
case NumberState.Done:
buffer.UnRead(c);
var lexeme = buffer.Substring(start, buffer.Position - start);
if (type == JsonTokenType.Double) {
var value = XmlConvert.ToDouble(lexeme);
return new DoubleJsonToken(lexeme, value);
} else {
var value = XmlConvert.ToInt64(lexeme);
if (value < int.MinValue || value > int.MaxValue) {
return new Int64JsonToken(lexeme, value);
} else {
return new Int32JsonToken(lexeme, (int) value);
}
}
case NumberState.Invalid:
throw new FileFormatException(FormatMessage("Invalid JSON number", buffer, start));
}
}
}
private static JsonToken GetRegularExpressionToken(
JsonBuffer buffer
) {
// opening slash has already been read
var start = buffer.Position - 1;
var state = RegularExpressionState.InPattern;
while (true) {
var c = buffer.Read();
switch (state) {
case RegularExpressionState.InPattern:
switch (c) {
case '/': state = RegularExpressionState.InOptions; break;
case '\\': state = RegularExpressionState.InEscapeSequence; break;
default: state = RegularExpressionState.InPattern; break;
}
break;
case RegularExpressionState.InEscapeSequence:
state = RegularExpressionState.InPattern;
break;
case RegularExpressionState.InOptions:
switch (c) {
case 'i':
case 'm':
case 'x':
case 's':
state = RegularExpressionState.InOptions;
break;
case ',':
case '}':
case ']':
case ')':
case -1:
state = RegularExpressionState.Done;
break;
default:
if (char.IsWhiteSpace((char) c)) {
state = RegularExpressionState.Done;
} else {
state = RegularExpressionState.Invalid;
}
break;
}
break;
}
switch (state) {
case RegularExpressionState.Done:
buffer.UnRead(c);
var lexeme = buffer.Substring(start, buffer.Position - start);
var regex = BsonRegularExpression.Create(lexeme);
return new RegularExpressionJsonToken(lexeme, regex);
case RegularExpressionState.Invalid:
throw new FileFormatException(FormatMessage("Invalid JSON regular expression", buffer, start));
}
}
}
private static JsonToken GetStringToken(
JsonBuffer buffer,
char quoteCharacter // either single or double quote
) {
// opening quote has already been read
var start = buffer.Position - 1;
var sb = new StringBuilder();
while (true) {
var c = buffer.Read();
switch (c) {
case '\\':
c = buffer.Read();
switch (c) {
case '\'': sb.Append('\''); break;
case '"': sb.Append('"'); break;
case '\\': sb.Append('\\'); break;
case '/': sb.Append('/'); break;
case 'b': sb.Append('\b'); break;
case 'f': sb.Append('\f'); break;
case 'n': sb.Append('\n'); break;
case 'r': sb.Append('\r'); break;
case 't': sb.Append('\t'); break;
case 'u':
var u1 = buffer.Read();
var u2 = buffer.Read();
var u3 = buffer.Read();
var u4 = buffer.Read();
if (u4 != -1) {
var hex = new string(new char[] { (char) u1, (char) u2, (char) u3, (char) u4 });
var n = Convert.ToInt32(hex, 16);
sb.Append((char) n);
}
break;
default:
if (c != -1) {
var message = string.Format("Invalid escape sequence in JSON string '\\{0}'.", (char) c);
throw new FileFormatException(message);
}
break;
}
break;
default:
if (c == quoteCharacter) {
var lexeme = buffer.Substring(start, buffer.Position - start);
return new StringJsonToken(JsonTokenType.String, lexeme, sb.ToString());
}
if (c != -1) {
sb.Append((char) c);
}
break;
}
if (c == -1) {
throw new FileFormatException(FormatMessage("End of file in JSON string.", buffer, start));
}
}
}
private static JsonToken GetUnquotedStringToken(
JsonBuffer buffer
) {
// opening letter or $ has already been read
var start = buffer.Position - 1;
var c = buffer.Read();
while (c == '$' || char.IsLetterOrDigit((char) c)) {
c = buffer.Read();
}
buffer.UnRead(c);
var lexeme = buffer.Substring(start, buffer.Position - start);
return new StringJsonToken(JsonTokenType.UnquotedString, lexeme, lexeme);
}
#endregion
#region nested types
private enum NumberState {
SawLeadingMinus,
SawLeadingZero,
SawIntegerDigits,
SawDecimalPoint,
SawFractionDigits,
SawExponentLetter,
SawExponentSign,
SawExponentDigits,
SawMinusI,
Done,
Invalid
}
private enum RegularExpressionState {
InPattern,
InEscapeSequence,
InOptions,
Done,
Invalid
}
#endregion
}
}