JSON Token解析
`JSONLexerBase` 定义并实现了 JSON 串解析机制的基础。在理解后面的反序列化之前,我们先来看看并理解其中几个重要的属性:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
| protected int token; // type of the most recently scanned token; assigned by nextToken and the scan* methods
protected int pos; protected int features; // pos: set to bp whenever a token scan starts. features: NOTE(review): presumably the Feature bit mask consulted by isEnabled(...) - confirm
protected char ch; // character currently being examined by the scanner
protected int bp; // NOTE(review): presumably the current index into the input; pos and np are derived from it - confirm
protected int eofPos; // value assigned to both pos and bp once end of input is detected
protected char[] sbuf; // scratch buffer; scanString grows it and stores characters in it once an escape has been seen
protected int sp; // reset to 0 by nextToken and incremented for each character of the token being scanned
protected int np; // derived from bp when a string, number, identifier or hex scan begins
|
JSONLexerBase成员函数
在分析词法分析的实现过程中,我发现解析代码中存在大量重复或极其类似的实现。重复代码主要是为了达到类似 C++ 内联调用的效果;对于极其相似的实现,我会挑选有代表性的(一般是实现较为复杂的)来说明,没有说明的成员函数可以参考代码注释。
推断token类型
fastjson 的 token 类型推断用于判断当前 JSON 字符串是哪种类型的 token,比如字符串、花括号和逗号等等。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
| /**
 * Scans the next token by dispatching on the current character {@code ch}.
 *
 * <p>{@code sp} is reset first, and {@code pos} is set to {@code bp} at the start of
 * every loop iteration. Comments, whitespace and control characters
 * ({@code ch <= 31} or {@code ch == 127}) are skipped and the loop runs again; any
 * other character either sets {@code token} directly or delegates to a scan method.
 *
 * @throws JSONException if a single-quoted string starts while
 *         {@code Feature.AllowSingleQuotes} is disabled, or if end of input is hit
 *         when {@code token} is already {@code EOF}
 */
public final void nextToken() {
    sp = 0;

    while (true) {
        pos = bp;

        switch (ch) {
            case '/':
                skipComment();
                continue;
            case '"':
                scanString();
                return;
            case ',':
                next();
                token = COMMA;
                return;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '-':
                scanNumber();
                return;
            case '\'':
                if (!isEnabled(Feature.AllowSingleQuotes)) {
                    throw new JSONException("Feature.AllowSingleQuotes is false");
                }
                scanStringSingleQuote();
                return;
            case ' ':
            case '\t':
            case '\b':
            case '\f':
            case '\n':
            case '\r':
                // Whitespace: consume it and look at the next character.
                next();
                continue;
            case 't':
                scanTrue();
                return;
            case 'f':
                scanFalse();
                return;
            case 'n':
                scanNullOrNew();
                return;
            case 'T':
            case 'N':
            case 'S':
            case 'u':
                scanIdent();
                return;
            case '(':
                next();
                token = LPAREN;
                return;
            case ')':
                next();
                token = RPAREN;
                return;
            case '[':
                next();
                token = LBRACKET;
                return;
            case ']':
                next();
                token = RBRACKET;
                return;
            case '{':
                next();
                token = LBRACE;
                return;
            case '}':
                next();
                token = RBRACE;
                return;
            case ':':
                next();
                token = COLON;
                return;
            case ';':
                next();
                token = SEMI;
                return;
            case '.':
                next();
                token = DOT;
                return;
            case '+':
                // The leading plus sign is dropped before the number is scanned.
                next();
                scanNumber();
                return;
            case 'x':
                scanHex();
                return;
            default:
                if (!isEOF()) {
                    if (ch <= 31 || ch == 127) {
                        // Control characters are skipped like whitespace.
                        next();
                        continue;
                    }
                    lexError("illegal.char", String.valueOf((int) ch));
                    next();
                    return;
                }
                if (token == EOF) {
                    // EOF was already reported by a previous call.
                    throw new JSONException("EOF error");
                }
                token = EOF;
                pos = bp = eofPos;
                return;
        }
    }
}
|
跳过注释
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
| /**
 * Skips a comment; called when {@code ch} is the leading {@code '/'}.
 *
 * <p>A {@code //} comment is consumed up to and including the next {@code '\n'},
 * or up to {@code EOI}. A block comment is consumed up to and including the
 * closing star-slash pair; if {@code EOI} is reached first the method returns
 * without raising an error.
 *
 * @throws JSONException if the character after the leading slash is neither
 *         {@code '/'} nor {@code '*'}
 */
protected void skipComment() {
    next();

    switch (ch) {
        case '/':
            // Line comment: advance until a newline or EOI is the current character.
            do {
                next();
            } while (ch != '\n' && ch != EOI);
            if (ch == '\n') {
                next();
            }
            return;
        case '*':
            next();
            while (ch != EOI) {
                if (ch != '*') {
                    next();
                    continue;
                }
                next();
                if (ch == '/') {
                    next();
                    return;
                }
                // Not a terminator: re-examine the current character, so a run of
                // stars directly before the closing slash still ends the comment.
            }
            return;
        default:
            throw new JSONException("invalid comment");
    }
}
|
注释的解析主要分为 2 种:支持 `//` 单行注释和 `/* */` 块注释两种格式。
扫描字符串
当解析 JSON 字符串遇到 `"` 时,会调用扫描字符串方法。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
| /**
 * Scans a double-quoted string literal; called when the current character is the
 * opening {@code '"'}.
 *
 * <p>While no escape has been seen, only {@code sp} is incremented. On the first
 * backslash, {@code hasSpecial} is set, {@code sbuf} is grown if needed, and the
 * characters scanned so far are copied into it with {@code copyTo}; from then on
 * every character is stored in the buffer. On success {@code token} becomes
 * {@code JSONToken.LITERAL_STRING} and {@code ch} is advanced past the closing quote.
 *
 * @throws JSONException if the input ends before the closing quote, if an unknown
 *         escape character follows a backslash, or if {@code \x} is not followed by
 *         two ASCII hexadecimal digits
 */
public final void scanString() {
    np = bp;
    hasSpecial = false;
    char ch;
    for (;;) {
        ch = next();

        if (ch == '\"') {
            break;
        }

        if (ch == EOI) {
            if (!isEOF()) {
                // An EOI character that is not the real end of input is ordinary content.
                putChar((char) EOI);
                continue;
            }
            throw new JSONException("unclosed string : " + ch);
        }

        if (ch == '\\') {
            if (!hasSpecial) {
                hasSpecial = true;

                // Make room for the characters scanned so far before copying them.
                if (sp >= sbuf.length) {
                    int newCapacity = sbuf.length * 2;
                    if (sp > newCapacity) {
                        newCapacity = sp;
                    }
                    char[] newsbuf = new char[newCapacity];
                    System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
                    sbuf = newsbuf;
                }

                copyTo(np + 1, sp, sbuf);
            }

            ch = next();

            switch (ch) {
                case '0':
                    putChar('\0');
                    break;
                case '1':
                    putChar('\1');
                    break;
                case '2':
                    putChar('\2');
                    break;
                case '3':
                    putChar('\3');
                    break;
                case '4':
                    putChar('\4');
                    break;
                case '5':
                    putChar('\5');
                    break;
                case '6':
                    putChar('\6');
                    break;
                case '7':
                    putChar('\7');
                    break;
                case 'b':
                    putChar('\b');
                    break;
                case 't':
                    putChar('\t');
                    break;
                case 'n':
                    putChar('\n');
                    break;
                case 'v':
                    putChar('\u000B');
                    break;
                case 'f':
                case 'F':
                    putChar('\f');
                    break;
                case 'r':
                    putChar('\r');
                    break;
                case '"':
                    putChar('"');
                    break;
                case '\'':
                    putChar('\'');
                    break;
                case '/':
                    putChar('/');
                    break;
                case '\\':
                    putChar('\\');
                    break;
                case 'x': {
                    char x1 = ch = next();
                    char x2 = ch = next();

                    // Validate before indexing the digits table: an arbitrary
                    // character (including EOI on truncated input) would otherwise
                    // be used as an array index and either overrun the table or
                    // yield a meaningless value.
                    boolean hex1 = (x1 >= '0' && x1 <= '9')
                            || (x1 >= 'a' && x1 <= 'f')
                            || (x1 >= 'A' && x1 <= 'F');
                    boolean hex2 = (x2 >= '0' && x2 <= '9')
                            || (x2 >= 'a' && x2 <= 'f')
                            || (x2 >= 'A' && x2 <= 'F');
                    if (!hex1 || !hex2) {
                        throw new JSONException("invalid escape character \\x" + x1 + x2);
                    }

                    int xVal = digits[x1] * 16 + digits[x2];
                    putChar((char) xVal);
                    break;
                }
                case 'u': {
                    char u1 = ch = next();
                    char u2 = ch = next();
                    char u3 = ch = next();
                    char u4 = ch = next();
                    int val = Integer.parseInt(new String(new char[] { u1, u2, u3, u4 }), 16);
                    putChar((char) val);
                    break;
                }
                default:
                    this.ch = ch;
                    throw new JSONException("unclosed string : " + ch);
            }
            continue;
        }

        if (!hasSpecial) {
            // No escape seen yet: only the length is tracked.
            sp++;
            continue;
        }

        if (sp == sbuf.length) {
            putChar(ch);
        } else {
            sbuf[sp++] = ch;
        }
    }

    token = JSONToken.LITERAL_STRING;
    this.ch = next();
}
|
扫描字符串时,如果没有遇到转义字符,只会累加长度 sp;一旦遇到转义字符,就会把已扫描的内容拷贝到 buffer(sbuf)中,之后的字符也都写入 buffer。
扫描数字类型
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
| /**
 * Scans a numeric literal starting at the current character.
 *
 * <p>Accepts an optional leading minus, a run of digits, an optional fraction, and
 * then either a single type suffix ({@code L}, {@code S}, {@code B}, {@code F},
 * {@code D}) or an exponent part that may itself end in {@code D} or {@code F}.
 * {@code sp} is incremented for every consumed character. {@code token} becomes
 * {@code JSONToken.LITERAL_FLOAT} when a fraction, an exponent or an {@code F}/{@code D}
 * suffix was seen, and {@code JSONToken.LITERAL_INT} otherwise.
 */
public final void scanNumber() {
    np = bp;

    if (ch == '-') {
        sp++;
        next();
    }

    while (ch >= '0' && ch <= '9') {
        sp++;
        next();
    }

    boolean floating = false;

    if (ch == '.') {
        sp++;
        next();
        floating = true;

        while (ch >= '0' && ch <= '9') {
            sp++;
            next();
        }
    }

    switch (ch) {
        case 'L':
        case 'S':
        case 'B':
            sp++;
            next();
            break;
        case 'F':
        case 'D':
            sp++;
            next();
            floating = true;
            break;
        case 'e':
        case 'E':
            sp++;
            next();

            if (ch == '+' || ch == '-') {
                sp++;
                next();
            }

            while (ch >= '0' && ch <= '9') {
                sp++;
                next();
            }

            if (ch == 'D' || ch == 'F') {
                sp++;
                next();
            }

            floating = true;
            break;
        default:
            break;
    }

    token = floating ? JSONToken.LITERAL_FLOAT : JSONToken.LITERAL_INT;
}
|
扫描Boolean
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
| /**
 * Scans the literal {@code true} starting at the current character.
 *
 * <p>Each of the four letters must match in turn; the character following the
 * literal must then be one of the accepted terminators (whitespace, {@code ','},
 * {@code '}'}, {@code ']'}, {@code ':'}, {@code '/'} or {@code EOI}). On success
 * {@code token} is set to {@code JSONToken.TRUE}.
 *
 * @throws JSONException if a letter does not match or the terminator is not accepted
 */
public final void scanTrue() {
    final char[] literal = { 't', 'r', 'u', 'e' };
    for (char expected : literal) {
        if (ch != expected) {
            throw new JSONException("error parse true");
        }
        next();
    }

    final boolean whitespace = ch == ' ' || ch == '\n' || ch == '\r'
            || ch == '\t' || ch == '\f' || ch == '\b';
    final boolean delimiter = ch == ',' || ch == '}' || ch == ']'
            || ch == ':' || ch == '/' || ch == EOI;

    if (!whitespace && !delimiter) {
        throw new JSONException("scan true error");
    }
    token = JSONToken.TRUE;
}
|
扫描标识符
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
| /**
 * Scans an identifier and classifies it as a keyword token or a plain identifier.
 *
 * <p>Characters are consumed while {@code Character.isLetterOrDigit} holds, with
 * {@code sp} incremented once per consumed character. The text returned by
 * {@code stringVal()} is then matched against the known keywords; {@code null} is
 * matched case-insensitively, all others exactly.
 */
public final void scanIdent() {
    np = bp - 1;
    hasSpecial = false;

    do {
        sp++;
        next();
    } while (Character.isLetterOrDigit(ch));

    final String ident = stringVal();

    final int kind;
    if ("null".equalsIgnoreCase(ident)) {
        kind = JSONToken.NULL;
    } else if ("new".equals(ident)) {
        kind = JSONToken.NEW;
    } else if ("true".equals(ident)) {
        kind = JSONToken.TRUE;
    } else if ("false".equals(ident)) {
        kind = JSONToken.FALSE;
    } else if ("undefined".equals(ident)) {
        kind = JSONToken.UNDEFINED;
    } else if ("Set".equals(ident)) {
        kind = JSONToken.SET;
    } else if ("TreeSet".equals(ident)) {
        kind = JSONToken.TREE_SET;
    } else {
        kind = JSONToken.IDENTIFIER;
    }
    token = kind;
}
|
扫描十六进制数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| /**
 * Scans a hex literal of the form {@code x'...'}; called when {@code ch} is
 * {@code 'x'}.
 *
 * <p>After the opening quote, characters are read until the closing quote. Only
 * {@code 0-9} and upper-case {@code A-F} are accepted. {@code sp} is incremented for
 * every accepted digit and once more for the closing quote. On success {@code token}
 * is set to {@code JSONToken.HEX}.
 *
 * <p>NOTE(review): the character directly after the opening quote is consumed by the
 * {@code next()} call that precedes the loop, so it is neither validated nor counted
 * in {@code sp} - confirm this is what the consumers of {@code np}/{@code sp} expect.
 *
 * @throws JSONException if the literal does not start with {@code x'} or contains a
 *         character that is neither an accepted digit nor the closing quote
 */
public final void scanHex() {
    if (ch != 'x') {
        throw new JSONException("illegal state. " + ch);
    }
    next();
    if (ch != '\'') {
        throw new JSONException("illegal state. " + ch);
    }

    np = bp;
    next();

    while (true) {
        final char c = next();
        final boolean hexDigit = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F');

        if (hexDigit) {
            sp++;
            continue;
        }
        if (c != '\'') {
            throw new JSONException("illegal state. " + c);
        }

        // Closing quote: count it, step past it, and stop.
        sp++;
        next();
        break;
    }

    token = JSONToken.HEX;
}
|
根据期望字符扫描token
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
| /**
 * Scans the next token, first trying the characters that are likely for the
 * expected token type.
 *
 * <p>If the current character matches one of the shortcuts for {@code expect}, the
 * token is produced directly. Otherwise whitespace is skipped and the shortcuts are
 * retried; any other character hands over to the general {@link #nextToken()}.
 *
 * @param expect the token type the caller expects next (a {@code JSONToken} constant)
 */
public final void nextToken(int expect) {
    sp = 0;

    while (true) {
        switch (expect) {
            case JSONToken.LBRACE:
            case JSONToken.LBRACKET:
                // Either opening bracket is accepted for both expectations.
                if (ch == '[') {
                    token = JSONToken.LBRACKET;
                    next();
                    return;
                }
                if (ch == '{') {
                    token = JSONToken.LBRACE;
                    next();
                    return;
                }
                break;
            case JSONToken.COMMA:
                if (ch == ',') {
                    token = JSONToken.COMMA;
                    next();
                    return;
                }
                if (ch == '}') {
                    token = JSONToken.RBRACE;
                    next();
                    return;
                }
                if (ch == ']') {
                    token = JSONToken.RBRACKET;
                    next();
                    return;
                }
                if (ch == EOI) {
                    token = JSONToken.EOF;
                    return;
                }
                break;
            case JSONToken.LITERAL_INT:
            case JSONToken.LITERAL_STRING:
                // Both literal expectations accept a number, a string or a nested value.
                if (ch == '"') {
                    pos = bp;
                    scanString();
                    return;
                }
                if (ch >= '0' && ch <= '9') {
                    pos = bp;
                    scanNumber();
                    return;
                }
                if (ch == '[') {
                    token = JSONToken.LBRACKET;
                    next();
                    return;
                }
                if (ch == '{') {
                    token = JSONToken.LBRACE;
                    next();
                    return;
                }
                break;
            case JSONToken.RBRACKET:
                if (ch == ']') {
                    token = JSONToken.RBRACKET;
                    next();
                    return;
                }
                // No break here: control continues into the EOF check below.
            case JSONToken.EOF:
                if (ch == EOI) {
                    token = JSONToken.EOF;
                    return;
                }
                break;
            case JSONToken.IDENTIFIER:
                nextIdent();
                return;
            default:
                break;
        }

        final boolean blank = ch == ' ' || ch == '\n' || ch == '\r'
                || ch == '\t' || ch == '\f' || ch == '\b';
        if (!blank) {
            // No shortcut applied: use the general scanner.
            nextToken();
            return;
        }
        next();
    }
}
|
这个方法主要是根据期望的 token 类型 expect,优先判断当前字符是否能直接得到对应的 token;如果不匹配,则跳过空白字符后重试,其余情况回退到通用的 nextToken()。接下来主要分析解析对象字段的相关 API 实现。