JSON Token解析 这个章节主要讨论关于对象字段相关词法解析的api。
JSONLexerBase成员函数 这里讲解主要挑选具有代表性的api进行讲解,同时对于极其相似的api不冗余分析,可以参考代码阅读。
Int类型字段解析 当反序列化java
对象遇到整型int.class
字段会调用该方法解析:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 public int scanInt (char expectNext) { matchStat = UNKNOWN; int offset = 0 ; char chLocal = charAt(bp + (offset++)); final boolean quote = chLocal == '"' ; if (quote) { chLocal = charAt(bp + (offset++)); } final boolean negative = chLocal == '-' ; if (negative) { chLocal = charAt(bp + (offset++)); } int value; if (chLocal >= '0' && chLocal <= '9' ) { value = chLocal - '0' ; for (;;) { chLocal = charAt(bp + (offset++)); if (chLocal >= '0' && chLocal <= '9' ) { value = value * 10 + (chLocal - '0' ); } else if (chLocal == '.' ) { matchStat = NOT_MATCH; return 0 ; } else { break ; } } if (value < 0 ) { matchStat = NOT_MATCH; return 0 ; } } else if (chLocal == 'n' && charAt(bp + offset) == 'u' && charAt(bp + offset + 1 ) == 'l' && charAt(bp + offset + 2 ) == 'l' ) { matchStat = VALUE_NULL; value = 0 ; offset += 3 ; chLocal = charAt(bp + offset++); if (quote && chLocal == '"' ) { chLocal = charAt(bp + offset++); } for (;;) { if (chLocal == ',' ) { bp += offset; this .ch = charAt(bp); matchStat = VALUE_NULL; token = JSONToken.COMMA; return value; } else if (chLocal == ']' ) { bp += offset; this .ch = charAt(bp); matchStat = VALUE_NULL; token = JSONToken.RBRACKET; return value; } else if (isWhitespace(chLocal)) { chLocal = charAt(bp + offset++); continue ; } break ; } matchStat = NOT_MATCH; return 0 ; } else { matchStat = NOT_MATCH; return 0 ; } for (;;) { if (chLocal == expectNext) { bp += offset; this .ch = this .charAt(bp); matchStat = VALUE; token = JSONToken.COMMA; return negative ? -value : value; } else { if (isWhitespace(chLocal)) { chLocal = charAt(bp + (offset++)); continue ; } matchStat = NOT_MATCH; return negative ? -value : value; } } }
com.alibaba.fastjson.parser.JSONLexerBase#scanInt(char)
方法考虑了数字加引号的情况,当遇到下列情况认为匹配失败:
1. 扫描数字的过程中遇到小数点(.);
2. 扫描的数字范围溢出;
3. 扫描到的字符既不是数字,也不是null;
4. 忽略空白字符的情况下,读取数字后的结束符和期望的expectNext不一致。
fastjson
还提供第二种接口,根据token识别数字:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 public final Number integerValue () throws NumberFormatException { long result = 0 ; boolean negative = false ; if (np == -1 ) { np = 0 ; } int i = np, max = np + sp; long limit; long multmin; int digit; char type = ' ' ; switch (charAt(max - 1 )) { case 'L' : max--; type = 'L' ; break ; case 'S' : max--; type = 'S' ; break ; case 'B' : max--; type = 'B' ; break ; default : break ; } if (charAt(np) == '-' ) { negative = true ; limit = Long.MIN_VALUE; i++; } else { limit = -Long.MAX_VALUE; } multmin = MULTMIN_RADIX_TEN; if (i < max) { digit = charAt(i++) - '0' ; result = -digit; } while (i < max) { digit = charAt(i++) - '0' ; if (result < multmin) { return new BigInteger(numberString()); } result *= 10 ; if (result < limit + digit) { return new BigInteger(numberString()); } result -= digit; } if (negative) { if (i > np + 1 ) { if (result >= Integer.MIN_VALUE && type != 'L' ) { if (type == 'S' ) { return (short ) result; } if (type == 'B' ) { return (byte ) result; } return (int ) result; } return result; } else { throw new NumberFormatException(numberString()); } } else { result = -result; if (result <= Integer.MAX_VALUE && type != 'L' ) { if (type == 'S' ) { return (short ) result; } if (type == 'B' ) { return (byte ) result; } return (int ) result; } return result; } }
fastjson
还提供第三种接口,这个接口严格根据字段名进行匹配json
字符串,字段名会自动加上双引号和冒号,格式为"key":,实现如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 public int scanFieldInt (char [] fieldName) { matchStat = UNKNOWN; if (!charArrayCompare(fieldName)) { matchStat = NOT_MATCH_NAME; return 0 ; } int offset = fieldName.length; char chLocal = charAt(bp + (offset++)); final boolean negative = chLocal == '-' ; if (negative) { chLocal = charAt(bp + (offset++)); } int value; if (chLocal >= '0' && chLocal <= '9' ) { value = chLocal - '0' ; for (;;) { chLocal = charAt(bp + (offset++)); if (chLocal >= '0' && chLocal <= '9' ) { value = value * 10 + (chLocal - '0' ); } else if (chLocal == '.' ) { matchStat = NOT_MATCH; return 0 ; } else { break ; } } if (value < 0 || offset > 11 + 3 + fieldName.length) { if (value != Integer.MIN_VALUE || offset != 17 || !negative) { matchStat = NOT_MATCH; return 0 ; } } } else { matchStat = NOT_MATCH; return 0 ; } if (chLocal == ',' ) { bp += offset; this .ch = this .charAt(bp); matchStat = VALUE; token = JSONToken.COMMA; return negative ? -value : value; } if (chLocal == '}' ) { chLocal = charAt(bp + (offset++)); if (chLocal == ',' ) { token = JSONToken.COMMA; bp += offset; this .ch = this .charAt(bp); } else if (chLocal == ']' ) { token = JSONToken.RBRACKET; bp += offset; this .ch = this .charAt(bp); } else if (chLocal == '}' ) { token = JSONToken.RBRACE; bp += offset; this .ch = this .charAt(bp); } else if (chLocal == EOI) { token = JSONToken.EOF; bp += (offset - 1 ); ch = EOI; } else { matchStat = NOT_MATCH; return 0 ; } matchStat = END; } else { matchStat = NOT_MATCH; return 0 ; } return negative ? -value : value; }
Long类型字段解析 Long
字段解析和Int
一样提供3种接口,先看第一种基于字段类型解析:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 public long scanLong (char expectNextChar) { matchStat = UNKNOWN; int offset = 0 ; char chLocal = charAt(bp + (offset++)); final boolean quote = chLocal == '"' ; if (quote) { chLocal = charAt(bp + (offset++)); } final boolean negative = chLocal == '-' ; if (negative) { chLocal = charAt(bp + (offset++)); } long value; if (chLocal >= '0' && chLocal <= '9' ) { value = chLocal - '0' ; for (;;) { chLocal = charAt(bp + (offset++)); if (chLocal >= '0' && chLocal <= '9' ) { value = value * 10 + (chLocal - '0' ); } else if (chLocal == '.' ) { matchStat = NOT_MATCH; return 0 ; } else { break ; } } boolean valid = value >= 0 || (value == -9223372036854775808L && negative); if (!valid) { String val = subString(bp, offset - 1 ); throw new NumberFormatException(val); } } else if (chLocal == 'n' && charAt(bp + offset) == 'u' && charAt(bp + offset + 1 ) == 'l' && charAt(bp + offset + 2 ) == 'l' ) { matchStat = VALUE_NULL; value = 0 ; offset += 3 ; chLocal = charAt(bp + offset++); if (quote && chLocal == '"' ) { chLocal = charAt(bp + offset++); } for (;;) { if (chLocal == ',' ) { bp += offset; this .ch = charAt(bp); matchStat = VALUE_NULL; token = JSONToken.COMMA; return value; } else if (chLocal == ']' ) { bp += offset; this .ch = charAt(bp); matchStat = VALUE_NULL; token = JSONToken.RBRACKET; return value; } else if (isWhitespace(chLocal)) { chLocal = charAt(bp + offset++); continue ; } break ; } matchStat = NOT_MATCH; return 0 ; } else { matchStat = NOT_MATCH; return 0 ; } if (quote) { if (chLocal != '"' ) { matchStat = NOT_MATCH; return 0 ; } else { chLocal = charAt(bp + (offset++)); } } }
因为和Int
比较相似,这里提供第三个基于字段名字匹配实现:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 public long scanFieldLong (char [] fieldName) { matchStat = UNKNOWN; if (!charArrayCompare(fieldName)) { matchStat = NOT_MATCH_NAME; return 0 ; } int offset = fieldName.length; char chLocal = charAt(bp + (offset++)); boolean negative = false ; if (chLocal == '-' ) { chLocal = charAt(bp + (offset++)); negative = true ; } long value; if (chLocal >= '0' && chLocal <= '9' ) { value = chLocal - '0' ; for (;;) { chLocal = charAt(bp + (offset++)); if (chLocal >= '0' && chLocal <= '9' ) { value = value * 10 + (chLocal - '0' ); } else if (chLocal == '.' ) { matchStat = NOT_MATCH; return 0 ; } else { break ; } } boolean valid = offset - fieldName.length < 21 && (value >= 0 || (value == -9223372036854775808L && negative)); if (!valid) { matchStat = NOT_MATCH; return 0 ; } } else { matchStat = NOT_MATCH; return 0 ; } if (chLocal == ',' ) { bp += offset; this .ch = this .charAt(bp); matchStat = VALUE; token = JSONToken.COMMA; return negative ? -value : value; } return negative ? -value : value; }
Float类型字段解析 跟Int
一致的接口,现提供第二种获取float
实现:
1 2 3 4 5 6 7 8 9 10 11 12 13 public float floatValue () { String strVal = numberString(); float floatValue = Float.parseFloat(strVal); if (floatValue == 0 || floatValue == Float.POSITIVE_INFINITY) { char c0 = strVal.charAt(0 ); if (c0 > '0' && c0 <= '9' ) { throw new JSONException("float overflow : " + strVal); } } return floatValue; }
提供根据属性字段名字匹配的源码实现:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 public final float scanFieldFloat (char [] fieldName) { matchStat = UNKNOWN; if (!charArrayCompare(fieldName)) { matchStat = NOT_MATCH_NAME; return 0 ; } int offset = fieldName.length; char chLocal = charAt(bp + (offset++)); final boolean quote = chLocal == '"' ; if (quote) { chLocal = charAt(bp + (offset++)); } boolean negative = chLocal == '-' ; if (negative) { chLocal = charAt(bp + (offset++)); } float value; if (chLocal >= '0' && chLocal <= '9' ) { int intVal = chLocal - '0' ; for (;;) { chLocal = charAt(bp + (offset++)); if (chLocal >= '0' && chLocal <= '9' ) { intVal = intVal * 10 + (chLocal - '0' ); continue ; } else { break ; } } int power = 1 ; boolean small = (chLocal == '.' 
); if (small) { chLocal = charAt(bp + (offset++)); if (chLocal >= '0' && chLocal <= '9' ) { intVal = intVal * 10 + (chLocal - '0' ); power = 10 ; for (;;) { chLocal = charAt(bp + (offset++)); if (chLocal >= '0' && chLocal <= '9' ) { intVal = intVal * 10 + (chLocal - '0' ); power *= 10 ; continue ; } else { break ; } } } else { matchStat = NOT_MATCH; return 0 ; } } boolean exp = chLocal == 'e' || chLocal == 'E' ; if (exp) { chLocal = charAt(bp + (offset++)); if (chLocal == '+' || chLocal == '-' ) { chLocal = charAt(bp + (offset++)); } for (;;) { if (chLocal >= '0' && chLocal <= '9' ) { chLocal = charAt(bp + (offset++)); } else { break ; } } } int start, count; if (quote) { if (chLocal != '"' ) { matchStat = NOT_MATCH; return 0 ; } else { chLocal = charAt(bp + (offset++)); } start = bp + fieldName.length + 1 ; count = bp + offset - start - 2 ; } else { start = bp + fieldName.length; count = bp + offset - start - 1 ; } if (!exp && count < 20 ) { value = ((float ) intVal) / power; if (negative) { value = -value; } } else { String text = this .subString(start, count); value = Float.parseFloat(text); } } else if (chLocal == 'n' && charAt(bp + offset) == 'u' && charAt(bp + offset + 1 ) == 'l' && charAt(bp + offset + 2 ) == 'l' ) { matchStat = VALUE_NULL; value = 0 ; offset += 3 ; chLocal = charAt(bp + offset++); if (quote && chLocal == '"' ) { chLocal = charAt(bp + offset++); } for (;;) { if (chLocal == ',' ) { bp += offset; this .ch = charAt(bp); matchStat = VALUE_NULL; token = JSONToken.COMMA; return value; } else if (chLocal == '}' ) { bp += offset; this .ch = charAt(bp); matchStat = VALUE_NULL; token = JSONToken.RBRACE; return value; } else if (isWhitespace(chLocal)) { chLocal = charAt(bp + offset++); continue ; } break ; } matchStat = NOT_MATCH; return 0 ; } else { matchStat = NOT_MATCH; return 0 ; } if (chLocal == ',' ) { bp += offset; this .ch = this .charAt(bp); matchStat = VALUE; token = JSONToken.COMMA; return value; } return value; }
String类型字段解析
/**
 * Scans a string value (or the literal {@code null}) starting at the current
 * buffer position {@code bp} and expects it to be followed by
 * {@code expectNextChar}.
 *
 * <p>Whitespace before the opening quote and before the terminator is
 * skipped. When the raw text contains a backslash, the closing quote is
 * re-located so that escaped quotes are not mistaken for the end, and the
 * text is decoded through {@code readString}.
 *
 * <p>{@code matchStat} becomes {@code VALUE} on success and
 * {@code NOT_MATCH} otherwise.
 *
 * @param expectNextChar the character that must follow the value
 * @return the string, {@code null} for the {@code null} literal or a failed
 *         {@code null} match, or {@code stringDefaultValue()} when no
 *         opening quote is found
 * @throws JSONException if the string has no closing quote
 */
public String scanString(char expectNextChar) {
    matchStat = UNKNOWN;

    int offset = 0;
    char chLocal = charAt(bp + (offset++));

    if (chLocal == 'n') {
        if (charAt(bp + offset) == 'u'
                && charAt(bp + offset + 1) == 'l'
                && charAt(bp + offset + 2) == 'l') {
            offset += 3;
            chLocal = charAt(bp + (offset++));
        } else {
            matchStat = NOT_MATCH;
            return null;
        }

        if (chLocal == expectNextChar) {
            bp += offset;
            this.ch = this.charAt(bp);
            matchStat = VALUE;
            return null;
        } else {
            matchStat = NOT_MATCH;
            return null;
        }
    }

    final String strVal;
    for (;;) {
        if (chLocal == '"') {
            int startIndex = bp + offset;
            int endIndex = indexOf('"', startIndex);
            if (endIndex == -1) {
                throw new JSONException("unclosed str");
            }

            String stringVal = subString(bp + offset, endIndex - startIndex);
            if (stringVal.indexOf('\\') != -1) {
                // A quote preceded by an odd number of backslashes is escaped;
                // keep searching until an unescaped one is found.
                for (;;) {
                    int slashCount = 0;
                    for (int i = endIndex - 1; i >= 0; --i) {
                        if (charAt(i) == '\\') {
                            slashCount++;
                        } else {
                            break;
                        }
                    }
                    if (slashCount % 2 == 0) {
                        break;
                    }
                    endIndex = indexOf('"', endIndex + 1);
                    if (endIndex == -1) {
                        // Every remaining quote was escaped.
                        throw new JSONException("unclosed str");
                    }
                }

                int chars_len = endIndex - startIndex;
                // Read from startIndex, not bp + 1: whitespace skipped before
                // the opening quote makes the two differ.
                char[] chars = sub_chars(startIndex, chars_len);

                stringVal = readString(chars, chars_len);
            }

            offset += (endIndex - startIndex + 1);
            chLocal = charAt(bp + (offset++));
            strVal = stringVal;
            break;
        } else if (isWhitespace(chLocal)) {
            chLocal = charAt(bp + (offset++));
            continue;
        } else {
            matchStat = NOT_MATCH;
            return stringDefaultValue();
        }
    }

    for (;;) {
        if (chLocal == expectNextChar) {
            bp += offset;
            this.ch = charAt(bp);
            matchStat = VALUE;
            return strVal;
        } else if (isWhitespace(chLocal)) {
            chLocal = charAt(bp + (offset++));
            continue;
        } else {
            matchStat = NOT_MATCH;
            return strVal;
        }
    }
}
目前已经分析了足够多的词法分析代码,可以先自己分析或者参考下方更详细的scanFieldString
实现。
1 public abstract String stringVal () ;
这里提供的stringVal()
需要由子类实现,原因:
在android6.0
和jdk6
版本 获取子字符串会共享外层String
的char[]
会导致String占用内存无法释放(特别是大文本字符串)。
/**
 * Matches {@code fieldName} at the current position and scans the quoted
 * string value that follows it.
 *
 * <p>When the raw text contains a backslash, the closing quote is re-located
 * so that escaped quotes are not mistaken for the end, and the text is
 * decoded through {@code readString}.
 *
 * <p>The outcome is reported through {@code matchStat}:
 * <ul>
 *   <li>{@code NOT_MATCH_NAME} - the input does not start with {@code fieldName}.</li>
 *   <li>{@code VALUE} - the string is followed by {@code ','}.</li>
 *   <li>{@code END} - the string is followed by {@code '}'} and then one of
 *       {@code ','}, {@code ']'}, {@code '}'} or {@code EOI}; {@code token} is
 *       set accordingly.</li>
 *   <li>{@code NOT_MATCH} - anything else.</li>
 * </ul>
 *
 * @param fieldName the characters the input must start with
 * @return the string, or {@code stringDefaultValue()} when nothing matched
 * @throws JSONException if the string has no closing quote
 */
public String scanFieldString(char[] fieldName) {
    matchStat = UNKNOWN;

    if (!charArrayCompare(fieldName)) {
        matchStat = NOT_MATCH_NAME;
        return stringDefaultValue();
    }

    int offset = fieldName.length;
    char chLocal = charAt(bp + (offset++));

    if (chLocal != '"') {
        matchStat = NOT_MATCH;
        return stringDefaultValue();
    }

    final String strVal;
    {
        // First character after the opening quote.
        int startIndex = bp + fieldName.length + 1;
        int endIndex = indexOf('"', startIndex);
        if (endIndex == -1) {
            throw new JSONException("unclosed str");
        }

        String stringVal = subString(startIndex, endIndex - startIndex);
        if (stringVal.indexOf('\\') != -1) {
            // A quote preceded by an odd number of backslashes is escaped;
            // keep searching until an unescaped one is found.
            for (;;) {
                int slashCount = 0;
                for (int i = endIndex - 1; i >= 0; --i) {
                    if (charAt(i) == '\\') {
                        slashCount++;
                    } else {
                        break;
                    }
                }
                if (slashCount % 2 == 0) {
                    break;
                }
                endIndex = indexOf('"', endIndex + 1);
                if (endIndex == -1) {
                    // Every remaining quote was escaped.
                    throw new JSONException("unclosed str");
                }
            }

            int chars_len = endIndex - startIndex;
            char[] chars = sub_chars(startIndex, chars_len);

            stringVal = readString(chars, chars_len);
        }

        offset += (endIndex - startIndex + 1);
        chLocal = charAt(bp + (offset++));
        strVal = stringVal;
    }

    if (chLocal == ',') {
        bp += offset;
        this.ch = this.charAt(bp);
        matchStat = VALUE;
        return strVal;
    }

    if (chLocal == '}') {
        // The enclosing object ends here; classify what follows it.
        chLocal = charAt(bp + (offset++));
        if (chLocal == ',') {
            token = JSONToken.COMMA;
            bp += offset;
            this.ch = this.charAt(bp);
        } else if (chLocal == ']') {
            token = JSONToken.RBRACKET;
            bp += offset;
            this.ch = this.charAt(bp);
        } else if (chLocal == '}') {
            token = JSONToken.RBRACE;
            bp += offset;
            this.ch = this.charAt(bp);
        } else if (chLocal == EOI) {
            token = JSONToken.EOF;
            bp += (offset - 1);
            ch = EOI;
        } else {
            matchStat = NOT_MATCH;
            return stringDefaultValue();
        }
        matchStat = END;
    } else {
        matchStat = NOT_MATCH;
        return stringDefaultValue();
    }

    return strVal;
}
目前分析的代码其实包括大部分实现了,这里没有给出Decimal
和Double
的实现,它们实现是类似的并且相对简单,主要是提取字符串直接用对应类的构造函数生成对象而已,如果想详细了解可以参考代码中已经添加的详尽注释。
终于要结束词法分析相关api
接口的分析了,这个是词法分析非常重要的基础实现,有继承这个类的两种实现com.alibaba.fastjson.parser.JSONScanner
和com.alibaba.fastjson.parser.JSONReaderScanner
, 这两个类继承主要增加一个优化的措施,后面讲解反序列化实现的时候会对相关重写的方法进行补充。