33// https://tools.ietf.org/html/rfc8259
44
55const std = @import ("index.zig" );
6+ const debug = std .debug ;
67const mem = std .mem ;
78
89const u1 = @IntType (false , 1 );
@@ -86,7 +87,9 @@ pub const Token = struct {
8687// parsing state requires ~40-50 bytes of stack space.
8788//
// Conforms strictly to RFC8259.
89- pub const StreamingJsonParser = struct {
90+ //
91+ // For a non-byte based wrapper, consider using TokenStream instead.
92+ pub const StreamingParser = struct {
9093 // Current state
9194 state : State ,
9295 // How many bytes we have counted for the current token
@@ -109,13 +112,13 @@ pub const StreamingJsonParser = struct {
109112 const array_bit = 1 ;
110113 const max_stack_size = @maxValue (u8 );
111114
112- pub fn init () StreamingJsonParser {
113- var p : StreamingJsonParser = undefined ;
115+ pub fn init () StreamingParser {
116+ var p : StreamingParser = undefined ;
114117 p .reset ();
115118 return p ;
116119 }
117120
118- pub fn reset (p : * StreamingJsonParser ) void {
121+ pub fn reset (p : * StreamingParser ) void {
119122 p .state = State .TopLevelBegin ;
120123 p .count = 0 ;
121124 // Set before ever read in main transition function
@@ -175,7 +178,7 @@ pub const StreamingJsonParser = struct {
175178
176179 // Only call this function to generate array/object final state.
177180 pub fn fromInt (x : var ) State {
178- std . debug .assert (x == 0 or x == 1 );
181+ debug .assert (x == 0 or x == 1 );
179182 const T = @TagType (State );
180183 return State (T (x ));
181184 }
@@ -205,7 +208,7 @@ pub const StreamingJsonParser = struct {
205208 // tokens. token2 is always null if token1 is null.
206209 //
207210 // There is currently no error recovery on a bad stream.
208- pub fn feed (p : * StreamingJsonParser , c : u8 , token1 : * ? Token , token2 : * ? Token ) Error ! void {
211+ pub fn feed (p : * StreamingParser , c : u8 , token1 : * ? Token , token2 : * ? Token ) Error ! void {
209212 token1 .* = null ;
210213 token2 .* = null ;
211214 p .count += 1 ;
@@ -217,7 +220,7 @@ pub const StreamingJsonParser = struct {
217220 }
218221
219222 // Perform a single transition on the state machine and return any possible token.
220- fn transition (p : * StreamingJsonParser , c : u8 , token : * ? Token ) Error ! bool {
223+ fn transition (p : * StreamingParser , c : u8 , token : * ? Token ) Error ! bool {
221224 switch (p .state ) {
222225 State .TopLevelBegin = > switch (c ) {
223226 '{' = > {
@@ -852,10 +855,116 @@ pub const StreamingJsonParser = struct {
852855 }
853856};
854857
// A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens.
//
// Usage: construct with `init`, then call `next` repeatedly until it returns null.
pub const TokenStream = struct {
    // Index of the next byte of `slice` to feed to the parser. After `next` returns a token,
    // `i - 1` is the index of the byte that completed it. Becomes `slice.len + 1` once the
    // end-of-input flush in `next` has been performed.
    i: usize,
    // The complete input being tokenized.
    slice: []const u8,
    // Underlying byte-at-a-time state machine.
    parser: StreamingParser,
    // Second token produced by a single `feed` call, held back until the following `next`.
    token: ?Token,

    // Create a stream positioned at the start of `slice`.
    pub fn init(slice: []const u8) TokenStream {
        return TokenStream{
            .i = 0,
            .slice = slice,
            .parser = StreamingParser.init(),
            .token = null,
        };
    }

    // Return the next token, or null once the input is exhausted. Any error reported by
    // StreamingParser.feed is propagated.
    //
    // NOTE(review): this does not check whether the parser reached a complete document;
    // confirm whether callers need to reject truncated input themselves.
    pub fn next(self: *TokenStream) !?Token {
        // A previous feed produced two tokens; hand out the pending one first.
        if (self.token) |token| {
            self.token = null;
            return token;
        }

        var t1: ?Token = undefined;
        var t2: ?Token = undefined;

        while (self.i < self.slice.len) {
            try self.parser.feed(self.slice[self.i], &t1, &t2);
            self.i += 1;

            if (t1) |token| {
                self.token = t2;
                return token;
            }
        }

        // The loop above always leaves `i == slice.len`. Feed one whitespace byte, exactly
        // once, so that a token which is only terminated by the byte following it (e.g. a
        // top-level number) is still emitted. Bumping `i` past `slice.len` prevents the
        // flush from being repeated on later calls.
        if (self.i == self.slice.len) {
            try self.parser.feed(' ', &t1, &t2);
            self.i += 1;

            if (t1) |token| {
                return token;
            }
        }

        return null;
    }
};
905+
// Test helper: pull the next token from the stream and assert that it carries the expected id.
// A parse error or an exhausted stream is treated as unreachable.
fn checkNext(p: *TokenStream, id: Token.Id) void {
    const maybe_token = p.next() catch unreachable;
    const token = ??maybe_token;
    debug.assert(id == token.id);
}
910+
test "token" {
    // Document containing nested objects, strings, numbers, a literal and an array.
    const input =
        \\{
        \\ "Image": {
        \\ "Width": 800,
        \\ "Height": 600,
        \\ "Title": "View from 15th Floor",
        \\ "Thumbnail": {
        \\ "Url": "http://www.example.com/image/481989943",
        \\ "Height": 125,
        \\ "Width": 100
        \\ },
        \\ "Animated" : false,
        \\ "IDs": [116, 943, 234, 38793]
        \\ }
        \\}
    ;

    // Token ids in the exact order the stream is expected to produce them.
    const expected = []Token.Id{
        Token.Id.ObjectBegin,
        Token.Id.String, // Image
        Token.Id.ObjectBegin,
        Token.Id.String, // Width
        Token.Id.Number,
        Token.Id.String, // Height
        Token.Id.Number,
        Token.Id.String, // Title
        Token.Id.String,
        Token.Id.String, // Thumbnail
        Token.Id.ObjectBegin,
        Token.Id.String, // Url
        Token.Id.String,
        Token.Id.String, // Height
        Token.Id.Number,
        Token.Id.String, // Width
        Token.Id.Number,
        Token.Id.ObjectEnd,
        Token.Id.String, // Animated
        Token.Id.False,
        Token.Id.String, // IDs
        Token.Id.ArrayBegin,
        Token.Id.Number,
        Token.Id.Number,
        Token.Id.Number,
        Token.Id.Number,
        Token.Id.ArrayEnd,
        Token.Id.ObjectEnd,
        Token.Id.ObjectEnd,
    };

    var stream = TokenStream.init(input);

    for (expected) |id| {
        checkNext(&stream, id);
    }

    // Nothing may follow the final closing brace.
    debug.assert((try stream.next()) == null);
}
963+
855964// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
856965// be able to decode the string even if this returns true.
857966pub fn validate (s : []const u8 ) bool {
858- var p = StreamingJsonParser .init ();
967+ var p = StreamingParser .init ();
859968
860969 for (s ) | c , i | {
861970 var token1 : ? Token = undefined ;
@@ -897,46 +1006,46 @@ pub const Value = union(enum) {
8971006 pub fn dump (self : * const Value ) void {
8981007 switch (self .* ) {
8991008 Value .Null = > {
900- std . debug .warn ("null" );
1009+ debug .warn ("null" );
9011010 },
9021011 Value .Bool = > | inner | {
903- std . debug .warn ("{}" , inner );
1012+ debug .warn ("{}" , inner );
9041013 },
9051014 Value .Integer = > | inner | {
906- std . debug .warn ("{}" , inner );
1015+ debug .warn ("{}" , inner );
9071016 },
9081017 Value .Float = > | inner | {
909- std . debug .warn ("{.5}" , inner );
1018+ debug .warn ("{.5}" , inner );
9101019 },
9111020 Value .String = > | inner | {
912- std . debug .warn ("\" {}\" " , inner );
1021+ debug .warn ("\" {}\" " , inner );
9131022 },
9141023 Value .Array = > | inner | {
9151024 var not_first = false ;
916- std . debug .warn ("[" );
1025+ debug .warn ("[" );
9171026 for (inner .toSliceConst ()) | value | {
9181027 if (not_first ) {
919- std . debug .warn ("," );
1028+ debug .warn ("," );
9201029 }
9211030 not_first = true ;
9221031 value .dump ();
9231032 }
924- std . debug .warn ("]" );
1033+ debug .warn ("]" );
9251034 },
9261035 Value .Object = > | inner | {
9271036 var not_first = false ;
928- std . debug .warn ("{{" );
1037+ debug .warn ("{{" );
9291038 var it = inner .iterator ();
9301039
9311040 while (it .next ()) | entry | {
9321041 if (not_first ) {
933- std . debug .warn ("," );
1042+ debug .warn ("," );
9341043 }
9351044 not_first = true ;
936- std . debug .warn ("\" {}\" :" , entry .key );
1045+ debug .warn ("\" {}\" :" , entry .key );
9371046 entry .value .dump ();
9381047 }
939- std . debug .warn ("}}" );
1048+ debug .warn ("}}" );
9401049 },
9411050 }
9421051 }
@@ -952,67 +1061,67 @@ pub const Value = union(enum) {
9521061 fn dumpIndentLevel (self : * const Value , indent : usize , level : usize ) void {
9531062 switch (self .* ) {
9541063 Value .Null = > {
955- std . debug .warn ("null" );
1064+ debug .warn ("null" );
9561065 },
9571066 Value .Bool = > | inner | {
958- std . debug .warn ("{}" , inner );
1067+ debug .warn ("{}" , inner );
9591068 },
9601069 Value .Integer = > | inner | {
961- std . debug .warn ("{}" , inner );
1070+ debug .warn ("{}" , inner );
9621071 },
9631072 Value .Float = > | inner | {
964- std . debug .warn ("{.5}" , inner );
1073+ debug .warn ("{.5}" , inner );
9651074 },
9661075 Value .String = > | inner | {
967- std . debug .warn ("\" {}\" " , inner );
1076+ debug .warn ("\" {}\" " , inner );
9681077 },
9691078 Value .Array = > | inner | {
9701079 var not_first = false ;
971- std . debug .warn ("[\n " );
1080+ debug .warn ("[\n " );
9721081
9731082 for (inner .toSliceConst ()) | value | {
9741083 if (not_first ) {
975- std . debug .warn (",\n " );
1084+ debug .warn (",\n " );
9761085 }
9771086 not_first = true ;
9781087 padSpace (level + indent );
9791088 value .dumpIndentLevel (indent , level + indent );
9801089 }
981- std . debug .warn ("\n " );
1090+ debug .warn ("\n " );
9821091 padSpace (level );
983- std . debug .warn ("]" );
1092+ debug .warn ("]" );
9841093 },
9851094 Value .Object = > | inner | {
9861095 var not_first = false ;
987- std . debug .warn ("{{\n " );
1096+ debug .warn ("{{\n " );
9881097 var it = inner .iterator ();
9891098
9901099 while (it .next ()) | entry | {
9911100 if (not_first ) {
992- std . debug .warn (",\n " );
1101+ debug .warn (",\n " );
9931102 }
9941103 not_first = true ;
9951104 padSpace (level + indent );
996- std . debug .warn ("\" {}\" : " , entry .key );
1105+ debug .warn ("\" {}\" : " , entry .key );
9971106 entry .value .dumpIndentLevel (indent , level + indent );
9981107 }
999- std . debug .warn ("\n " );
1108+ debug .warn ("\n " );
10001109 padSpace (level );
1001- std . debug .warn ("}}" );
1110+ debug .warn ("}}" );
10021111 },
10031112 }
10041113 }
10051114
10061115 fn padSpace (indent : usize ) void {
10071116 var i : usize = 0 ;
10081117 while (i < indent ) : (i += 1 ) {
1009- std . debug .warn (" " );
1118+ debug .warn (" " );
10101119 }
10111120 }
10121121};
10131122
10141123// A non-stream JSON parser which constructs a tree of Value's.
1015- pub const JsonParser = struct {
1124+ pub const Parser = struct {
10161125 allocator : * Allocator ,
10171126 state : State ,
10181127 copy_strings : bool ,
@@ -1026,61 +1135,35 @@ pub const JsonParser = struct {
10261135 Simple ,
10271136 };
10281137
1029- pub fn init (allocator : * Allocator , copy_strings : bool ) JsonParser {
1030- return JsonParser {
1138+ pub fn init (allocator : * Allocator , copy_strings : bool ) Parser {
1139+ return Parser {
10311140 .allocator = allocator ,
10321141 .state = State .Simple ,
10331142 .copy_strings = copy_strings ,
10341143 .stack = ArrayList (Value ).init (allocator ),
10351144 };
10361145 }
10371146
1038- pub fn deinit (p : * JsonParser ) void {
1147+ pub fn deinit (p : * Parser ) void {
10391148 p .stack .deinit ();
10401149 }
10411150
1042- pub fn reset (p : * JsonParser ) void {
1151+ pub fn reset (p : * Parser ) void {
10431152 p .state = State .Simple ;
10441153 p .stack .shrink (0 );
10451154 }
10461155
1047- pub fn parse (p : * JsonParser , input : []const u8 ) ! ValueTree {
1048- var mp = StreamingJsonParser .init ();
1156+ pub fn parse (p : * Parser , input : []const u8 ) ! ValueTree {
1157+ var s = TokenStream .init (input );
10491158
10501159 var arena = ArenaAllocator .init (p .allocator );
10511160 errdefer arena .deinit ();
10521161
1053- for (input ) | c , i | {
1054- var mt1 : ? Token = undefined ;
1055- var mt2 : ? Token = undefined ;
1056-
1057- try mp .feed (c , & mt1 , & mt2 );
1058- if (mt1 ) | t1 | {
1059- try p .transition (& arena .allocator , input , i , t1 );
1060-
1061- if (mt2 ) | t2 | {
1062- try p .transition (& arena .allocator , input , i , t2 );
1063- }
1064- }
1162+ while (try s .next ()) | token | {
1163+ try p .transition (& arena .allocator , input , s .i - 1 , token );
10651164 }
10661165
1067- // Handle top-level lonely number values.
1068- {
1069- const i = input .len ;
1070- var mt1 : ? Token = undefined ;
1071- var mt2 : ? Token = undefined ;
1072-
1073- try mp .feed (' ' , & mt1 , & mt2 );
1074- if (mt1 ) | t1 | {
1075- try p .transition (& arena .allocator , input , i , t1 );
1076- }
1077- }
1078-
1079- if (! mp .complete ) {
1080- return error .IncompleteJsonInput ;
1081- }
1082-
1083- std .debug .assert (p .stack .len == 1 );
1166+ debug .assert (p .stack .len == 1 );
10841167
10851168 return ValueTree {
10861169 .arena = arena ,
@@ -1090,7 +1173,7 @@ pub const JsonParser = struct {
10901173
    // Even though p.allocator exists, we take an explicit allocator so that allocation state
    // can be cleaned up correctly on error during a call to `parse`.
1093- fn transition (p : * JsonParser , allocator : * Allocator , input : []const u8 , i : usize , token : * const Token ) ! void {
1176+ fn transition (p : * Parser , allocator : * Allocator , input : []const u8 , i : usize , token : * const Token ) ! void {
10941177 switch (p .state ) {
10951178 State .ObjectKey = > switch (token .id ) {
10961179 Token .Id .ObjectEnd = > {
@@ -1223,7 +1306,7 @@ pub const JsonParser = struct {
12231306 }
12241307 }
12251308
1226- fn pushToParent (p : * JsonParser , value : * const Value ) ! void {
1309+ fn pushToParent (p : * Parser , value : * const Value ) ! void {
12271310 switch (p .stack .at (p .stack .len - 1 )) {
12281311 // Object Parent -> [ ..., object, <key>, value ]
12291312 Value .String = > | key | {
@@ -1244,25 +1327,23 @@ pub const JsonParser = struct {
12441327 }
12451328 }
12461329
1247- fn parseString (p : * JsonParser , allocator : * Allocator , token : * const Token , input : []const u8 , i : usize ) ! Value {
1330+ fn parseString (p : * Parser , allocator : * Allocator , token : * const Token , input : []const u8 , i : usize ) ! Value {
12481331 // TODO: We don't strictly have to copy values which do not contain any escape
12491332 // characters if flagged with the option.
12501333 const slice = token .slice (input , i );
12511334 return Value { .String = try mem .dupe (p .allocator , u8 , slice ) };
12521335 }
12531336
1254- fn parseNumber (p : * JsonParser , token : * const Token , input : []const u8 , i : usize ) ! Value {
1337+ fn parseNumber (p : * Parser , token : * const Token , input : []const u8 , i : usize ) ! Value {
12551338 return if (token .number_is_integer )
12561339 Value { .Integer = try std .fmt .parseInt (i64 , token .slice (input , i ), 10 ) }
12571340 else
12581341 @panic ("TODO: fmt.parseFloat not yet implemented" );
12591342 }
12601343};
12611344
1262- const debug = std .debug ;
1263-
12641345test "json parser dynamic" {
1265- var p = JsonParser .init (std . debug .global_allocator , false );
1346+ var p = Parser .init (debug .global_allocator , false );
12661347 defer p .deinit ();
12671348
12681349 const s =
0 commit comments