Skip to content

Commit e7f141b

Browse files
tiehuisandrewrk
authored and committed Jun 6, 2018
Add json.TokenStream (#1062)
This hides some of the low-level parsing details from the StreamingParser. These don't need to be known when parsing a complete slice at once (which is what we can usually do). Also, remove `Json` from Parser names. The namespace `json` is sufficient.
·
0.15.20.3.0
1 parent f389e53 commit e7f141b

File tree

1 file changed

+159
-78
lines changed

1 file changed

+159
-78
lines changed
 

‎std/json.zig‎

Lines changed: 159 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// https://tools.ietf.org/html/rfc8259
44

55
const std = @import("index.zig");
6+
const debug = std.debug;
67
const mem = std.mem;
78

89
const u1 = @IntType(false, 1);
@@ -86,7 +87,9 @@ pub const Token = struct {
8687
// parsing state requires ~40-50 bytes of stack space.
8788
//
8889
// Conforms strictly to RFC8259.
89-
pub const StreamingJsonParser = struct {
90+
//
91+
// For a non-byte based wrapper, consider using TokenStream instead.
92+
pub const StreamingParser = struct {
9093
// Current state
9194
state: State,
9295
// How many bytes we have counted for the current token
@@ -109,13 +112,13 @@ pub const StreamingJsonParser = struct {
109112
const array_bit = 1;
110113
const max_stack_size = @maxValue(u8);
111114

112-
pub fn init() StreamingJsonParser {
113-
var p: StreamingJsonParser = undefined;
115+
pub fn init() StreamingParser {
116+
var p: StreamingParser = undefined;
114117
p.reset();
115118
return p;
116119
}
117120

118-
pub fn reset(p: *StreamingJsonParser) void {
121+
pub fn reset(p: *StreamingParser) void {
119122
p.state = State.TopLevelBegin;
120123
p.count = 0;
121124
// Set before ever read in main transition function
@@ -175,7 +178,7 @@ pub const StreamingJsonParser = struct {
175178

176179
// Only call this function to generate array/object final state.
177180
pub fn fromInt(x: var) State {
178-
std.debug.assert(x == 0 or x == 1);
181+
debug.assert(x == 0 or x == 1);
179182
const T = @TagType(State);
180183
return State(T(x));
181184
}
@@ -205,7 +208,7 @@ pub const StreamingJsonParser = struct {
205208
// tokens. token2 is always null if token1 is null.
206209
//
207210
// There is currently no error recovery on a bad stream.
208-
pub fn feed(p: *StreamingJsonParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
211+
pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
209212
token1.* = null;
210213
token2.* = null;
211214
p.count += 1;
@@ -217,7 +220,7 @@ pub const StreamingJsonParser = struct {
217220
}
218221

219222
// Perform a single transition on the state machine and return any possible token.
220-
fn transition(p: *StreamingJsonParser, c: u8, token: *?Token) Error!bool {
223+
fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool {
221224
switch (p.state) {
222225
State.TopLevelBegin => switch (c) {
223226
'{' => {
@@ -852,10 +855,116 @@ pub const StreamingJsonParser = struct {
852855
}
853856
};
854857

858+
// A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens.
859+
pub const TokenStream = struct {
    // Index of the next byte of `slice` to feed to the parser. After the terminating
    // whitespace byte has been fed at end of input this is `slice.len + 1`.
    i: usize,
    // The complete input being tokenized.
    slice: []const u8,
    // Underlying byte-at-a-time state machine.
    parser: StreamingParser,
    // Second token produced by the most recent feed, held back until the following
    // call to `next`.
    token: ?Token,

    // Create a stream positioned at the start of `slice`.
    pub fn init(slice: []const u8) TokenStream {
        return TokenStream{
            .i = 0,
            .slice = slice,
            .parser = StreamingParser.init(),
            .token = null,
        };
    }

    // Return the next token, or null once the input is exhausted. Any error reported by
    // the underlying parser is propagated.
    //
    // NOTE(review): this function does not itself check that the parser reached the end
    // of a complete document; confirm that callers handle truncated input.
    pub fn next(self: *TokenStream) !?Token {
        // A single byte can yield two tokens; hand out the held-back one first.
        if (self.token) |token| {
            self.token = null;
            return token;
        }

        var t1: ?Token = undefined;
        var t2: ?Token = undefined;

        while (self.i < self.slice.len) {
            try self.parser.feed(self.slice[self.i], &t1, &t2);
            self.i += 1;

            if (t1) |token| {
                self.token = t2;
                return token;
            }
        }

        // Handle top-level lonely number values: feed one whitespace byte after the last
        // input byte so that a pending token is flushed. The loop above leaves
        // `i == slice.len`, and bumping `i` afterwards ensures this happens exactly once.
        if (self.i == self.slice.len) {
            try self.parser.feed(' ', &t1, &t2);
            self.i += 1;

            if (t1) |token| {
                return token;
            }
        }

        return null;
    }
};
905+
906+
// Test helper: pull the next token from `p` and assert that it has the expected id.
// A parser error or an exhausted stream is treated as unreachable.
fn checkNext(p: *TokenStream, id: Token.Id) void {
    const maybe_token = p.next() catch unreachable;
    const token = ??maybe_token;
    debug.assert(token.id == id);
}
910+
911+
test "token" {
    const s =
        \\{
        \\ "Image": {
        \\ "Width": 800,
        \\ "Height": 600,
        \\ "Title": "View from 15th Floor",
        \\ "Thumbnail": {
        \\ "Url": "http://www.example.com/image/481989943",
        \\ "Height": 125,
        \\ "Width": 100
        \\ },
        \\ "Animated" : false,
        \\ "IDs": [116, 943, 234, 38793]
        \\ }
        \\}
    ;

    // Token ids expected from the document above, in order.
    const expected = []Token.Id{
        Token.Id.ObjectBegin,
        Token.Id.String, // Image
        Token.Id.ObjectBegin,
        Token.Id.String, // Width
        Token.Id.Number,
        Token.Id.String, // Height
        Token.Id.Number,
        Token.Id.String, // Title
        Token.Id.String,
        Token.Id.String, // Thumbnail
        Token.Id.ObjectBegin,
        Token.Id.String, // Url
        Token.Id.String,
        Token.Id.String, // Height
        Token.Id.Number,
        Token.Id.String, // Width
        Token.Id.Number,
        Token.Id.ObjectEnd,
        Token.Id.String, // Animated
        Token.Id.False,
        Token.Id.String, // IDs
        Token.Id.ArrayBegin,
        Token.Id.Number,
        Token.Id.Number,
        Token.Id.Number,
        Token.Id.Number,
        Token.Id.ArrayEnd,
        Token.Id.ObjectEnd,
        Token.Id.ObjectEnd,
    };

    var p = TokenStream.init(s);

    for (expected) |id| {
        checkNext(&p, id);
    }

    // The stream must be exhausted once every expected token has been seen.
    debug.assert((try p.next()) == null);
}
963+
855964
// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
856965
// be able to decode the string even if this returns true.
857966
pub fn validate(s: []const u8) bool {
858-
var p = StreamingJsonParser.init();
967+
var p = StreamingParser.init();
859968

860969
for (s) |c, i| {
861970
var token1: ?Token = undefined;
@@ -897,46 +1006,46 @@ pub const Value = union(enum) {
8971006
pub fn dump(self: *const Value) void {
8981007
switch (self.*) {
8991008
Value.Null => {
900-
std.debug.warn("null");
1009+
debug.warn("null");
9011010
},
9021011
Value.Bool => |inner| {
903-
std.debug.warn("{}", inner);
1012+
debug.warn("{}", inner);
9041013
},
9051014
Value.Integer => |inner| {
906-
std.debug.warn("{}", inner);
1015+
debug.warn("{}", inner);
9071016
},
9081017
Value.Float => |inner| {
909-
std.debug.warn("{.5}", inner);
1018+
debug.warn("{.5}", inner);
9101019
},
9111020
Value.String => |inner| {
912-
std.debug.warn("\"{}\"", inner);
1021+
debug.warn("\"{}\"", inner);
9131022
},
9141023
Value.Array => |inner| {
9151024
var not_first = false;
916-
std.debug.warn("[");
1025+
debug.warn("[");
9171026
for (inner.toSliceConst()) |value| {
9181027
if (not_first) {
919-
std.debug.warn(",");
1028+
debug.warn(",");
9201029
}
9211030
not_first = true;
9221031
value.dump();
9231032
}
924-
std.debug.warn("]");
1033+
debug.warn("]");
9251034
},
9261035
Value.Object => |inner| {
9271036
var not_first = false;
928-
std.debug.warn("{{");
1037+
debug.warn("{{");
9291038
var it = inner.iterator();
9301039

9311040
while (it.next()) |entry| {
9321041
if (not_first) {
933-
std.debug.warn(",");
1042+
debug.warn(",");
9341043
}
9351044
not_first = true;
936-
std.debug.warn("\"{}\":", entry.key);
1045+
debug.warn("\"{}\":", entry.key);
9371046
entry.value.dump();
9381047
}
939-
std.debug.warn("}}");
1048+
debug.warn("}}");
9401049
},
9411050
}
9421051
}
@@ -952,67 +1061,67 @@ pub const Value = union(enum) {
9521061
fn dumpIndentLevel(self: *const Value, indent: usize, level: usize) void {
9531062
switch (self.*) {
9541063
Value.Null => {
955-
std.debug.warn("null");
1064+
debug.warn("null");
9561065
},
9571066
Value.Bool => |inner| {
958-
std.debug.warn("{}", inner);
1067+
debug.warn("{}", inner);
9591068
},
9601069
Value.Integer => |inner| {
961-
std.debug.warn("{}", inner);
1070+
debug.warn("{}", inner);
9621071
},
9631072
Value.Float => |inner| {
964-
std.debug.warn("{.5}", inner);
1073+
debug.warn("{.5}", inner);
9651074
},
9661075
Value.String => |inner| {
967-
std.debug.warn("\"{}\"", inner);
1076+
debug.warn("\"{}\"", inner);
9681077
},
9691078
Value.Array => |inner| {
9701079
var not_first = false;
971-
std.debug.warn("[\n");
1080+
debug.warn("[\n");
9721081

9731082
for (inner.toSliceConst()) |value| {
9741083
if (not_first) {
975-
std.debug.warn(",\n");
1084+
debug.warn(",\n");
9761085
}
9771086
not_first = true;
9781087
padSpace(level + indent);
9791088
value.dumpIndentLevel(indent, level + indent);
9801089
}
981-
std.debug.warn("\n");
1090+
debug.warn("\n");
9821091
padSpace(level);
983-
std.debug.warn("]");
1092+
debug.warn("]");
9841093
},
9851094
Value.Object => |inner| {
9861095
var not_first = false;
987-
std.debug.warn("{{\n");
1096+
debug.warn("{{\n");
9881097
var it = inner.iterator();
9891098

9901099
while (it.next()) |entry| {
9911100
if (not_first) {
992-
std.debug.warn(",\n");
1101+
debug.warn(",\n");
9931102
}
9941103
not_first = true;
9951104
padSpace(level + indent);
996-
std.debug.warn("\"{}\": ", entry.key);
1105+
debug.warn("\"{}\": ", entry.key);
9971106
entry.value.dumpIndentLevel(indent, level + indent);
9981107
}
999-
std.debug.warn("\n");
1108+
debug.warn("\n");
10001109
padSpace(level);
1001-
std.debug.warn("}}");
1110+
debug.warn("}}");
10021111
},
10031112
}
10041113
}
10051114

10061115
fn padSpace(indent: usize) void {
10071116
var i: usize = 0;
10081117
while (i < indent) : (i += 1) {
1009-
std.debug.warn(" ");
1118+
debug.warn(" ");
10101119
}
10111120
}
10121121
};
10131122

10141123
// A non-stream JSON parser which constructs a tree of Value's.
1015-
pub const JsonParser = struct {
1124+
pub const Parser = struct {
10161125
allocator: *Allocator,
10171126
state: State,
10181127
copy_strings: bool,
@@ -1026,61 +1135,35 @@ pub const JsonParser = struct {
10261135
Simple,
10271136
};
10281137

1029-
pub fn init(allocator: *Allocator, copy_strings: bool) JsonParser {
1030-
return JsonParser{
1138+
pub fn init(allocator: *Allocator, copy_strings: bool) Parser {
1139+
return Parser{
10311140
.allocator = allocator,
10321141
.state = State.Simple,
10331142
.copy_strings = copy_strings,
10341143
.stack = ArrayList(Value).init(allocator),
10351144
};
10361145
}
10371146

1038-
pub fn deinit(p: *JsonParser) void {
1147+
pub fn deinit(p: *Parser) void {
10391148
p.stack.deinit();
10401149
}
10411150

1042-
pub fn reset(p: *JsonParser) void {
1151+
pub fn reset(p: *Parser) void {
10431152
p.state = State.Simple;
10441153
p.stack.shrink(0);
10451154
}
10461155

1047-
pub fn parse(p: *JsonParser, input: []const u8) !ValueTree {
1048-
var mp = StreamingJsonParser.init();
1156+
pub fn parse(p: *Parser, input: []const u8) !ValueTree {
1157+
var s = TokenStream.init(input);
10491158

10501159
var arena = ArenaAllocator.init(p.allocator);
10511160
errdefer arena.deinit();
10521161

1053-
for (input) |c, i| {
1054-
var mt1: ?Token = undefined;
1055-
var mt2: ?Token = undefined;
1056-
1057-
try mp.feed(c, &mt1, &mt2);
1058-
if (mt1) |t1| {
1059-
try p.transition(&arena.allocator, input, i, t1);
1060-
1061-
if (mt2) |t2| {
1062-
try p.transition(&arena.allocator, input, i, t2);
1063-
}
1064-
}
1162+
while (try s.next()) |token| {
1163+
try p.transition(&arena.allocator, input, s.i - 1, token);
10651164
}
10661165

1067-
// Handle top-level lonely number values.
1068-
{
1069-
const i = input.len;
1070-
var mt1: ?Token = undefined;
1071-
var mt2: ?Token = undefined;
1072-
1073-
try mp.feed(' ', &mt1, &mt2);
1074-
if (mt1) |t1| {
1075-
try p.transition(&arena.allocator, input, i, t1);
1076-
}
1077-
}
1078-
1079-
if (!mp.complete) {
1080-
return error.IncompleteJsonInput;
1081-
}
1082-
1083-
std.debug.assert(p.stack.len == 1);
1166+
debug.assert(p.stack.len == 1);
10841167

10851168
return ValueTree{
10861169
.arena = arena,
@@ -1090,7 +1173,7 @@ pub const JsonParser = struct {
10901173

10911174
// Even though p.allocator exists, we take an explicit allocator so that allocation state
10921175
// can be cleaned up on error correctly during a `parse` call.
1093-
fn transition(p: *JsonParser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void {
1176+
fn transition(p: *Parser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void {
10941177
switch (p.state) {
10951178
State.ObjectKey => switch (token.id) {
10961179
Token.Id.ObjectEnd => {
@@ -1223,7 +1306,7 @@ pub const JsonParser = struct {
12231306
}
12241307
}
12251308

1226-
fn pushToParent(p: *JsonParser, value: *const Value) !void {
1309+
fn pushToParent(p: *Parser, value: *const Value) !void {
12271310
switch (p.stack.at(p.stack.len - 1)) {
12281311
// Object Parent -> [ ..., object, <key>, value ]
12291312
Value.String => |key| {
@@ -1244,25 +1327,23 @@ pub const JsonParser = struct {
12441327
}
12451328
}
12461329

1247-
fn parseString(p: *JsonParser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value {
1330+
// Build a String value from a string token by copying the token's bytes out of `input`.
//
// The copy is made with `allocator` (the explicit allocator handed in by the caller) rather
// than `p.allocator`, so that it is released together with the rest of that parse's
// allocations.
fn parseString(p: *Parser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value {
    // TODO: We don't strictly have to copy values which do not contain any escape
    // characters if flagged with the option.
    const slice = token.slice(input, i);
    return Value{ .String = try mem.dupe(allocator, u8, slice) };
}
12531336

1254-
fn parseNumber(p: *JsonParser, token: *const Token, input: []const u8, i: usize) !Value {
1337+
// Build a numeric Value from a number token. Only integer tokens are supported; any other
// number hits the TODO panic below.
fn parseNumber(p: *Parser, token: *const Token, input: []const u8, i: usize) !Value {
    if (!token.number_is_integer) {
        @panic("TODO: fmt.parseFloat not yet implemented");
    }

    const digits = token.slice(input, i);
    const parsed = try std.fmt.parseInt(i64, digits, 10);
    return Value{ .Integer = parsed };
}
12601343
};
12611344

1262-
const debug = std.debug;
1263-
12641345
test "json parser dynamic" {
1265-
var p = JsonParser.init(std.debug.global_allocator, false);
1346+
var p = Parser.init(debug.global_allocator, false);
12661347
defer p.deinit();
12671348

12681349
const s =

0 commit comments

Comments
 (0)
Please sign in to comment.