Skip to content

Commit

Permalink
std.zig.tokenizer: fix tokenization of hex floats
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewrk committed May 24, 2018
1 parent b132a17 commit 54e887e
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 13 deletions.
15 changes: 8 additions & 7 deletions std/zig/parser_test.zig
@@ -1,3 +1,11 @@
test "zig fmt: float literal with exponent" {
try testCanonical(
\\pub const f64_true_min = 4.94065645841246544177e-324;
\\const threshold = 0x1.a827999fcef32p+1022;
\\
);
}

test "zig fmt: if-else end of comptime" {
try testCanonical(
\\comptime {
Expand Down Expand Up @@ -238,13 +246,6 @@ test "zig fmt: switch with empty body" {
);
}

test "zig fmt: float literal with exponent" {
try testCanonical(
\\pub const f64_true_min = 4.94065645841246544177e-324;
\\
);
}

test "zig fmt: line comments in struct initializer" {
try testCanonical(
\\fn foo() void {
Expand Down
75 changes: 69 additions & 6 deletions std/zig/tokenizer.zig
Expand Up @@ -6,12 +6,12 @@ pub const Token = struct {
start: usize,
end: usize,

const Keyword = struct {
pub const Keyword = struct {
bytes: []const u8,
id: Id,
};

const keywords = []Keyword {
pub const keywords = []Keyword {
Keyword{.bytes="align", .id = Id.Keyword_align},
Keyword{.bytes="and", .id = Id.Keyword_and},
Keyword{.bytes="asm", .id = Id.Keyword_asm},
Expand Down Expand Up @@ -62,6 +62,7 @@ pub const Token = struct {
Keyword{.bytes="while", .id = Id.Keyword_while},
};

// TODO perfect hash at comptime
fn getKeyword(bytes: []const u8) ?Id {
for (keywords) |kw| {
if (mem.eql(u8, kw.bytes, bytes)) {
Expand Down Expand Up @@ -236,10 +237,15 @@ pub const Tokenizer = struct {
Zero,
IntegerLiteral,
IntegerLiteralWithRadix,
IntegerLiteralWithRadixHex,
NumberDot,
NumberDotHex,
FloatFraction,
FloatFractionHex,
FloatExponentUnsigned,
FloatExponentUnsignedHex,
FloatExponentNumber,
FloatExponentNumberHex,
Ampersand,
Caret,
Percent,
Expand Down Expand Up @@ -839,9 +845,12 @@ pub const Tokenizer = struct {
else => self.checkLiteralCharacter(),
},
State.Zero => switch (c) {
'b', 'o', 'x' => {
'b', 'o' => {
state = State.IntegerLiteralWithRadix;
},
'x' => {
state = State.IntegerLiteralWithRadixHex;
},
else => {
// reinterpret as a normal number
self.index -= 1;
Expand All @@ -862,8 +871,15 @@ pub const Tokenizer = struct {
'.' => {
state = State.NumberDot;
},
'0'...'9' => {},
else => break,
},
State.IntegerLiteralWithRadixHex => switch (c) {
'.' => {
state = State.NumberDotHex;
},
'p', 'P' => {
state = State.FloatExponentUnsigned;
state = State.FloatExponentUnsignedHex;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => break,
Expand All @@ -880,13 +896,32 @@ pub const Tokenizer = struct {
state = State.FloatFraction;
},
},
State.NumberDotHex => switch (c) {
'.' => {
self.index -= 1;
state = State.Start;
break;
},
else => {
self.index -= 1;
result.id = Token.Id.FloatLiteral;
state = State.FloatFractionHex;
},
},
State.FloatFraction => switch (c) {
'p', 'P', 'e', 'E' => {
'e', 'E' => {
state = State.FloatExponentUnsigned;
},
'0'...'9' => {},
else => break,
},
State.FloatFractionHex => switch (c) {
'p', 'P' => {
state = State.FloatExponentUnsignedHex;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => break,
},
State.FloatExponentUnsigned => switch (c) {
'+', '-' => {
state = State.FloatExponentNumber;
Expand All @@ -897,7 +932,21 @@ pub const Tokenizer = struct {
state = State.FloatExponentNumber;
}
},
State.FloatExponentUnsignedHex => switch (c) {
'+', '-' => {
state = State.FloatExponentNumberHex;
},
else => {
// reinterpret as a normal exponent number
self.index -= 1;
state = State.FloatExponentNumberHex;
}
},
State.FloatExponentNumber => switch (c) {
'0'...'9' => {},
else => break,
},
State.FloatExponentNumberHex => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => break,
},
Expand All @@ -908,8 +957,11 @@ pub const Tokenizer = struct {
State.C,
State.IntegerLiteral,
State.IntegerLiteralWithRadix,
State.IntegerLiteralWithRadixHex,
State.FloatFraction,
State.FloatFractionHex,
State.FloatExponentNumber,
State.FloatExponentNumberHex,
State.StringLiteral, // find this error later
State.MultilineStringLiteralLine,
State.Builtin => {},
Expand All @@ -928,7 +980,9 @@ pub const Tokenizer = struct {
},

State.NumberDot,
State.NumberDotHex,
State.FloatExponentUnsigned,
State.FloatExponentUnsignedHex,
State.SawAtSign,
State.Backslash,
State.MultilineStringLiteralLineBackslash,
Expand Down Expand Up @@ -1073,7 +1127,7 @@ test "tokenizer" {
});
}

test "tokenizer - float literal" {
test "tokenizer - float literal e exponent" {
testTokenize("a = 4.94065645841246544177e-324;\n", []Token.Id {
Token.Id.Identifier,
Token.Id.Equal,
Expand All @@ -1082,6 +1136,15 @@ test "tokenizer - float literal" {
});
}

test "tokenizer - float literal p exponent" {
testTokenize("a = 0x1.a827999fcef32p+1022;\n", []Token.Id {
Token.Id.Identifier,
Token.Id.Equal,
Token.Id.FloatLiteral,
Token.Id.Semicolon,
});
}

test "tokenizer - chars" {
testTokenize("'c'", []Token.Id {Token.Id.CharLiteral});
}
Expand Down

0 comments on commit 54e887e

Please sign in to comment.