Skip to content

Commit 54e887e

Browse files
committed May 24, 2018
std.zig.tokenizer: fix tokenization of hex floats
·
0.15.2 … 0.3.0
1 parent b132a17 commit 54e887e

File tree

2 files changed

+77
-13
lines changed

2 files changed

+77
-13
lines changed
 

‎std/zig/parser_test.zig‎

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,11 @@
1+
test "zig fmt: float literal with exponent" {
2+
try testCanonical(
3+
\\pub const f64_true_min = 4.94065645841246544177e-324;
4+
\\const threshold = 0x1.a827999fcef32p+1022;
5+
\\
6+
);
7+
}
8+
19
test "zig fmt: if-else end of comptime" {
210
try testCanonical(
311
\\comptime {
@@ -238,13 +246,6 @@ test "zig fmt: switch with empty body" {
238246
);
239247
}
240248

241-
test "zig fmt: float literal with exponent" {
242-
try testCanonical(
243-
\\pub const f64_true_min = 4.94065645841246544177e-324;
244-
\\
245-
);
246-
}
247-
248249
test "zig fmt: line comments in struct initializer" {
249250
try testCanonical(
250251
\\fn foo() void {

‎std/zig/tokenizer.zig‎

Lines changed: 69 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,12 @@ pub const Token = struct {
66
start: usize,
77
end: usize,
88

9-
const Keyword = struct {
9+
pub const Keyword = struct {
1010
bytes: []const u8,
1111
id: Id,
1212
};
1313

14-
const keywords = []Keyword {
14+
pub const keywords = []Keyword {
1515
Keyword{.bytes="align", .id = Id.Keyword_align},
1616
Keyword{.bytes="and", .id = Id.Keyword_and},
1717
Keyword{.bytes="asm", .id = Id.Keyword_asm},
@@ -62,6 +62,7 @@ pub const Token = struct {
6262
Keyword{.bytes="while", .id = Id.Keyword_while},
6363
};
6464

65+
// TODO perfect hash at comptime
6566
fn getKeyword(bytes: []const u8) ?Id {
6667
for (keywords) |kw| {
6768
if (mem.eql(u8, kw.bytes, bytes)) {
@@ -236,10 +237,15 @@ pub const Tokenizer = struct {
236237
Zero,
237238
IntegerLiteral,
238239
IntegerLiteralWithRadix,
240+
IntegerLiteralWithRadixHex,
239241
NumberDot,
242+
NumberDotHex,
240243
FloatFraction,
244+
FloatFractionHex,
241245
FloatExponentUnsigned,
246+
FloatExponentUnsignedHex,
242247
FloatExponentNumber,
248+
FloatExponentNumberHex,
243249
Ampersand,
244250
Caret,
245251
Percent,
@@ -839,9 +845,12 @@ pub const Tokenizer = struct {
839845
else => self.checkLiteralCharacter(),
840846
},
841847
State.Zero => switch (c) {
842-
'b', 'o', 'x' => {
848+
'b', 'o' => {
843849
state = State.IntegerLiteralWithRadix;
844850
},
851+
'x' => {
852+
state = State.IntegerLiteralWithRadixHex;
853+
},
845854
else => {
846855
// reinterpret as a normal number
847856
self.index -= 1;
@@ -862,8 +871,15 @@ pub const Tokenizer = struct {
862871
'.' => {
863872
state = State.NumberDot;
864873
},
874+
'0'...'9' => {},
875+
else => break,
876+
},
877+
State.IntegerLiteralWithRadixHex => switch (c) {
878+
'.' => {
879+
state = State.NumberDotHex;
880+
},
865881
'p', 'P' => {
866-
state = State.FloatExponentUnsigned;
882+
state = State.FloatExponentUnsignedHex;
867883
},
868884
'0'...'9', 'a'...'f', 'A'...'F' => {},
869885
else => break,
@@ -880,13 +896,32 @@ pub const Tokenizer = struct {
880896
state = State.FloatFraction;
881897
},
882898
},
899+
State.NumberDotHex => switch (c) {
900+
'.' => {
901+
self.index -= 1;
902+
state = State.Start;
903+
break;
904+
},
905+
else => {
906+
self.index -= 1;
907+
result.id = Token.Id.FloatLiteral;
908+
state = State.FloatFractionHex;
909+
},
910+
},
883911
State.FloatFraction => switch (c) {
884-
'p', 'P', 'e', 'E' => {
912+
'e', 'E' => {
885913
state = State.FloatExponentUnsigned;
886914
},
887915
'0'...'9' => {},
888916
else => break,
889917
},
918+
State.FloatFractionHex => switch (c) {
919+
'p', 'P' => {
920+
state = State.FloatExponentUnsignedHex;
921+
},
922+
'0'...'9', 'a'...'f', 'A'...'F' => {},
923+
else => break,
924+
},
890925
State.FloatExponentUnsigned => switch (c) {
891926
'+', '-' => {
892927
state = State.FloatExponentNumber;
@@ -897,7 +932,21 @@ pub const Tokenizer = struct {
897932
state = State.FloatExponentNumber;
898933
}
899934
},
935+
State.FloatExponentUnsignedHex => switch (c) {
936+
'+', '-' => {
937+
state = State.FloatExponentNumberHex;
938+
},
939+
else => {
940+
// reinterpret as a normal exponent number
941+
self.index -= 1;
942+
state = State.FloatExponentNumberHex;
943+
}
944+
},
900945
State.FloatExponentNumber => switch (c) {
946+
'0'...'9' => {},
947+
else => break,
948+
},
949+
State.FloatExponentNumberHex => switch (c) {
901950
'0'...'9', 'a'...'f', 'A'...'F' => {},
902951
else => break,
903952
},
@@ -908,8 +957,11 @@ pub const Tokenizer = struct {
908957
State.C,
909958
State.IntegerLiteral,
910959
State.IntegerLiteralWithRadix,
960+
State.IntegerLiteralWithRadixHex,
911961
State.FloatFraction,
962+
State.FloatFractionHex,
912963
State.FloatExponentNumber,
964+
State.FloatExponentNumberHex,
913965
State.StringLiteral, // find this error later
914966
State.MultilineStringLiteralLine,
915967
State.Builtin => {},
@@ -928,7 +980,9 @@ pub const Tokenizer = struct {
928980
},
929981

930982
State.NumberDot,
983+
State.NumberDotHex,
931984
State.FloatExponentUnsigned,
985+
State.FloatExponentUnsignedHex,
932986
State.SawAtSign,
933987
State.Backslash,
934988
State.MultilineStringLiteralLineBackslash,
@@ -1073,7 +1127,7 @@ test "tokenizer" {
10731127
});
10741128
}
10751129

1076-
test "tokenizer - float literal" {
1130+
test "tokenizer - float literal e exponent" {
10771131
testTokenize("a = 4.94065645841246544177e-324;\n", []Token.Id {
10781132
Token.Id.Identifier,
10791133
Token.Id.Equal,
@@ -1082,6 +1136,15 @@ test "tokenizer - float literal" {
10821136
});
10831137
}
10841138

1139+
test "tokenizer - float literal p exponent" {
1140+
testTokenize("a = 0x1.a827999fcef32p+1022;\n", []Token.Id {
1141+
Token.Id.Identifier,
1142+
Token.Id.Equal,
1143+
Token.Id.FloatLiteral,
1144+
Token.Id.Semicolon,
1145+
});
1146+
}
1147+
10851148
test "tokenizer - chars" {
10861149
testTokenize("'c'", []Token.Id {Token.Id.CharLiteral});
10871150
}

0 commit comments

Comments
 (0)
Please sign in to comment.