@@ -6,12 +6,12 @@ pub const Token = struct {
66 start : usize ,
77 end : usize ,
88
// Associates a keyword's source spelling (`bytes`) with the token `Id` it
// produces. This hunk changes the declaration from file-private to `pub`.
9- const Keyword = struct {
9+ pub const Keyword = struct {
1010 bytes : []const u8 ,
1111 id : Id ,
1212 };
1313
14- const keywords = []Keyword {
14+ pub const keywords = []Keyword {
1515 Keyword {.bytes = "align" , .id = Id .Keyword_align },
1616 Keyword {.bytes = "and" , .id = Id .Keyword_and },
1717 Keyword {.bytes = "asm" , .id = Id .Keyword_asm },
@@ -62,6 +62,7 @@ pub const Token = struct {
6262 Keyword {.bytes = "while" , .id = Id .Keyword_while },
6363 };
6464
65+ // TODO perfect hash at comptime
6566 fn getKeyword (bytes : []const u8 ) ? Id {
6667 for (keywords ) | kw | {
6768 if (mem .eql (u8 , kw .bytes , bytes )) {
@@ -236,10 +237,15 @@ pub const Tokenizer = struct {
236237 Zero ,
237238 IntegerLiteral ,
238239 IntegerLiteralWithRadix ,
240+ IntegerLiteralWithRadixHex ,
239241 NumberDot ,
242+ NumberDotHex ,
240243 FloatFraction ,
244+ FloatFractionHex ,
241245 FloatExponentUnsigned ,
246+ FloatExponentUnsignedHex ,
242247 FloatExponentNumber ,
248+ FloatExponentNumberHex ,
243249 Ampersand ,
244250 Caret ,
245251 Percent ,
@@ -839,9 +845,12 @@ pub const Tokenizer = struct {
839845 else = > self .checkLiteralCharacter (),
840846 },
841847 State .Zero = > switch (c ) {
842- 'b' , 'o' , 'x' = > {
848+ 'b' , 'o' = > {
843849 state = State .IntegerLiteralWithRadix ;
844850 },
851+ 'x' = > {
852+ state = State .IntegerLiteralWithRadixHex ;
853+ },
845854 else = > {
846855 // reinterpret as a normal number
847856 self .index -= 1 ;
@@ -862,8 +871,15 @@ pub const Tokenizer = struct {
862871 '.' = > {
863872 state = State .NumberDot ;
864873 },
874+ '0' ... '9' = > {},
875+ else = > break ,
876+ },
// Digits of a hexadecimal literal (entered from State.Zero on 'x').
//   '.'        -> possible hex float fraction (State.NumberDotHex)
//   'p' / 'P'  -> hex float exponent (State.FloatExponentUnsignedHex)
//   hex digit  -> stay in this state
//   otherwise  -> the literal ends here
// NOTE(review): the 'p' / 'P' path does not assign result.id, whereas the
// State.NumberDotHex path sets Token.Id.FloatLiteral before continuing.
// Confirm that a literal with an exponent but no fraction (e.g. 0x1p4) is
// still reported as a FloatLiteral.
877+ State .IntegerLiteralWithRadixHex = > switch (c ) {
878+ '.' = > {
879+ state = State .NumberDotHex ;
880+ },
865881 'p' , 'P' = > {
866- state = State .FloatExponentUnsigned ;
882+ state = State .FloatExponentUnsignedHex ;
867883 },
868884 '0' ... '9' , 'a' ... 'f' , 'A' ... 'F' = > {},
869885 else = > break ,
@@ -880,13 +896,32 @@ pub const Tokenizer = struct {
880896 state = State .FloatFraction ;
881897 },
882898 },
// Reached after a '.' that follows hexadecimal digits.
//   second '.' -> step the index back by one, reset to State.Start and leave
//                 the enclosing loop (presumably so the dots are not consumed
//                 as part of the number -- confirm against the decimal
//                 State.NumberDot arm).
//   otherwise  -> step the index back by one so the same byte is re-examined
//                 as the first fraction character, mark the token as a
//                 FloatLiteral and continue in State.FloatFractionHex.
899+ State .NumberDotHex = > switch (c ) {
900+ '.' = > {
901+ self .index -= 1 ;
902+ state = State .Start ;
903+ break ;
904+ },
905+ else = > {
906+ self .index -= 1 ;
907+ result .id = Token .Id .FloatLiteral ;
908+ state = State .FloatFractionHex ;
909+ },
910+ },
// Fraction digits of a decimal float literal.
// After this change only 'e' / 'E' introduces an exponent here; the 'p' / 'P'
// exponent marker is handled by the separate State.FloatFractionHex arm.
// Decimal digits keep the state; any other byte ends the literal.
883911 State .FloatFraction = > switch (c ) {
884- 'p' , 'P' , ' e' , 'E' = > {
912+ 'e' , 'E' = > {
885913 state = State .FloatExponentUnsigned ;
886914 },
887915 '0' ... '9' = > {},
888916 else = > break ,
889917 },
// Fraction digits of a hexadecimal float literal.
//   'p' / 'P'  -> exponent follows (State.FloatExponentUnsignedHex)
//   hex digit  -> stay in this state
//   otherwise  -> the literal ends here
918+ State .FloatFractionHex = > switch (c ) {
919+ 'p' , 'P' = > {
920+ state = State .FloatExponentUnsignedHex ;
921+ },
922+ '0' ... '9' , 'a' ... 'f' , 'A' ... 'F' = > {},
923+ else = > break ,
924+ },
890925 State .FloatExponentUnsigned = > switch (c ) {
891926 '+' , '-' = > {
892927 state = State .FloatExponentNumber ;
@@ -897,7 +932,21 @@ pub const Tokenizer = struct {
897932 state = State .FloatExponentNumber ;
898933 }
899934 },
// First byte after the 'p' / 'P' exponent marker of a hex float.
// An explicit '+' or '-' sign is consumed; any other byte is pushed back
// (index decremented) so State.FloatExponentNumberHex examines it again.
935+ State .FloatExponentUnsignedHex = > switch (c ) {
936+ '+' , '-' = > {
937+ state = State .FloatExponentNumberHex ;
938+ },
939+ else = > {
940+ // reinterpret as a normal exponent number
941+ self .index -= 1 ;
942+ state = State .FloatExponentNumberHex ;
943+ }
944+ },
// Exponent digits. After this change the decimal exponent state accepts only
// '0'...'9', while the new hex variant keeps the previous, wider digit set.
// NOTE(review): State.FloatExponentNumberHex accepts 'a'...'f' / 'A'...'F'.
// The exponent after 'p' in a hexadecimal float is normally written in
// decimal, so input such as 0x1p+1f would be scanned as one token here.
// Confirm whether this leniency is intended (e.g. rejected later) or whether
// the arm should accept '0'...'9' only.
900945 State .FloatExponentNumber = > switch (c ) {
946+ '0' ... '9' = > {},
947+ else = > break ,
948+ },
949+ State .FloatExponentNumberHex = > switch (c ) {
901950 '0' ... '9' , 'a' ... 'f' , 'A' ... 'F' = > {},
902951 else = > break ,
903952 },
@@ -908,8 +957,11 @@ pub const Tokenizer = struct {
908957 State .C ,
909958 State .IntegerLiteral ,
910959 State .IntegerLiteralWithRadix ,
960+ State .IntegerLiteralWithRadixHex ,
911961 State .FloatFraction ,
962+ State .FloatFractionHex ,
912963 State .FloatExponentNumber ,
964+ State .FloatExponentNumberHex ,
913965 State .StringLiteral , // find this error later
914966 State .MultilineStringLiteralLine ,
915967 State .Builtin = > {},
@@ -928,7 +980,9 @@ pub const Tokenizer = struct {
928980 },
929981
930982 State .NumberDot ,
983+ State .NumberDotHex ,
931984 State .FloatExponentUnsigned ,
985+ State .FloatExponentUnsignedHex ,
932986 State .SawAtSign ,
933987 State .Backslash ,
934988 State .MultilineStringLiteralLineBackslash ,
@@ -1073,7 +1127,7 @@ test "tokenizer" {
10731127 });
10741128}
10751129
1076- test "tokenizer - float literal" {
1130+ test "tokenizer - float literal e exponent " {
10771131 testTokenize ("a = 4.94065645841246544177e-324;\n " , []Token.Id {
10781132 Token .Id .Identifier ,
10791133 Token .Id .Equal ,
@@ -1082,6 +1136,15 @@ test "tokenizer - float literal" {
10821136 });
10831137}
10841138
// Expects a hexadecimal float with a fraction and a signed 'p' exponent
// (0x1.a827999fcef32p+1022) to come out as a single FloatLiteral token,
// preceded by Identifier and Equal and followed by Semicolon.
// testTokenize is defined outside this view; presumably it compares the
// tokenizer's output ids with the given list -- confirm.
1139+ test "tokenizer - float literal p exponent" {
1140+ testTokenize ("a = 0x1.a827999fcef32p+1022;\n " , []Token.Id {
1141+ Token .Id .Identifier ,
1142+ Token .Id .Equal ,
1143+ Token .Id .FloatLiteral ,
1144+ Token .Id .Semicolon ,
1145+ });
1146+ }
1147+
// Expects the input 'c' (including its quotes) to yield exactly one
// CharLiteral token id.
10851148test "tokenizer - chars" {
10861149 testTokenize ("'c'" , []Token.Id {Token .Id .CharLiteral });
10871150}
0 commit comments