Commit f06bce5

Committed Jun 2, 2018
introduce [*] for unknown length pointers
See #770. Currently `[*]` does not have any different behavior than `*`, but it is now recommended to use `[*]` for unknown length pointers to be future-proof. Instead of `[`, `*`, `]` being separate tokens as the proposal suggested, this commit implements `[*]` as a single token.
1 parent 7b386ea commit f06bce5
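For illustration, the recommended spelling after this change, lifted from the std/cstr.zig hunk below with an explanatory comment added:

    // `[*]const u8` is a pointer to an unknown number of bytes. At this commit it
    // still behaves exactly like `*const u8`; only the recommended spelling changes.
    pub fn len(ptr: [*]const u8) usize {
        var count: usize = 0;
        while (ptr[count] != 0) : (count += 1) {}
        return count;
    }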

9 files changed: 87 additions, 12 deletions
 

doc/langref.html.in

Lines changed: 1 addition & 1 deletion
@@ -6450,7 +6450,7 @@ ContainerInitBody = list(StructLiteralField, ",") | list(Expression, ",")
 
 StructLiteralField = "." Symbol "=" Expression
 
-PrefixOp = "!" | "-" | "~" | ("*" option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await"
+PrefixOp = "!" | "-" | "~" | (("*" | "[*]") option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await"
 
 PrimaryExpression = Integer | Float | String | CharLiteral | KeywordLiteral | GroupedExpression | BlockExpression(BlockOrExpression) | Symbol | ("@" Symbol FnCallExpression) | ArrayType | FnProto | AsmExpression | ContainerDecl | ("continue" option(":" Symbol)) | ErrorSetDecl | PromiseType

src/all_types.hpp

Lines changed: 1 addition & 0 deletions
@@ -625,6 +625,7 @@ struct AstNodePrefixOpExpr {
 };
 
 struct AstNodePointerType {
+    Token *star_token;
     AstNode *align_expr;
     BigInt *bit_offset_start;
     BigInt *bit_offset_end;

src/parser.cpp

Lines changed: 3 additions & 2 deletions
@@ -1174,6 +1174,7 @@ static PrefixOp tok_to_prefix_op(Token *token) {
 
 static AstNode *ast_parse_pointer_type(ParseContext *pc, size_t *token_index, Token *star_tok) {
     AstNode *node = ast_create_node(pc, NodeTypePointerType, star_tok);
+    node->data.pointer_type.star_token = star_tok;
 
     Token *token = &pc->tokens->at(*token_index);
     if (token->id == TokenIdKeywordAlign) {
@@ -1211,11 +1212,11 @@ static AstNode *ast_parse_pointer_type(ParseContext *pc, size_t *token_index, To
 
 /*
 PrefixOpExpression = PrefixOp ErrorSetExpr | SuffixOpExpression
-PrefixOp = "!" | "-" | "~" | ("*" option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await"
+PrefixOp = "!" | "-" | "~" | (("*" | "[*]") option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await"
 */
 static AstNode *ast_parse_prefix_op_expr(ParseContext *pc, size_t *token_index, bool mandatory) {
     Token *token = &pc->tokens->at(*token_index);
-    if (token->id == TokenIdStar) {
+    if (token->id == TokenIdStar || token->id == TokenIdBracketStarBracket) {
         *token_index += 1;
         return ast_parse_pointer_type(pc, token_index, token);
     }

src/tokenizer.cpp

Lines changed: 30 additions & 1 deletion
@@ -219,6 +219,8 @@ enum TokenizeState {
     TokenizeStateSawAtSign,
     TokenizeStateCharCode,
     TokenizeStateError,
+    TokenizeStateLBracket,
+    TokenizeStateLBracketStar,
 };
 
 
@@ -539,8 +541,8 @@ void tokenize(Buf *buf, Tokenization *out) {
                 end_token(&t);
                 break;
             case '[':
+                t.state = TokenizeStateLBracket;
                 begin_token(&t, TokenIdLBracket);
-                end_token(&t);
                 break;
             case ']':
                 begin_token(&t, TokenIdRBracket);
@@ -852,6 +854,30 @@ void tokenize(Buf *buf, Tokenization *out) {
                         continue;
                 }
                 break;
+            case TokenizeStateLBracket:
+                switch (c) {
+                    case '*':
+                        t.state = TokenizeStateLBracketStar;
+                        set_token_id(&t, t.cur_tok, TokenIdBracketStarBracket);
+                        break;
+                    default:
+                        // reinterpret as just an lbracket
+                        t.pos -= 1;
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        continue;
+                }
+                break;
+            case TokenizeStateLBracketStar:
+                switch (c) {
+                    case ']':
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        break;
+                    default:
+                        invalid_char_error(&t, c);
+                }
+                break;
             case TokenizeStateSawPlusPercent:
                 switch (c) {
                     case '=':
@@ -1467,12 +1493,14 @@ void tokenize(Buf *buf, Tokenization *out) {
         case TokenizeStateLineString:
         case TokenizeStateLineStringEnd:
         case TokenizeStateSawBarBar:
+        case TokenizeStateLBracket:
            end_token(&t);
            break;
        case TokenizeStateSawDotDot:
        case TokenizeStateSawBackslash:
        case TokenizeStateLineStringContinue:
        case TokenizeStateLineStringContinueC:
+        case TokenizeStateLBracketStar:
            tokenize_error(&t, "unexpected EOF");
            break;
        case TokenizeStateLineComment:
@@ -1509,6 +1537,7 @@ const char * token_name(TokenId id) {
         case TokenIdBitShiftRight: return ">>";
         case TokenIdBitShiftRightEq: return ">>=";
         case TokenIdBitXorEq: return "^=";
+        case TokenIdBracketStarBracket: return "[*]";
         case TokenIdCharLiteral: return "CharLiteral";
         case TokenIdCmpEq: return "==";
         case TokenIdCmpGreaterOrEq: return ">=";

src/tokenizer.hpp

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@ enum TokenId {
     TokenIdBitShiftRight,
     TokenIdBitShiftRightEq,
     TokenIdBitXorEq,
+    TokenIdBracketStarBracket,
     TokenIdCharLiteral,
     TokenIdCmpEq,
     TokenIdCmpGreaterOrEq,

std/cstr.zig

Lines changed: 4 additions & 4 deletions
@@ -9,13 +9,13 @@ pub const line_sep = switch (builtin.os) {
     else => "\n",
 };
 
-pub fn len(ptr: *const u8) usize {
+pub fn len(ptr: [*]const u8) usize {
     var count: usize = 0;
     while (ptr[count] != 0) : (count += 1) {}
     return count;
 }
 
-pub fn cmp(a: *const u8, b: *const u8) i8 {
+pub fn cmp(a: [*]const u8, b: [*]const u8) i8 {
     var index: usize = 0;
     while (a[index] == b[index] and a[index] != 0) : (index += 1) {}
     if (a[index] > b[index]) {
@@ -27,11 +27,11 @@ pub fn cmp(a: *const u8, b: *const u8) i8 {
     }
 }
 
-pub fn toSliceConst(str: *const u8) []const u8 {
+pub fn toSliceConst(str: [*]const u8) []const u8 {
     return str[0..len(str)];
 }
 
-pub fn toSlice(str: *u8) []u8 {
+pub fn toSlice(str: [*]u8) []u8 {
     return str[0..len(str)];
 }
std/zig/parse.zig

Lines changed: 1 addition & 1 deletion
@@ -3292,7 +3292,7 @@ fn tokenIdToPrefixOp(id: @TagType(Token.Id)) ?ast.Node.PrefixOp.Op {
         Token.Id.Minus => ast.Node.PrefixOp.Op{ .Negation = void{} },
         Token.Id.MinusPercent => ast.Node.PrefixOp.Op{ .NegationWrap = void{} },
         Token.Id.Ampersand => ast.Node.PrefixOp.Op{ .AddressOf = void{} },
-        Token.Id.Asterisk, Token.Id.AsteriskAsterisk => ast.Node.PrefixOp.Op{
+        Token.Id.Asterisk, Token.Id.AsteriskAsterisk, Token.Id.BracketStarBracket => ast.Node.PrefixOp.Op{
             .PtrType = ast.Node.PrefixOp.PtrInfo{
                 .align_info = null,
                 .const_token = null,

std/zig/parser_test.zig

Lines changed: 7 additions & 0 deletions
@@ -1,3 +1,10 @@
+test "zig fmt: pointer of unknown length" {
+    try testCanonical(
+        \\fn foo(ptr: [*]u8) void {}
+        \\
+    );
+}
+
 test "zig fmt: spaces around slice operator" {
     try testCanonical(
         \\var a = b[c..d];

std/zig/tokenizer.zig

Lines changed: 39 additions & 3 deletions
@@ -143,6 +143,7 @@ pub const Token = struct {
         FloatLiteral,
         LineComment,
         DocComment,
+        BracketStarBracket,
         Keyword_align,
         Keyword_and,
         Keyword_asm,
@@ -263,6 +264,8 @@ pub const Tokenizer = struct {
         Period,
         Period2,
         SawAtSign,
+        LBracket,
+        LBracketStar,
     };
 
     pub fn next(self: *Tokenizer) Token {
@@ -325,9 +328,7 @@ pub const Tokenizer = struct {
                         break;
                     },
                     '[' => {
-                        result.id = Token.Id.LBracket;
-                        self.index += 1;
-                        break;
+                        state = State.LBracket;
                     },
                     ']' => {
                         result.id = Token.Id.RBracket;
@@ -429,6 +430,28 @@ pub const Tokenizer = struct {
                     },
                 },
 
+                State.LBracket => switch (c) {
+                    '*' => {
+                        state = State.LBracketStar;
+                    },
+                    else => {
+                        result.id = Token.Id.LBracket;
+                        break;
+                    },
+                },
+
+                State.LBracketStar => switch (c) {
+                    ']' => {
+                        result.id = Token.Id.BracketStarBracket;
+                        self.index += 1;
+                        break;
+                    },
+                    else => {
+                        result.id = Token.Id.Invalid;
+                        break;
+                    },
+                },
+
                 State.Ampersand => switch (c) {
                     '=' => {
                         result.id = Token.Id.AmpersandEqual;
@@ -1008,6 +1031,7 @@ pub const Tokenizer = struct {
             State.CharLiteralEscape2,
             State.CharLiteralEnd,
             State.StringLiteralBackslash,
+            State.LBracketStar,
             => {
                 result.id = Token.Id.Invalid;
             },
@@ -1024,6 +1048,9 @@ pub const Tokenizer = struct {
             State.Slash => {
                 result.id = Token.Id.Slash;
             },
+            State.LBracket => {
+                result.id = Token.Id.LBracket;
+            },
             State.Zero => {
                 result.id = Token.Id.IntegerLiteral;
             },
@@ -1142,6 +1169,15 @@ test "tokenizer" {
     testTokenize("test", []Token.Id{Token.Id.Keyword_test});
 }
 
+test "tokenizer - unknown length pointer" {
+    testTokenize(
+        \\[*]u8
+    , []Token.Id{
+        Token.Id.BracketStarBracket,
+        Token.Id.Identifier,
+    });
+}
+
 test "tokenizer - char literal with hex escape" {
     testTokenize(
         \\'\x1b'
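
The fall-through path, where '[' is not followed by '*', is not exercised by a new test here. A sketch in the same style, with the expected token ids inferred from the State.LBracket case above (not part of the commit), would look like:

    test "tokenizer - lbracket without star stays a plain lbracket" {
        testTokenize(
            \\[2]u8
        , []Token.Id{
            Token.Id.LBracket,
            Token.Id.IntegerLiteral,
            Token.Id.RBracket,
            Token.Id.Identifier,
        });
    }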
