Skip to content

Commit

Permalink
introduce [*] for unknown length pointers
Browse files Browse the repository at this point in the history
See #770

Currently it does not behave any differently from `*`,
but it is now recommended to use `[*]` for unknown-length
pointers in order to be future-proof.

Instead of `[`, `*`, `]` being three separate tokens as the proposal
suggested, this commit implements `[*]` as a single token.
  • Loading branch information
andrewrk committed Jun 2, 2018
1 parent 7b386ea commit f06bce5
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 12 deletions.
2 changes: 1 addition & 1 deletion doc/langref.html.in
Expand Up @@ -6450,7 +6450,7 @@ ContainerInitBody = list(StructLiteralField, ",") | list(Expression, ",")

StructLiteralField = "." Symbol "=" Expression

PrefixOp = "!" | "-" | "~" | ("*" option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await"
PrefixOp = "!" | "-" | "~" | (("*" | "[*]") option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await"

PrimaryExpression = Integer | Float | String | CharLiteral | KeywordLiteral | GroupedExpression | BlockExpression(BlockOrExpression) | Symbol | ("@" Symbol FnCallExpression) | ArrayType | FnProto | AsmExpression | ContainerDecl | ("continue" option(":" Symbol)) | ErrorSetDecl | PromiseType

Expand Down
1 change: 1 addition & 0 deletions src/all_types.hpp
Expand Up @@ -625,6 +625,7 @@ struct AstNodePrefixOpExpr {
};

struct AstNodePointerType {
Token *star_token;
AstNode *align_expr;
BigInt *bit_offset_start;
BigInt *bit_offset_end;
Expand Down
5 changes: 3 additions & 2 deletions src/parser.cpp
Expand Up @@ -1174,6 +1174,7 @@ static PrefixOp tok_to_prefix_op(Token *token) {

static AstNode *ast_parse_pointer_type(ParseContext *pc, size_t *token_index, Token *star_tok) {
AstNode *node = ast_create_node(pc, NodeTypePointerType, star_tok);
node->data.pointer_type.star_token = star_tok;

Token *token = &pc->tokens->at(*token_index);
if (token->id == TokenIdKeywordAlign) {
Expand Down Expand Up @@ -1211,11 +1212,11 @@ static AstNode *ast_parse_pointer_type(ParseContext *pc, size_t *token_index, To

/*
PrefixOpExpression = PrefixOp ErrorSetExpr | SuffixOpExpression
PrefixOp = "!" | "-" | "~" | ("*" option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await"
PrefixOp = "!" | "-" | "~" | (("*" | "[*]") option("align" "(" Expression option(":" Integer ":" Integer) ")" ) option("const") option("volatile")) | "?" | "??" | "-%" | "try" | "await"
*/
static AstNode *ast_parse_prefix_op_expr(ParseContext *pc, size_t *token_index, bool mandatory) {
Token *token = &pc->tokens->at(*token_index);
if (token->id == TokenIdStar) {
if (token->id == TokenIdStar || token->id == TokenIdBracketStarBracket) {
*token_index += 1;
return ast_parse_pointer_type(pc, token_index, token);
}
Expand Down
31 changes: 30 additions & 1 deletion src/tokenizer.cpp
Expand Up @@ -219,6 +219,8 @@ enum TokenizeState {
TokenizeStateSawAtSign,
TokenizeStateCharCode,
TokenizeStateError,
TokenizeStateLBracket,
TokenizeStateLBracketStar,
};


Expand Down Expand Up @@ -539,8 +541,8 @@ void tokenize(Buf *buf, Tokenization *out) {
end_token(&t);
break;
case '[':
t.state = TokenizeStateLBracket;
begin_token(&t, TokenIdLBracket);
end_token(&t);
break;
case ']':
begin_token(&t, TokenIdRBracket);
Expand Down Expand Up @@ -852,6 +854,30 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
case TokenizeStateLBracket:
switch (c) {
case '*':
t.state = TokenizeStateLBracketStar;
set_token_id(&t, t.cur_tok, TokenIdBracketStarBracket);
break;
default:
// reinterpret as just an lbracket
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
break;
case TokenizeStateLBracketStar:
switch (c) {
case ']':
end_token(&t);
t.state = TokenizeStateStart;
break;
default:
invalid_char_error(&t, c);
}
break;
case TokenizeStateSawPlusPercent:
switch (c) {
case '=':
Expand Down Expand Up @@ -1467,12 +1493,14 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateLineString:
case TokenizeStateLineStringEnd:
case TokenizeStateSawBarBar:
case TokenizeStateLBracket:
end_token(&t);
break;
case TokenizeStateSawDotDot:
case TokenizeStateSawBackslash:
case TokenizeStateLineStringContinue:
case TokenizeStateLineStringContinueC:
case TokenizeStateLBracketStar:
tokenize_error(&t, "unexpected EOF");
break;
case TokenizeStateLineComment:
Expand Down Expand Up @@ -1509,6 +1537,7 @@ const char * token_name(TokenId id) {
case TokenIdBitShiftRight: return ">>";
case TokenIdBitShiftRightEq: return ">>=";
case TokenIdBitXorEq: return "^=";
case TokenIdBracketStarBracket: return "[*]";
case TokenIdCharLiteral: return "CharLiteral";
case TokenIdCmpEq: return "==";
case TokenIdCmpGreaterOrEq: return ">=";
Expand Down
1 change: 1 addition & 0 deletions src/tokenizer.hpp
Expand Up @@ -28,6 +28,7 @@ enum TokenId {
TokenIdBitShiftRight,
TokenIdBitShiftRightEq,
TokenIdBitXorEq,
TokenIdBracketStarBracket,
TokenIdCharLiteral,
TokenIdCmpEq,
TokenIdCmpGreaterOrEq,
Expand Down
8 changes: 4 additions & 4 deletions std/cstr.zig
Expand Up @@ -9,13 +9,13 @@ pub const line_sep = switch (builtin.os) {
else => "\n",
};

pub fn len(ptr: *const u8) usize {
pub fn len(ptr: [*]const u8) usize {
var count: usize = 0;
while (ptr[count] != 0) : (count += 1) {}
return count;
}

pub fn cmp(a: *const u8, b: *const u8) i8 {
pub fn cmp(a: [*]const u8, b: [*]const u8) i8 {
var index: usize = 0;
while (a[index] == b[index] and a[index] != 0) : (index += 1) {}
if (a[index] > b[index]) {
Expand All @@ -27,11 +27,11 @@ pub fn cmp(a: *const u8, b: *const u8) i8 {
}
}

pub fn toSliceConst(str: *const u8) []const u8 {
pub fn toSliceConst(str: [*]const u8) []const u8 {
return str[0..len(str)];
}

pub fn toSlice(str: *u8) []u8 {
pub fn toSlice(str: [*]u8) []u8 {
return str[0..len(str)];
}

Expand Down
2 changes: 1 addition & 1 deletion std/zig/parse.zig
Expand Up @@ -3292,7 +3292,7 @@ fn tokenIdToPrefixOp(id: @TagType(Token.Id)) ?ast.Node.PrefixOp.Op {
Token.Id.Minus => ast.Node.PrefixOp.Op{ .Negation = void{} },
Token.Id.MinusPercent => ast.Node.PrefixOp.Op{ .NegationWrap = void{} },
Token.Id.Ampersand => ast.Node.PrefixOp.Op{ .AddressOf = void{} },
Token.Id.Asterisk, Token.Id.AsteriskAsterisk => ast.Node.PrefixOp.Op{
Token.Id.Asterisk, Token.Id.AsteriskAsterisk, Token.Id.BracketStarBracket => ast.Node.PrefixOp.Op{
.PtrType = ast.Node.PrefixOp.PtrInfo{
.align_info = null,
.const_token = null,
Expand Down
7 changes: 7 additions & 0 deletions std/zig/parser_test.zig
@@ -1,3 +1,10 @@
// Ensure zig fmt renders the new unknown-length pointer syntax `[*]`
// canonically (round-trips unchanged through the formatter).
test "zig fmt: pointer of unknown length" {
try testCanonical(
\\fn foo(ptr: [*]u8) void {}
\\
);
}

test "zig fmt: spaces around slice operator" {
try testCanonical(
\\var a = b[c..d];
Expand Down
42 changes: 39 additions & 3 deletions std/zig/tokenizer.zig
Expand Up @@ -143,6 +143,7 @@ pub const Token = struct {
FloatLiteral,
LineComment,
DocComment,
BracketStarBracket,
Keyword_align,
Keyword_and,
Keyword_asm,
Expand Down Expand Up @@ -263,6 +264,8 @@ pub const Tokenizer = struct {
Period,
Period2,
SawAtSign,
LBracket,
LBracketStar,
};

pub fn next(self: *Tokenizer) Token {
Expand Down Expand Up @@ -325,9 +328,7 @@ pub const Tokenizer = struct {
break;
},
'[' => {
result.id = Token.Id.LBracket;
self.index += 1;
break;
state = State.LBracket;
},
']' => {
result.id = Token.Id.RBracket;
Expand Down Expand Up @@ -429,6 +430,28 @@ pub const Tokenizer = struct {
},
},

State.LBracket => switch (c) {
'*' => {
state = State.LBracketStar;
},
else => {
result.id = Token.Id.LBracket;
break;
},
},

State.LBracketStar => switch (c) {
']' => {
result.id = Token.Id.BracketStarBracket;
self.index += 1;
break;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},

State.Ampersand => switch (c) {
'=' => {
result.id = Token.Id.AmpersandEqual;
Expand Down Expand Up @@ -1008,6 +1031,7 @@ pub const Tokenizer = struct {
State.CharLiteralEscape2,
State.CharLiteralEnd,
State.StringLiteralBackslash,
State.LBracketStar,
=> {
result.id = Token.Id.Invalid;
},
Expand All @@ -1024,6 +1048,9 @@ pub const Tokenizer = struct {
State.Slash => {
result.id = Token.Id.Slash;
},
State.LBracket => {
result.id = Token.Id.LBracket;
},
State.Zero => {
result.id = Token.Id.IntegerLiteral;
},
Expand Down Expand Up @@ -1142,6 +1169,15 @@ test "tokenizer" {
testTokenize("test", []Token.Id{Token.Id.Keyword_test});
}

// Verify that "[*]" lexes as the single BracketStarBracket token
// rather than as separate LBracket / Asterisk / RBracket tokens.
test "tokenizer - unknown length pointer" {
testTokenize(
\\[*]u8
, []Token.Id{
Token.Id.BracketStarBracket,
Token.Id.Identifier,
});
}

test "tokenizer - char literal with hex escape" {
testTokenize(
\\'\x1b'
Expand Down

0 comments on commit f06bce5

Please sign in to comment.