Skip to content

Commit

Permalink
Improve invalid character error messages (#566)
Browse files Browse the repository at this point in the history
See #544
  • Loading branch information
tiehuis authored and andrewrk committed Oct 26, 2017
1 parent f4ca348 commit 6663638
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 9 deletions.
56 changes: 47 additions & 9 deletions src/tokenizer.cpp
Expand Up @@ -416,6 +416,44 @@ static void handle_string_escape(Tokenize *t, uint8_t c) {
}
}

// Maps a control byte to its backslash-escape spelling (as a C string holding
// a literal backslash followed by one letter/digit), for use in diagnostics.
// Returns nullptr when the byte has no shorthand escape.
static const char* get_escape_shorthand(uint8_t c) {
    static const struct {
        uint8_t byte;
        const char *text;
    } shorthand_table[] = {
        {'\0', "\\0"},
        {'\a', "\\a"},
        {'\b', "\\b"},
        {'\t', "\\t"},
        {'\n', "\\n"},
        {'\v', "\\v"},
        {'\f', "\\f"},
        {'\r', "\\r"},
    };
    const size_t entry_count = sizeof(shorthand_table) / sizeof(shorthand_table[0]);

    for (size_t i = 0; i < entry_count; i += 1) {
        if (shorthand_table[i].byte == c) {
            return shorthand_table[i].text;
        }
    }
    // No shorthand exists for this byte.
    return nullptr;
}

// Reports an "invalid character" tokenizer error for byte `c` via
// tokenize_error, choosing the most readable rendering of the byte:
//   - '\r' gets a dedicated message about unsupported line endings;
//   - printable bytes (per isprint) are shown verbatim;
//   - bytes with a shorthand escape (see get_escape_shorthand) are shown
//     as that escape, e.g. '\t';
//   - everything else is shown as a two-digit hex escape, e.g. '\x01'.
static void invalid_char_error(Tokenize *t, uint8_t c) {
    if (c == '\r') {
        tokenize_error(t, "invalid carriage return, only '\\n' line endings are supported");
        return;
    }

    if (isprint(c)) {
        tokenize_error(t, "invalid character: '%c'", c);
        return;
    }

    const char *sh = get_escape_shorthand(c);
    if (sh) {
        tokenize_error(t, "invalid character: '%s'", sh);
        return;
    }

    // Zero-pad to two hex digits so that small byte values are rendered as
    // '\x01' rather than the ambiguous '\x1'.
    tokenize_error(t, "invalid character: '\\x%02x'", c);
}

void tokenize(Buf *buf, Tokenization *out) {
Tokenize t = {0};
t.out = out;
Expand Down Expand Up @@ -580,7 +618,7 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateSawQuestionMark;
break;
default:
tokenize_error(&t, "invalid character: '%c'", c);
invalid_char_error(&t, c);
}
break;
case TokenizeStateSawQuestionMark:
Expand Down Expand Up @@ -890,7 +928,7 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateLineString;
break;
default:
tokenize_error(&t, "invalid character: '%c'", c);
invalid_char_error(&t, c);
break;
}
break;
Expand Down Expand Up @@ -919,7 +957,7 @@ void tokenize(Buf *buf, Tokenization *out) {
break;
case '\\':
if (t.cur_tok->data.str_lit.is_c_str) {
tokenize_error(&t, "invalid character: '%c'", c);
invalid_char_error(&t, c);
}
t.state = TokenizeStateLineStringContinue;
break;
Expand Down Expand Up @@ -949,7 +987,7 @@ void tokenize(Buf *buf, Tokenization *out) {
buf_append_char(&t.cur_tok->data.str_lit.str, '\n');
break;
default:
tokenize_error(&t, "invalid character: '%c'", c);
invalid_char_error(&t, c);
break;
}
break;
Expand Down Expand Up @@ -1073,7 +1111,7 @@ void tokenize(Buf *buf, Tokenization *out) {
handle_string_escape(&t, '\"');
break;
default:
tokenize_error(&t, "invalid character: '%c'", c);
invalid_char_error(&t, c);
}
break;
case TokenizeStateCharCode:
Expand Down Expand Up @@ -1147,7 +1185,7 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart;
break;
default:
tokenize_error(&t, "invalid character: '%c'", c);
invalid_char_error(&t, c);
}
break;
case TokenizeStateZero:
Expand Down Expand Up @@ -1189,7 +1227,7 @@ void tokenize(Buf *buf, Tokenization *out) {
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
if (is_symbol_char(c)) {
tokenize_error(&t, "invalid character: '%c'", c);
invalid_char_error(&t, c);
}
// not my char
t.pos -= 1;
Expand Down Expand Up @@ -1233,7 +1271,7 @@ void tokenize(Buf *buf, Tokenization *out) {
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
if (is_symbol_char(c)) {
tokenize_error(&t, "invalid character: '%c'", c);
invalid_char_error(&t, c);
}
// not my char
t.pos -= 1;
Expand Down Expand Up @@ -1282,7 +1320,7 @@ void tokenize(Buf *buf, Tokenization *out) {
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
if (is_symbol_char(c)) {
tokenize_error(&t, "invalid character: '%c'", c);
invalid_char_error(&t, c);
}
// not my char
t.pos -= 1;
Expand Down
23 changes: 23 additions & 0 deletions test/compile_errors.zig
Expand Up @@ -2252,4 +2252,27 @@ pub fn addCases(cases: &tests.CompileErrorContext) {
\\}
,
".tmp_source.zig:9:13: error: type '&MyType' does not support field access");

cases.add("carriage return special case",
"fn test() -> bool {\r\n" ++
" true\r\n" ++
"}\r\n"
,
".tmp_source.zig:1:20: error: invalid carriage return, only '\\n' line endings are supported");

cases.add("non-printable invalid character",
"\xff\xfe" ++
\\fn test() -> bool {\r
\\ true\r
\\}
,
".tmp_source.zig:1:1: error: invalid character: '\\xff'");

cases.add("non-printable invalid character with escape alternative",
"fn test() -> bool {\n" ++
"\ttrue\n" ++
"}\n"
,
".tmp_source.zig:2:1: error: invalid character: '\\t'");

}

0 comments on commit 6663638

Please sign in to comment.