Skip to content

Commit

Permalink
Merge pull request #2816 from crystal-lang/feature/unsafe_chr
Browse files Browse the repository at this point in the history
Make `Int#chr` raise when out of range, and add `Int#unsafe_chr`
Ary Borenszweig authored Jun 12, 2016

Verified

This commit was signed with the committer’s verified signature.
edolstra Eelco Dolstra
2 parents 5626e12 + 0b58490 commit b64fefd
Showing 19 changed files with 130 additions and 73 deletions.
4 changes: 2 additions & 2 deletions spec/std/char_spec.cr
Original file line number Diff line number Diff line change
@@ -210,7 +210,7 @@ describe "Char" do

it "raises on codepoint bigger than 0x10ffff" do
expect_raises InvalidByteSequenceError do
(0x10ffff + 1).chr.bytesize
(0x10ffff + 1).unsafe_chr.bytesize
end
end
end
@@ -261,7 +261,7 @@ describe "Char" do

it "raises on codepoint bigger than 0x10ffff when doing each_byte" do
expect_raises InvalidByteSequenceError do
(0x10ffff + 1).chr.each_byte { |b| }
(0x10ffff + 1).unsafe_chr.each_byte { |b| }
end
end

15 changes: 15 additions & 0 deletions spec/std/int_spec.cr
Original file line number Diff line number Diff line change
@@ -437,4 +437,19 @@ describe "Int" do
value.clone.should eq(value)
end
end

# Spec for `Int#chr`: an in-range codepoint converts to its `Char`.
it "#chr" do
65.chr.should eq('A')

# The out-of-range expectation is only compiled when Crystal::VERSION is
# "0.18.0" -- the same guard that wraps the raising `Int#chr` definition in
# src/int.cr. One past 0x10ffff must raise ArgumentError with this message.
{% if Crystal::VERSION == "0.18.0" %}
expect_raises(ArgumentError, "#{0x10ffff + 1} out of char range") do
(0x10ffff + 1).chr
end
{% end %}
end

# Spec for `Int#unsafe_chr`: it is expected to convert without a range check,
# so a value one past 0x10ffff still converts and `ord` gives the same
# integer back.
it "#unsafe_chr" do
65.unsafe_chr.should eq('A')
(0x10ffff + 1).unsafe_chr.ord.should eq(0x10ffff + 1)
end
end
2 changes: 1 addition & 1 deletion src/base64.cr
Original file line number Diff line number Diff line change
@@ -225,7 +225,7 @@ module Base64

# :nodoc:
DECODE_TABLE = Array(Int8).new(256) do |i|
case i.chr
case i.unsafe_chr
when 'A'..'Z' then (i - 0x41).to_i8
when 'a'..'z' then (i - 0x47).to_i8
when '0'..'9' then (i + 0x04).to_i8
2 changes: 1 addition & 1 deletion src/big/big_float.cr
Original file line number Diff line number Diff line change
@@ -178,7 +178,7 @@ struct BigFloat < Float
length.times do |i|
next if cstr[i] == 45 # '-'
io << '.' if i == expptr
io << cstr[i].chr
io << cstr[i].unsafe_chr
end
(expptr - length).times { io << 0 } if expptr > 0
end
14 changes: 10 additions & 4 deletions src/char.cr
Original file line number Diff line number Diff line change
@@ -52,6 +52,12 @@ struct Char
# The character representing the end of a C string.
ZERO = '\0'

# The maximum valid codepoint for a character
MAX_CODEPOINT = 0x10ffff

# The maximum character: the one whose codepoint is `MAX_CODEPOINT`.
# Derived from `MAX_CODEPOINT` so the two constants cannot drift apart.
MAX = MAX_CODEPOINT.unsafe_chr

# Returns the difference of the codepoint values of this char and *other*.
#
# ```
@@ -259,7 +265,7 @@ struct Char
# ```
def downcase
if uppercase?
(self.ord + 32).chr
(self.ord + 32).unsafe_chr
else
self
end
@@ -274,7 +280,7 @@ struct Char
# ```
def upcase
if lowercase?
(self.ord - 32).chr
(self.ord - 32).unsafe_chr
else
self
end
@@ -506,7 +512,7 @@ struct Char
yield (0xe0 | (c >> 12)).to_u8
yield (0x80 | ((c >> 6) & 0x3f)).to_u8
yield (0x80 | (c & 0x3f)).to_u8
elsif c <= 0x10ffff
elsif c <= MAX_CODEPOINT
# 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
yield (0xf0 | (c >> 18)).to_u8
yield (0x80 | ((c >> 12) & 0x3f)).to_u8
@@ -536,7 +542,7 @@ struct Char
elsif c <= 0xffff
# 1110xxxx 10xxxxxx 10xxxxxx
3
elsif c <= 0x10ffff
elsif c <= MAX_CODEPOINT
# 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
4
else
4 changes: 2 additions & 2 deletions src/char/reader.cr
Original file line number Diff line number Diff line change
@@ -104,7 +104,7 @@ struct Char
end

decode_char_at(next_pos) do |code_point, width|
code_point.chr
code_point.unsafe_chr
end
end

@@ -212,7 +212,7 @@ struct Char
decode_char_at(@pos) do |code_point, width|
@current_char_width = width
@end = @pos == @string.bytesize
@current_char = code_point.chr
@current_char = code_point.unsafe_chr
end
end

2 changes: 1 addition & 1 deletion src/compiler/crystal/semantic/main_visitor.cr
Original file line number Diff line number Diff line change
@@ -2009,7 +2009,7 @@ module Crystal
when "to_u64" then mod.uint64
when "to_f", "to_f64" then mod.float64
when "to_f32" then mod.float32
when "chr" then mod.char
when "unsafe_chr", "chr" then mod.char # TODO: remove "chr" after 0.18.0
else
raise "Bug: unknown cast operator #{typed_def.name}"
end
2 changes: 1 addition & 1 deletion src/csv/builder.cr
Original file line number Diff line number Diff line change
@@ -135,7 +135,7 @@ class CSV::Builder

private def needs_quotes?(value)
value.each_byte do |byte|
case byte.chr
case byte.unsafe_chr
when ',', '\n', '"'
return true
end
2 changes: 1 addition & 1 deletion src/http/common.cr
Original file line number Diff line number Diff line change
@@ -85,7 +85,7 @@ module HTTP

# Get where the header value starts (skip space)
middle_index = colon_index + 1
while middle_index < bytesize && cstr[middle_index].chr.whitespace?
while middle_index < bytesize && cstr[middle_index].unsafe_chr.whitespace?
middle_index += 1
end

4 changes: 2 additions & 2 deletions src/http/headers.cr
Original file line number Diff line number Diff line change
@@ -40,7 +40,7 @@ struct HTTP::Headers
end

private def normalize_byte(byte)
char = byte.chr
char = byte.unsafe_chr

return byte if char.lowercase? || char == '-' # Optimize the common case
return byte + 32 if char.uppercase?
@@ -268,7 +268,7 @@ struct HTTP::Headers
# are '\t', ' ', all US-ASCII printable characters and
# range from '\x80' to '\xff' (but the last is obsoleted.)
value.each_byte do |byte|
char = byte.chr
char = byte.unsafe_chr
next if char == '\t'
if char < ' ' || char > '\u{ff}' || char == '\u{7e}'
raise ArgumentError.new("header content contains invalid character #{char.inspect}")
2 changes: 1 addition & 1 deletion src/http/params.cr
Original file line number Diff line number Diff line change
@@ -35,7 +35,7 @@ module HTTP
bytesize = query.bytesize
while i < bytesize
byte = query.unsafe_byte_at(i)
char = byte.chr
char = byte.unsafe_chr

case char
when '='
16 changes: 16 additions & 0 deletions src/int.cr
Original file line number Diff line number Diff line change
@@ -60,6 +60,22 @@ struct Int
alias Unsigned = UInt8 | UInt16 | UInt32 | UInt64
alias Primitive = Signed | Unsigned

{% if Crystal::VERSION == "0.18.0" %}
  # Converts this integer to the `Char` whose Unicode codepoint equals *self*.
  #
  # Raises `ArgumentError` when the value lies outside the char range
  # (`0..0x10ffff`); in-range values are handed to `unsafe_chr`.
  #
  # ```
  # 97.chr # => 'a'
  # ```
  def chr
    # Reject anything below zero or above the largest valid codepoint.
    if self < 0 || self > Char::MAX_CODEPOINT
      raise ArgumentError.new("#{self} out of char range")
    end

    unsafe_chr
  end
{% end %}

def ~
self ^ -1
end
10 changes: 5 additions & 5 deletions src/io.cr
Original file line number Diff line number Diff line change
@@ -392,16 +392,16 @@ module IO
return nil unless first

first = first.to_u32
return first.chr, 1 if first < 0x80
return first.unsafe_chr, 1 if first < 0x80

second = read_utf8_masked_byte
return ((first & 0x1f) << 6 | second).chr, 2 if first < 0xe0
return ((first & 0x1f) << 6 | second).unsafe_chr, 2 if first < 0xe0

third = read_utf8_masked_byte
return ((first & 0x0f) << 12 | (second << 6) | third).chr, 3 if first < 0xf0
return ((first & 0x0f) << 12 | (second << 6) | third).unsafe_chr, 3 if first < 0xf0

fourth = read_utf8_masked_byte
return ((first & 0x07) << 18 | (second << 12) | (third << 6) | fourth).chr, 4 if first < 0xf8
return ((first & 0x07) << 18 | (second << 12) | (third << 6) | fourth).unsafe_chr, 4 if first < 0xf8

raise InvalidByteSequenceError.new("Unexpected byte 0x#{first.to_s(16)} in UTF-8 byte sequence")
end
@@ -628,7 +628,7 @@ module IO

# One byte: use gets(Char)
if delimiter.bytesize == 1
return gets(delimiter.unsafe_byte_at(0).chr)
return gets(delimiter.unsafe_byte_at(0).unsafe_chr)
end

# One char: use gets(Char)
8 changes: 4 additions & 4 deletions src/io/buffered.cr
Original file line number Diff line number Diff line change
@@ -124,25 +124,25 @@ module IO::Buffered
first = @in_buffer_rem[0].to_u32
if first < 0x80
@in_buffer_rem += 1
return first.chr, 1
return first.unsafe_chr, 1
end

second = (@in_buffer_rem[1] & 0x3f).to_u32
if first < 0xe0
@in_buffer_rem += 2
return ((first & 0x1f) << 6 | second).chr, 2
return ((first & 0x1f) << 6 | second).unsafe_chr, 2
end

third = (@in_buffer_rem[2] & 0x3f).to_u32
if first < 0xf0
@in_buffer_rem += 3
return ((first & 0x0f) << 12 | (second << 6) | third).chr, 3
return ((first & 0x0f) << 12 | (second << 6) | third).unsafe_chr, 3
end

fourth = (@in_buffer_rem[3] & 0x3f).to_u32
if first < 0xf8
@in_buffer_rem += 4
return ((first & 0x07) << 18 | (second << 12) | (third << 6) | fourth).chr, 4
return ((first & 0x07) << 18 | (second << 12) | (third << 6) | fourth).unsafe_chr, 4
end

raise InvalidByteSequenceError.new("Unexpected byte 0x#{first.to_s(16)} in UTF-8 byte sequence")
36 changes: 18 additions & 18 deletions src/markdown/parser.cr
Original file line number Diff line number Diff line change
@@ -72,7 +72,7 @@ class Markdown::Parser
bytesize = line.bytesize
str = line.to_unsafe
pos = level
while pos < bytesize && str[pos].chr.whitespace?
while pos < bytesize && str[pos].unsafe_chr.whitespace?
pos += 1
end

@@ -292,7 +292,7 @@ class Markdown::Parser
str = line.to_unsafe
pos = 0

while pos < bytesize && str[pos].chr.whitespace?
while pos < bytesize && str[pos].unsafe_chr.whitespace?
pos += 1
end

@@ -306,9 +306,9 @@ class Markdown::Parser
last_is_space = true

while pos < bytesize
case str[pos].chr
case str[pos].unsafe_chr
when '*'
if pos + 1 < bytesize && str[pos + 1].chr == '*'
if pos + 1 < bytesize && str[pos + 1].unsafe_chr == '*'
if two_stars || has_closing?('*', 2, str, (pos + 2), bytesize)
@renderer.text line.byte_slice(cursor, pos - cursor)
pos += 1
@@ -331,7 +331,7 @@ class Markdown::Parser
one_star = !one_star
end
when '_'
if pos + 1 < bytesize && str[pos + 1].chr == '_'
if pos + 1 < bytesize && str[pos + 1].unsafe_chr == '_'
if two_underscores || (last_is_space && has_closing?('_', 2, str, (pos + 2), bytesize))
@renderer.text line.byte_slice(cursor, pos - cursor)
pos += 1
@@ -401,7 +401,7 @@ class Markdown::Parser
in_link = false
end
end
last_is_space = pos < bytesize && str[pos].chr.whitespace?
last_is_space = pos < bytesize && str[pos].unsafe_chr.whitespace?
pos += 1
end

@@ -419,17 +419,17 @@ class Markdown::Parser
return false unless idx

if count == 2
return false unless idx + 1 < bytesize && str[idx + 1].chr == char
return false unless idx + 1 < bytesize && str[idx + 1].unsafe_chr == char
end

!str[idx - 1].chr.whitespace?
!str[idx - 1].unsafe_chr.whitespace?
end

def check_link(str, pos, bytesize)
# We need to count nested brackets to do it right
bracket_count = 1
while pos < bytesize
case str[pos].chr
case str[pos].unsafe_chr
when '['
bracket_count += 1
when ']'
@@ -477,7 +477,7 @@ class Markdown::Parser
bytesize = line.bytesize
str = line.to_unsafe
pos = 0
while pos < bytesize && pos < 6 && str[pos].chr == '#'
while pos < bytesize && pos < 6 && str[pos].unsafe_chr == '#'
pos += 1
end
pos == 0 ? nil : pos
@@ -487,7 +487,7 @@ class Markdown::Parser
bytesize = line.bytesize
str = line.to_unsafe
pos = 0
while pos < bytesize && pos < 4 && str[pos].chr.whitespace?
while pos < bytesize && pos < 4 && str[pos].unsafe_chr.whitespace?
pos += 1
end

@@ -502,17 +502,17 @@ class Markdown::Parser
bytesize = line.bytesize
str = line.to_unsafe
pos = 0
while pos < bytesize && str[pos].chr.whitespace?
while pos < bytesize && str[pos].unsafe_chr.whitespace?
pos += 1
end

return false unless pos < bytesize
return false unless prefix ? str[pos].chr == prefix : (str[pos].chr == '*' || str[pos].chr == '-' || str[pos].chr == '+')
return false unless prefix ? str[pos].unsafe_chr == prefix : (str[pos].unsafe_chr == '*' || str[pos].unsafe_chr == '-' || str[pos].unsafe_chr == '+')

pos += 1

return false unless pos < bytesize
str[pos].chr.whitespace?
str[pos].unsafe_chr.whitespace?
end

def previous_line_is_not_intended_and_starts_with_bullet_list_marker?(prefix)
@@ -535,19 +535,19 @@ class Markdown::Parser
bytesize = line.bytesize
str = line.to_unsafe
pos = 0
while pos < bytesize && str[pos].chr.whitespace?
while pos < bytesize && str[pos].unsafe_chr.whitespace?
pos += 1
end

return false unless pos < bytesize
return false unless str[pos].chr.digit?
return false unless str[pos].unsafe_chr.digit?

while pos < bytesize && str[pos].chr.digit?
while pos < bytesize && str[pos].unsafe_chr.digit?
pos += 1
end

return false unless pos < bytesize
str[pos].chr == '.'
str[pos].unsafe_chr == '.'
end

def next_lines_empty_of_code?
Loading

0 comments on commit b64fefd

Please sign in to comment.