Skip to content

Commit

Permalink
String: always compute codepoints size when ASCII optimization is available
Browse files Browse the repository at this point in the history

This only incurs a very small, one-time slowdown for computing the size;
after that, methods that deal with ASCII-only strings should execute much faster.

As a comparison, for this code:

```
# Build a String at runtime so its size is not known at compile-time
str = String.build do |str|
  50000.times { str << "abc" }
  str << 'x'
end

time = Time.now
a = 0
100_000.times do
  a += str[150000].ord
end
puts a
puts Time.now - time
```

Time before: 13.72s. Time after: 0.0008s.

This could have a huge impact on code that repeatedly indexes
and slices the same string when that string is known to be ASCII-only
(for example, URIs).
Ary Borenszweig committed Jul 14, 2016
1 parent 351fe7f commit f6874e1
Showing 1 changed file with 15 additions and 19 deletions.
34 changes: 15 additions & 19 deletions src/string.cr
Original file line number Diff line number Diff line change
@@ -647,7 +647,7 @@ class String
#
# Raises `ArgumentError` if `count` is negative.
def [](start : Int, count : Int)
if single_byte_optimizable?
if ascii_only?
return byte_slice(start, count)
end

@@ -728,7 +728,7 @@ class String
end

def at(index : Int)
if single_byte_optimizable?
if ascii_only?
byte = byte_at?(index)
return byte ? byte.unsafe_chr : yield
end
@@ -746,7 +746,7 @@ class String

def byte_slice(start : Int, count : Int)
start += bytesize if start < 0
single_byte_optimizable = single_byte_optimizable?
single_byte_optimizable = ascii_only?

if 0 <= start < bytesize
raise ArgumentError.new "negative count" if count < 0
@@ -929,7 +929,7 @@ class String
return byte_slice(0, bytesize - 2)
end

if to_unsafe[bytesize - 1] < 128 || single_byte_optimizable?
if to_unsafe[bytesize - 1] < 128 || ascii_only?
return byte_slice(0, bytesize - 1)
end

@@ -999,7 +999,7 @@ class String
bytes, count = String.char_bytes_and_bytesize(other)

new_bytesize = bytesize + count
new_size = single_byte_optimizable? ? new_bytesize : 0
new_size = ascii_only? ? new_bytesize : 0

insert_impl(byte_index, bytes.to_unsafe, count, new_bytesize, new_size)
end
@@ -1028,7 +1028,7 @@ class String
raise IndexError.new unless byte_index

new_bytesize = bytesize + other.bytesize
new_size = single_byte_optimizable? && other.single_byte_optimizable? ? new_bytesize : 0
new_size = ascii_only? && other.ascii_only? ? new_bytesize : 0

insert_impl(byte_index, other.to_unsafe, other.bytesize, new_bytesize, new_size)
end
@@ -1418,7 +1418,7 @@ class String
buffer.value = byte
buffer += 1
end
{buffer, single_byte_optimizable? ? bytesize - (to_index - from_index) + 1 : 0}
{buffer, ascii_only? ? bytesize - (to_index - from_index) + 1 : 0}
end
end

@@ -2131,7 +2131,7 @@ class String
# "こんにちは".char_index_to_byte_index(5) # => 15
# ```
def char_index_to_byte_index(index)
if single_byte_optimizable?
if ascii_only?
return 0 <= index <= bytesize ? index : nil
end

@@ -2147,7 +2147,7 @@ class String
# It is valid to pass `bytesize` to *index*, and in this case the answer
# will be the size of this string.
def byte_index_to_char_index(index)
if single_byte_optimizable?
if ascii_only?
return 0 <= index <= bytesize ? index : nil
end

@@ -2188,7 +2188,7 @@ class String
end

ary = Array(String).new
single_byte_optimizable = single_byte_optimizable?
single_byte_optimizable = ascii_only?
index = 0
i = 0
looking_for_space = false
@@ -2250,7 +2250,7 @@ class String
ary = Array(String).new

byte_offset = 0
single_byte_optimizable = single_byte_optimizable?
single_byte_optimizable = ascii_only?

reader = Char::Reader.new(self)
reader.each_with_index do |char, i|
@@ -2296,7 +2296,7 @@ class String
byte_offset = 0
separator_bytesize = separator.bytesize

single_byte_optimizable = single_byte_optimizable?
single_byte_optimizable = ascii_only?

i = 0
stop = bytesize - separator.bytesize + 1
@@ -2724,7 +2724,7 @@ class String
# end
# ```
def each_char
if single_byte_optimizable?
if ascii_only?
each_byte do |byte|
yield byte.unsafe_chr
end
@@ -2976,7 +2976,7 @@ class String
def ends_with?(char : Char)
return false unless bytesize > 0

if char.ord < 0x80 || single_byte_optimizable?
if char.ord < 0x80 || ascii_only?
return to_unsafe[bytesize - 1] == char.ord
end

@@ -3025,11 +3025,7 @@ class String
end

def ascii_only?
@bytesize == 0 || size == @bytesize
end

protected def single_byte_optimizable?
@bytesize == @length
@bytesize == size
end

protected def size_known?

0 comments on commit f6874e1

Please sign in to comment.