Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: crystal-lang/crystal
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 3e144248996b
Choose a base ref
...
head repository: crystal-lang/crystal
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 1d02669e2819
Choose a head ref
  • 3 commits
  • 2 files changed
  • 1 contributor

Commits on Oct 14, 2017

  1. Copy the full SHA
    24791fd View commit details
  2. Clean up String#split specs

    RX14 committed Oct 14, 2017
    Copy the full SHA
    7bdd444 View commit details
  3. Copy the full SHA
    1d02669 View commit details
Showing with 89 additions and 34 deletions.
  1. +18 −5 spec/std/string_spec.cr
  2. +71 −29 src/string.cr
23 changes: 18 additions & 5 deletions spec/std/string_spec.cr
Original file line number Diff line number Diff line change
@@ -820,19 +820,26 @@ describe "String" do
end

describe "split" do
describe "by whitespace" do
it { " foo bar\n\t baz ".split.should eq(["foo", "bar", "baz"]) }
it { " foo bar\n\t baz ".split(1).should eq([" foo bar\n\t baz "]) }
it { " foo bar\n\t baz ".split(2).should eq(["foo", "bar\n\t baz "]) }
it { "日本語 \n\t 日本 \n\n".split.should eq(["日本語", "日本", ""]) }
end

describe "by char" do
it { "".split(',').should eq([""]) }
it { "".split(',', remove_empty: true).should eq([] of String) }
it { "foo,bar,,baz,".split(',').should eq(["foo", "bar", "", "baz", ""]) }
it { "foo,bar,,baz,".split(',', remove_empty: true).should eq(["foo", "bar", "baz"]) }
it { "foo,bar,,baz".split(',').should eq(["foo", "bar", "", "baz"]) }
it { "foo,bar,,baz".split(',', remove_empty: true).should eq(["foo", "bar", "baz"]) }
it { "foo".split(',').should eq(["foo"]) }
it { "foo".split(' ').should eq(["foo"]) }
it { " foo".split(' ').should eq(["", "", "", "foo"]) }
it { "foo ".split(' ').should eq(["foo", "", "", ""]) }
it { " foo bar".split(' ').should eq(["", "", "", "foo", "", "bar"]) }
it { " foo bar\n\t baz ".split(' ').should eq(["", "", "", "foo", "", "", "bar\n\t", "", "baz", "", "", ""]) }
it { " foo bar\n\t baz ".split.should eq(["foo", "bar", "baz"]) }
it { " foo bar\n\t baz ".split(1).should eq([" foo bar\n\t baz "]) }
it { " foo bar\n\t baz ".split(2).should eq(["foo", "bar\n\t baz "]) }
it { " foo bar\n\t baz ".split(" ").should eq(["", "", "", "foo", "", "", "bar\n\t", "", "baz", "", "", ""]) }
it { "foo,bar,baz,qux".split(',', 1).should eq(["foo,bar,baz,qux"]) }
it { "foo,bar,baz,qux".split(',', 3).should eq(["foo", "bar", "baz,qux"]) }
@@ -841,18 +848,20 @@ describe "String" do
it { "foo bar baz qux".split(' ', 3).should eq(["foo", "bar", "baz qux"]) }
it { "foo bar baz qux".split(' ', 30).should eq(["foo", "bar", "baz", "qux"]) }
it { "a,b,".split(',', 3).should eq(["a", "b", ""]) }
it { "日本語 \n\t 日本 \n\n".split.should eq(["日本語", "日本", ""]) }
it { "日本ん語日本ん語".split('ん').should eq(["日本", "語日本", ""]) }
it { "=".split('=').should eq(["", ""]) }
it { "a=".split('=').should eq(["a", ""]) }
it { "=b".split('=').should eq(["", "b"]) }
it { "=".split('=', 2).should eq(["", ""]) }
it { "=".split('=', 2, remove_empty: true).should eq([] of String) }
end

describe "by string" do
it { "".split(",").should eq([""]) }
it { "".split(":-").should eq([""]) }
it { "".split(":-", remove_empty: true).should eq([] of String) }
it { "foo:-bar:-:-baz:-".split(":-").should eq(["foo", "bar", "", "baz", ""]) }
it { "foo:-bar:-:-baz:-".split(":-", remove_empty: true).should eq(["foo", "bar", "baz"]) }
it { "foo:-bar:-:-baz".split(":-").should eq(["foo", "bar", "", "baz"]) }
it { "foo".split(":-").should eq(["foo"]) }
it { "foo".split("").should eq(["f", "o", "o"]) }
@@ -865,11 +874,14 @@ describe "String" do
it { "a=".split("=").should eq(["a", ""]) }
it { "=b".split("=").should eq(["", "b"]) }
it { "=".split("=", 2).should eq(["", ""]) }
it { "=".split("=", 2, remove_empty: true).should eq([] of String) }
end

describe "by regex" do
it { "".split(/\n\t/).should eq([""] of String) }
it { "".split(/\n\t/).should eq([""]) }
it { "".split(/\n\t/, remove_empty: true).should eq([] of String) }
it { "foo\n\tbar\n\t\n\tbaz".split(/\n\t/).should eq(["foo", "bar", "", "baz"]) }
it { "foo\n\tbar\n\t\n\tbaz".split(/\n\t/, remove_empty: true).should eq(["foo", "bar", "baz"]) }
it { "foo\n\tbar\n\t\n\tbaz".split(/(?:\n\t)+/).should eq(["foo", "bar", "baz"]) }
it { "foo,bar".split(/,/, 1).should eq(["foo,bar"]) }
it { "foo,bar,".split(/,/).should eq(["foo", "bar", ""]) }
@@ -888,6 +900,7 @@ describe "String" do
it { "a=".split(/\=/).should eq(["a", ""]) }
it { "=b".split(/\=/).should eq(["", "b"]) }
it { "=".split(/\=/, 2).should eq(["", ""]) }
it { "=".split(/\=/, 2, remove_empty: true).should eq([] of String) }
it { ",".split(/(?:(x)|(,))/).should eq(["", ",", ""]) }

it "keeps groups" do
100 changes: 71 additions & 29 deletions src/string.cr
Original file line number Diff line number Diff line change
@@ -2951,11 +2951,11 @@ class String
!!index(search)
end

# Makes an array by splitting the string on any ASCII whitespace characters
# (and removing that whitespace).
# Makes an array by splitting the string on any amount of ASCII whitespace
# characters (and removing that whitespace).
#
# If *limit* is present, up to *limit* new strings will be created,
# with the entire remainder added to the last string.
# If *limit* is present, up to *limit* new strings will be created, with the
# entire remainder added to the last string.
#
# ```
# old_pond = "
@@ -2974,10 +2974,11 @@ class String
ary
end

# Splits the string after any ASCII whitespace character and yields each part to a block.
# Splits the string after any amount of ASCII whitespace characters and yields
# each non-whitespace part to a block.
#
# If *limit* is present, up to *limit* new strings will be created,
# with the entire remainder added to the last string.
# If *limit* is present, up to *limit* new strings will be created, with the
# entire remainder added to the last string.
#
# ```
# ary = [] of String
@@ -3052,13 +3053,16 @@ class String
# If *limit* is present, up to *limit* new strings will be created,
# with the entire remainder added to the last string.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# "foo,bar,baz".split(',') # => ["foo", "bar", "baz"]
# "foo,bar,baz".split(',', 2) # => ["foo", "bar,baz"]
# "foo,,bar,baz".split(',') # => ["foo", "", "bar", "baz"]
# "foo,,bar,baz".split(',', remove_empty: true) # => ["foo", "bar", "baz"]
# "foo,bar,baz".split(',', 2) # => ["foo", "bar,baz"]
# ```
def split(separator : Char, limit = nil)
def split(separator : Char, limit = nil, *, remove_empty = false)
ary = Array(String).new
split(separator, limit) do |string|
split(separator, limit, remove_empty: remove_empty) do |string|
ary << string
end
ary
@@ -3069,18 +3073,29 @@ class String
# If *limit* is present, up to *limit* new strings will be created,
# with the entire remainder added to the last string.
#
# If *remove_empty* is `true`, any empty strings are not yielded.
#
# ```
# ary = [] of String
#
# "foo,bar,baz".split(',') { |string| ary << string }
# "foo,,bar,baz".split(',') { |string| ary << string }
# ary # => ["foo", "", "bar", "baz"]
# ary.clear
#
# "foo,,bar,baz".split(',', remove_empty: true) { |string| ary << string }
# ary # => ["foo", "bar", "baz"]
# ary.clear
#
# "foo,bar,baz".split(',', 2) { |string| ary << string }
# ary # => ["foo", "bar,baz"]
# ```
def split(separator : Char, limit = nil, &block : String -> _)
if empty? || limit && limit <= 1
def split(separator : Char, limit = nil, *, remove_empty = false, &block : String -> _)
if empty?
yield "" unless remove_empty
return
end

if limit && limit <= 1
yield self
return
end
@@ -3092,14 +3107,15 @@ class String
reader.each do |char|
if char == separator
piece_bytesize = reader.pos - byte_offset
yield String.new(to_unsafe + byte_offset, piece_bytesize)
yield String.new(to_unsafe + byte_offset, piece_bytesize) unless remove_empty && piece_bytesize == 0
yielded += 1
byte_offset = reader.pos + reader.current_char_width
break if limit && yielded + 1 == limit
end
end

piece_bytesize = bytesize - byte_offset
return if remove_empty && piece_bytesize == 0
yield String.new(to_unsafe + byte_offset, piece_bytesize)
end

@@ -3110,15 +3126,18 @@ class String
#
# If *separator* is an empty string (`""`), the string will be separated into one-character strings.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# long_river_name = "Mississippi"
# long_river_name.split("ss") # => ["Mi", "i", "ippi"]
# long_river_name.split("i") # => ["M", "ss", "ss", "pp", ""]
# long_river_name.split("") # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# long_river_name.split("ss") # => ["Mi", "i", "ippi"]
# long_river_name.split("i") # => ["M", "ss", "ss", "pp", ""]
# long_river_name.split("i", remove_empty: true) # => ["M", "ss", "ss", "pp"]
# long_river_name.split("") # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# ```
def split(separator : String, limit = nil)
def split(separator : String, limit = nil, *, remove_empty = false)
ary = Array(String).new
split(separator, limit) do |string|
split(separator, limit, remove_empty: remove_empty) do |string|
ary << string
end
ary
@@ -3131,6 +3150,8 @@ class String
#
# If *separator* is an empty string (`""`), the string will be separated into one-character strings.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# ary = [] of String
# long_river_name = "Mississippi"
@@ -3143,11 +3164,20 @@ class String
# ary # => ["M", "ss", "ss", "pp", ""]
# ary.clear
#
# long_river_name.split("i", remove_empty: true) { |s| ary << s }
# ary # => ["M", "ss", "ss", "pp"]
# ary.clear
#
# long_river_name.split("") { |s| ary << s }
# ary # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# ```
def split(separator : String, limit = nil, &block : String -> _)
if empty? || (limit && limit <= 1)
def split(separator : String, limit = nil, *, remove_empty = false, &block : String -> _)
if empty?
yield "" unless remove_empty
return
end

if limit && limit <= 1
yield self
return
end
@@ -3171,7 +3201,9 @@ class String
if (to_unsafe + i).memcmp(separator.to_unsafe, separator_bytesize) == 0
piece_bytesize = i - byte_offset
piece_size = single_byte_optimizable ? piece_bytesize : 0
yield String.new(to_unsafe + byte_offset, piece_bytesize, piece_size)
unless remove_empty && piece_bytesize == 0
yield String.new(to_unsafe + byte_offset, piece_bytesize, piece_size)
end
yielded += 1
byte_offset = i + separator_bytesize
i += separator_bytesize - 1
@@ -3181,6 +3213,7 @@ class String
end

piece_bytesize = bytesize - byte_offset
return if remove_empty && piece_bytesize == 0
piece_size = single_byte_optimizable ? piece_bytesize : 0
yield String.new(to_unsafe + byte_offset, piece_bytesize, piece_size)
end
@@ -3192,6 +3225,8 @@ class String
#
# If *separator* is an empty regex (`//`), the string will be separated into one-character strings.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# ary = [] of String
# long_river_name = "Mississippi"
@@ -3203,9 +3238,9 @@ class String
# long_river_name.split(//) { |s| ary << s }
# ary # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# ```
def split(separator : Regex, limit = nil)
def split(separator : Regex, limit = nil, *, remove_empty = false)
ary = Array(String).new
split(separator, limit) do |string|
split(separator, limit, remove_empty: remove_empty) do |string|
ary << string
end
ary
@@ -3218,13 +3253,20 @@ class String
#
# If *separator* is an empty regex (`//`), the string will be separated into one-character strings.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# long_river_name = "Mississippi"
# long_river_name.split(/s+/) # => ["Mi", "i", "ippi"]
# long_river_name.split(//) # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# ```
def split(separator : Regex, limit = nil, &block : String -> _)
if empty? || (limit && limit <= 1)
def split(separator : Regex, limit = nil, *, remove_empty = false, &block : String -> _)
if empty?
yield "" unless remove_empty
return
end

if limit && limit <= 1
yield self
return
end
@@ -3249,7 +3291,7 @@ class String
else
slice_size = index - slice_offset

yield byte_slice(slice_offset, slice_size)
yield byte_slice(slice_offset, slice_size) unless remove_empty && slice_size == 0
count += 1

1.upto(match.size) do |i|
@@ -3265,7 +3307,7 @@ class String
break if match_offset >= bytesize
end

yield byte_slice(slice_offset)
yield byte_slice(slice_offset) unless remove_empty && slice_offset == bytesize
end

private def split_by_empty_separator(limit, &block : String -> _)