Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jruby
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 8bfaa93acb26
Choose a base ref
...
head repository: jruby/jruby
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 5791b43af762
Choose a head ref
  • 2 commits
  • 7 files changed
  • 1 contributor

Commits on Mar 11, 2015

  1. [Truffle] Hard-code the Kernel#` return string to UTF-8 (instead of U…

    …S-ASCII).
    
    Both are wrong, since we should be using the default external encoding, but UTF-8 is more flexible.
    nirvdrum committed Mar 11, 2015
    Copy the full SHA
    0ea625d View commit details
  2. Copy the full SHA
    5791b43 View commit details
16 changes: 0 additions & 16 deletions spec/truffle/tags/core/string/split_tags.txt
Original file line number Diff line number Diff line change
@@ -1,23 +1,7 @@
fails:String#split with String throws an ArgumentError if the pattern is not a valid string
fails:String#split with String splits on multibyte characters
fails:String#split with String suppresses trailing empty fields when limit isn't given or 0
fails:String#split with String returns an array with one entry if limit is 1: the original string
fails:String#split with String returns at most limit fields when limit > 1
fails:String#split with String doesn't suppress or limit fields when limit is negative
fails:String#split with String defaults to $; when string isn't given or nil
fails:String#split with String ignores leading and continuous whitespace when string is a single space
fails:String#split with String splits between characters when its argument is an empty string
fails:String#split with String tries converting its pattern argument to a string via to_str
fails:String#split with String tries converting limit to an integer via to_int
fails:String#split with String returns subclass instances based on self
fails:String#split with String taints the resulting strings if self is tainted
fails:String#split with Regexp defaults to $; when regexp isn't given or nil
fails:String#split with Regexp includes all captures in the result array
fails:String#split with Regexp does not include non-matching captures in the result array
fails:String#split with Regexp tries converting limit to an integer via to_int
fails:String#split with Regexp returns subclass instances based on self
fails:String#split with Regexp taints the resulting strings if self is tainted
fails:String#split with Regexp taints an empty string if self is tainted
fails:String#split with Regexp retains the encoding of the source string
fails:String#split with Regexp splits a string on each character for a multibyte encoding and empty split
fails:String#split with Regexp returns an ArgumentError if an invalid UTF-8 string is supplied
Original file line number Diff line number Diff line change
@@ -114,7 +114,8 @@ public RubyString backtick(RubyString command) {
throw new RuntimeException(e);
}

return context.makeString(resultBuilder.toString());
// TODO (nirvdrum 10-Mar-15) This should be using the default external encoding, rather than hard-coded to UTF-8.
return context.makeString(resultBuilder.toString(), RubyEncoding.getEncoding("UTF-8").getEncoding());
}

}
Original file line number Diff line number Diff line change
@@ -1601,58 +1601,6 @@ public int size(RubyString string) {
}
}

/**
 * Core method node for {@code String#split}.
 *
 * <p>Specializations cover: a string separator without a limit, a regexp
 * separator with or without a limit, and no arguments at all (which splits
 * on whitespace, like a single-space separator).
 */
@CoreMethod(names = "split", optional = 2, lowerFixnumParameters = 2, taintFromSelf = true)
public abstract static class SplitNode extends CoreMethodNode {

    public SplitNode(RubyContext context, SourceSection sourceSection) {
        super(context, sourceSection);
    }

    public SplitNode(SplitNode prev) {
        super(prev);
    }

    /** Splits on a literal string separator; no limit was supplied. */
    @Specialization
    public RubyArray split(RubyString string, RubyString sep, @SuppressWarnings("unused") UndefinedPlaceholder limit) {
        notDesignedForCompilation();

        return splitHelper(string, sep.toString());
    }

    /** Splits on a regexp separator; no limit was supplied, so 0 is passed to the regexp. */
    @Specialization
    public RubyArray split(RubyString string, RubyRegexp sep, @SuppressWarnings("unused") UndefinedPlaceholder limit) {
        notDesignedForCompilation();

        return RubyArray.fromObjects(getContext().getCoreLibrary().getArrayClass(), (Object[]) sep.split(string, false, 0));
    }

    /** Splits on a regexp separator; the limit is only enforced when it is positive. */
    @Specialization
    public RubyArray split(RubyString string, RubyRegexp sep, int limit) {
        notDesignedForCompilation();

        return RubyArray.fromObjects(getContext().getCoreLibrary().getArrayClass(), (Object[]) sep.split(string, limit > 0, limit));
    }

    /** Splits with no arguments, which behaves like a single-space separator. */
    @Specialization
    public RubyArray split(RubyString string, @SuppressWarnings("unused") UndefinedPlaceholder sep, @SuppressWarnings("unused") UndefinedPlaceholder limit) {
        notDesignedForCompilation();

        return splitHelper(string, " ");
    }

    /**
     * Splits {@code string} on the literal separator {@code sep} and wraps every
     * piece in a new string of the receiver's logical class.
     *
     * <p>Two Ruby rules are applied before delegating to {@link String#split(String)}:
     * an empty receiver yields an empty array, and a single-space separator means
     * "awk-style" splitting, where leading whitespace is ignored and any run of
     * whitespace separates two fields.
     *
     * @param string the receiver being split
     * @param sep the separator text, matched literally unless it is a single space
     * @return a Ruby array holding the resulting pieces
     */
    private RubyArray splitHelper(RubyString string, String sep) {
        final String source = string.toString();
        final String[] components;

        if (source.isEmpty()) {
            // Java's split would return [""] here; Ruby returns [].
            components = new String[0];
        } else if (" ".equals(sep)) {
            // Drop leading whitespace first: Java's split keeps a leading empty field otherwise.
            final String withoutLeadingWhitespace = source.replaceFirst("\\A\\s+", "");
            components = withoutLeadingWhitespace.isEmpty()
                    ? new String[0]
                    : withoutLeadingWhitespace.split("\\s+");
        } else {
            components = source.split(Pattern.quote(sep));
        }

        final Object[] objects = new Object[components.length];

        for (int n = 0; n < objects.length; n++) {
            objects[n] = getContext().makeString(string.getLogicalClass(), components[n]);
        }

        return RubyArray.fromObjects(getContext().getCoreLibrary().getArrayClass(), objects);
    }
}

@CoreMethod(names = "succ", taintFromSelf = true)
public abstract static class SuccNode extends CoreMethodNode {

Original file line number Diff line number Diff line change
@@ -60,13 +60,99 @@
import org.jruby.util.ConvertBytes;
import org.jruby.util.StringSupport;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
* Rubinius primitives associated with the Ruby {@code String} class.
*/
public abstract class StringPrimitiveNodes {

/**
 * Rubinius {@code string_awk_split} primitive: breaks a string into the fields
 * found between runs of characters that the string's encoding reports as spaces.
 */
@RubiniusPrimitive(name = "string_awk_split")
public static abstract class StringAwkSplitPrimitiveNode extends RubiniusPrimitiveNode {

    @Child private TaintResultNode taintResultNode;

    public StringAwkSplitPrimitiveNode(RubyContext context, SourceSection sourceSection) {
        super(context, sourceSection);
        taintResultNode = new TaintResultNode(context, sourceSection, true, new int[]{});
    }

    public StringAwkSplitPrimitiveNode(StringAwkSplitPrimitiveNode prev) {
        super(prev);
        taintResultNode = prev.taintResultNode;
    }

    /**
     * Scans the string's bytes once, collecting each field as a new string.
     *
     * @param string the string to split
     * @param lim when positive, scanning stops once {@code lim} fields are accounted
     *            for and the remainder becomes the last field; when negative, a
     *            trailing empty field is kept; when zero, it is not
     * @return a Ruby array of the collected fields
     */
    @Specialization
    public RubyArray stringAwkSplit(RubyString string, int lim) {
        notDesignedForCompilation();

        final ByteList byteList = string.getBytes();
        final byte[] rawBytes = byteList.getUnsafeBytes();
        final int origin = byteList.getBegin();
        final int byteLength = byteList.getRealSize();
        final int stop = origin + byteLength;
        final Encoding encoding = byteList.getEncoding();

        final boolean hasLimit = lim > 0;
        final boolean singleByte = StringSupport.isSingleByteOptimizable(string, encoding);

        final List<RubyString> fields = new ArrayList<>();

        // With a limit in force the field being built already counts as the first one.
        int fieldCount = hasLimit ? 1 : 0;
        int cursor = origin;
        // Offsets below are relative to origin, not to the backing array.
        int fieldStart = 0;
        int fieldEnd = 0;
        boolean betweenFields = true;

        while (cursor < stop) {
            final int ch;
            if (singleByte) {
                ch = rawBytes[cursor++] & 0xff;
            } else {
                try {
                    ch = StringSupport.codePoint(getContext().getRuntime(), encoding, rawBytes, cursor, stop);
                } catch (org.jruby.exceptions.RaiseException ex) {
                    // Re-raise the JRuby exception as its Truffle counterpart.
                    throw new RaiseException(getContext().toTruffle(ex.getException(), this));
                }

                cursor += StringSupport.length(encoding, rawBytes, cursor, stop);
            }

            final int offset = cursor - origin;

            if (encoding.isSpace(ch)) {
                if (!betweenFields) {
                    // A space right after field content closes that field.
                    fields.add(makeString(string, fieldStart, fieldEnd - fieldStart));
                    betweenFields = true;
                    if (hasLimit) {
                        fieldCount++;
                    }
                }
                // The next field can start no earlier than just past this space.
                fieldStart = offset;
            } else {
                fieldEnd = offset;
                if (betweenFields) {
                    betweenFields = false;
                    // Limit reached: stop scanning; the rest is emitted below as one field.
                    if (hasLimit && lim <= fieldCount) {
                        break;
                    }
                }
            }
        }

        final boolean emitRemainder = byteLength > 0 && (hasLimit || byteLength > fieldStart || lim < 0);
        if (emitRemainder) {
            fields.add(makeString(string, fieldStart, byteLength - fieldStart));
        }

        return RubyArray.fromObjects(getContext().getCoreLibrary().getArrayClass(), fields.toArray());
    }

    /**
     * Builds a string of the source's logical class from {@code length} bytes of
     * the source starting at {@code index}, keeping the source's encoding and
     * handing the result to the taint node.
     */
    private RubyString makeString(RubyString source, int index, int length) {
        final ByteList sourceBytes = source.getBytes();
        final ByteList slice = new ByteList(sourceBytes, index, length);
        slice.setEncoding(sourceBytes.getEncoding());

        final RubyString piece = getContext().makeString(source.getLogicalClass(), slice);
        taintResultNode.maybeTaint(source, piece);

        return piece;
    }
}

@RubiniusPrimitive(name = "string_byte_substring")
public static abstract class StringByteSubstringPrimitiveNode extends RubiniusPrimitiveNode {

1 change: 1 addition & 0 deletions truffle/src/main/ruby/core.rb
Original file line number Diff line number Diff line change
@@ -67,6 +67,7 @@
require_relative 'core/rubinius/common/symbol'
require_relative 'core/rubinius/common/regexp'
require_relative 'core/rubinius/common/signal'
require_relative 'core/rubinius/common/splitter'
require_relative 'core/rubinius/common/mutex'
require_relative 'core/rubinius/common/throw_catch'
require_relative 'core/rubinius/common/time'
180 changes: 180 additions & 0 deletions truffle/src/main/ruby/core/rubinius/common/splitter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# Copyright (c) 2007-2014, Evan Phoenix and contributors
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of Rubinius nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

module Rubinius
  # Implements the algorithm behind String#split.
  class Splitter
    # Splits +string+ into its individual characters.
    #
    # string     - the String to split.
    # pattern    - unused here; kept so the signature matches the caller.
    # limit      - a positive Integer maximum number of fields, or a falsy
    #              value for "no limit".
    # tail_empty - when true and no limit applies, append a trailing empty
    #              string.
    #
    # Returns an Array of Strings.
    def self.split_characters(string, pattern, limit, tail_empty)
      if limit
        # When limit - 1 is larger than the number of characters the range
        # slice is nil; use an empty string so the last field is never nil.
        tail = string[(limit - 1)..-1] || string.byteslice(0, 0)
        string.chars.take(limit - 1) << tail
      else
        ret = string.chars.to_a
        # Use #byteslice because it returns the right class and taints
        # automatically. This is just appending a "", which is this
        # strange protocol if a negative limit is passed in
        ret << string.byteslice(0,0) if tail_empty
        ret
      end
    end

    # Raises ArgumentError when +string+ contains bytes that are invalid in
    # its encoding. Despite the predicate-style name, the return value is
    # not meaningful (nil when the string is valid).
    def self.valid_encoding?(string)
      raise ArgumentError, "invalid byte sequence in #{string.encoding.name}" unless string.valid_encoding?
    end

    # Splits +string+ on +pattern+, honouring +limit+.
    #
    # string  - the String to split.
    # pattern - a String, a Regexp, an object convertible with StringValue,
    #           or nil (falls back to $; and then to a single space).
    # limit   - the +undefined+ sentinel when the caller passed no limit,
    #           otherwise a value coerced with to_int. Positive limits cap
    #           the number of fields; zero trims trailing empty fields;
    #           negative limits keep them.
    #
    # Returns an Array of Strings.
    def self.split(string, pattern, limit)
      # Odd edge case
      return [] if string.empty?

      tail_empty = false

      if undefined.equal?(limit)
        limited = false
      else
        limit = Rubinius::Type.coerce_to limit, Fixnum, :to_int

        if limit > 0
          return [string.dup] if limit == 1
          limited = true
        else
          tail_empty = true
          limited = false
        end
      end

      pattern ||= ($; || " ")

      if pattern == ' '
        # A single space means awk-style whitespace splitting, which is
        # handed to the string_awk_split primitive.
        if limited
          lim = limit
        elsif tail_empty
          lim = -1
        else
          lim = 0
        end

        return Rubinius.invoke_primitive :string_awk_split, string, lim
      elsif pattern.kind_of?(Regexp)
        # Regexp patterns are used as they are by the matching loop below.
      else
        pattern = StringValue(pattern) unless pattern.kind_of?(String)

        valid_encoding?(string)
        valid_encoding?(pattern)

        trim_end = !tail_empty || limit == 0

        unless limited
          if pattern.empty?
            if trim_end
              return string.chars.to_a
            end
          else
            return split_on_string(string, pattern, trim_end)
          end
        end

        # Remaining string cases (a limit, or an empty pattern that keeps
        # the trailing field) go through the Regexp path.
        pattern = Regexp.new(Regexp.quote(pattern))
      end

      # Handle // as a special case.
      if pattern.source.empty?
        return split_characters(string, pattern, limited && limit, tail_empty)
      end

      start = 0
      ret = []

      last_match = nil
      last_match_end = 0

      while match = pattern.match_from(string, start)
        # Stop once only one more field may be produced; the rest of the
        # string becomes that field below.
        break if limited && limit - ret.size <= 1

        collapsed = match.collapsing?

        unless collapsed && (match.full.at(0) == last_match_end)
          ret << match.pre_match_from(last_match_end)

          # length > 1 means there are captures
          if match.length > 1
            ret.concat(match.captures.compact)
          end
        end

        start = match.full.at(1)
        if collapsed
          # Step past a collapsing match so the loop makes progress.
          start += 1
        end

        last_match = match
        last_match_end = last_match.full.at(1)
      end

      if last_match
        ret << last_match.post_match
      elsif ret.empty?
        ret << string.dup
      end

      # Trim from end
      if undefined.equal?(limit) || limit == 0
        while s = ret.at(-1) and s.empty?
          ret.pop
        end
      end

      ret
    end

    # Splits +string+ on every occurrence of the literal String +pattern+.
    #
    # string   - the String to split.
    # pattern  - the non-empty separator String.
    # trim_end - when true, trailing empty fields are removed.
    #
    # Returns an Array of Strings.
    def self.split_on_string(string, pattern, trim_end)
      pos = 0

      ret = []

      pat_size = pattern.bytesize
      str_size = string.bytesize

      while pos < str_size
        nxt = string.find_string(pattern, pos)
        break unless nxt

        match_size = nxt - pos
        ret << string.byteslice(pos, match_size)

        pos = nxt + pat_size
      end

      # No more separators, but we need to grab the last part still.
      ret << string.byteslice(pos, str_size - pos)

      if trim_end
        while s = ret.at(-1) and s.empty?
          ret.pop
        end
      end

      ret
    end
  end
end
4 changes: 4 additions & 0 deletions truffle/src/main/ruby/core/rubinius/common/string.rb
Original file line number Diff line number Diff line change
@@ -48,6 +48,10 @@ def hex
to_inum(16, false)
end

# Splits this string into an array of substrings by delegating to
# Rubinius::Splitter.split.
#
# pattern - the separator; defaults to nil.
# limit   - the maximum number of fields; defaults to the +undefined+
#           sentinel, which marks "no limit argument was given".
def split(pattern=nil, limit=undefined)
Rubinius::Splitter.split(self, pattern, limit)
end

def chars
if block_given?
each_char do |char|