Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jruby
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: dff5cee6dc79
Choose a base ref
...
head repository: jruby/jruby
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: a795fb2fa7a9
Choose a head ref
  • 4 commits
  • 5 files changed
  • 1 contributor

Commits on Dec 14, 2015

  1. fix tr 'string'.count(u1, u2) ... with multiple non-single byte encoding chars
    
    The internal Map (and the dummy marker value) has been changed to IntHashMap.
    This was done purely for easier debugging (toString works with IntHashMap).
    kares committed Dec 14, 2015
    Copy the full SHA
    7062b56 View commit details
  2. Copy the full SHA
    644412e View commit details
  3. Copy the full SHA
    f26ebc7 View commit details
  4. Copy the full SHA
    a795fb2 View commit details
67 changes: 36 additions & 31 deletions core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
@@ -4406,48 +4406,53 @@ public IRubyObject count19(ThreadContext context) {
// MRI: rb_str_count, first half
@JRubyMethod(name = "count")
public IRubyObject count19(ThreadContext context, IRubyObject arg) {
Ruby runtime = context.runtime;

RubyString otherStr = arg.convertToString();
ByteList otherBL = otherStr.getByteList();
Encoding enc = checkEncoding(otherStr);

if (otherBL.length() == 1 && enc.isAsciiCompatible() &&
enc.isReverseMatchAllowed(otherBL.unsafeBytes(), otherBL.begin(), otherBL.begin() + otherBL.getRealSize()) &&
!isCodeRangeBroken()) {
int n = 0;
int[] len_p = {0};
int c = EncodingUtils.encCodepointLength(runtime, otherBL.unsafeBytes(), otherBL.begin(), otherBL.begin() + otherBL.getRealSize(), len_p, enc);
final Ruby runtime = context.runtime;

if (value.length() ==0) return RubyFixnum.zero(runtime);
byte[]bytes = value.unsafeBytes();
int p = value.getBegin();
int end = p + value.length();
while (p < end) {
if ((bytes[p++] & 0xff) == c) n++;
final RubyString countStr = arg.convertToString();
final ByteList countValue = countStr.getByteList();
final Encoding enc = checkEncoding(countStr);

if ( countValue.length() == 1 && enc.isAsciiCompatible() ) {
final byte[] countBytes = countValue.unsafeBytes();
final int begin = countValue.begin(), size = countValue.length();
if ( enc.isReverseMatchAllowed(countBytes, begin, begin + size) && ! isCodeRangeBroken() ) {
if ( value.length() == 0 ) return RubyFixnum.zero(runtime);

int n = 0;
int[] len_p = {0};
int c = EncodingUtils.encCodepointLength(runtime, countBytes, begin, begin + size, len_p, enc);

final byte[] bytes = value.unsafeBytes();
int i = value.begin();
final int end = i + value.length();
while ( i < end ) {
if ( ( bytes[i++] & 0xff ) == c ) n++;
}
return RubyFixnum.newFixnum(runtime, n);
}
return RubyFixnum.newFixnum(runtime, n);
}

final boolean[]table = new boolean[StringSupport.TRANS_SIZE + 1];
StringSupport.TrTables tables = StringSupport.trSetupTable(otherStr.value, context.runtime, table, null, true, enc);
final boolean[] table = new boolean[StringSupport.TRANS_SIZE + 1];
StringSupport.TrTables tables = StringSupport.trSetupTable(countValue, runtime, table, null, true, enc);
return runtime.newFixnum(StringSupport.countCommon19(value, runtime, table, tables, enc));
}

// MRI: rb_str_count for arity > 1, first half
@JRubyMethod(name = "count", required = 1, rest = true)
public IRubyObject count19(ThreadContext context, IRubyObject[] args) {
Ruby runtime = context.runtime;
if (value.getRealSize() == 0) return RubyFixnum.zero(runtime);
final Ruby runtime = context.runtime;

RubyString otherStr = args[0].convertToString();
Encoding enc = checkEncoding(otherStr);
final boolean[]table = new boolean[StringSupport.TRANS_SIZE + 1];
StringSupport.TrTables tables = StringSupport.trSetupTable(otherStr.value, runtime, table, null, true, enc);
for (int i = 1; i<args.length; i++) {
otherStr = args[i].convertToString();
enc = checkEncoding(otherStr);
tables = StringSupport.trSetupTable(otherStr.value, runtime, table, tables, false, enc);
if ( value.length() == 0 ) return RubyFixnum.zero(runtime);

RubyString countStr = args[0].convertToString();
Encoding enc = checkEncoding(countStr);

final boolean[] table = new boolean[StringSupport.TRANS_SIZE + 1];
StringSupport.TrTables tables = StringSupport.trSetupTable(countStr.value, runtime, table, null, true, enc);
for ( int i = 1; i < args.length; i++ ) {
countStr = args[i].convertToString();
enc = checkEncoding(countStr);
tables = StringSupport.trSetupTable(countStr.value, runtime, table, tables, false, enc);
}

return runtime.newFixnum(StringSupport.countCommon19(value, runtime, table, tables, enc));
50 changes: 28 additions & 22 deletions core/src/main/java/org/jruby/util/StringSupport.java
Original file line number Diff line number Diff line change
@@ -37,15 +37,14 @@
import org.joni.Matcher;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyBasicObject;
import org.jruby.RubyEncoding;
import org.jruby.RubyIO;
import org.jruby.RubyObject;
import org.jruby.RubyString;
import org.jruby.runtime.Block;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;

import org.jruby.util.collections.IntHashMap;
import org.jruby.util.io.EncodingUtils;
import sun.misc.Unsafe;

@@ -859,7 +858,7 @@ public static ByteList dumpCommon(Ruby runtime, ByteList byteList) {
return outBytes;
}

public static boolean isEVStr(byte[]bytes, int p, int end) {
public static boolean isEVStr(byte[] bytes, int p, int end) {
return p < end ? isEVStr(bytes[p] & 0xff) : false;
}

@@ -871,10 +870,10 @@ public static boolean isEVStr(int c) {
* rb_str_count
*/

public static int countCommon19(ByteList value, Ruby runtime, boolean[] table, TrTables tables, Encoding enc) {
final byte[] bytes = value.getUnsafeBytes();
int p = value.getBegin();
final int end = p + value.getRealSize();
public static int countCommon19(ByteList str, Ruby runtime, boolean[] table, TrTables tables, Encoding enc) {
final byte[] bytes = str.getUnsafeBytes();
int p = str.getBegin();
final int end = p + str.getRealSize();

int count = 0;
while (p < end) {
@@ -979,7 +978,7 @@ public TR(ByteList bytes) {
gen = false;
}

byte[] buf;
final byte[] buf;
int p, pend, now, max;
boolean gen;
}
@@ -988,13 +987,14 @@ public TR(ByteList bytes) {
* tr_setup_table
*/
public static final class TrTables {
IntHash<IRubyObject> del, noDel;
IntHashMap<Object> del, noDel; // used as ~ Set
}

private static final Object DUMMY_VALUE = "";

public static TrTables trSetupTable(final ByteList str, final Ruby runtime,
final boolean[] stable, TrTables tables, final boolean first, final Encoding enc) {

IntHash<IRubyObject> table = null, ptable = null;
int i, l[] = {0};
final boolean cflag;

@@ -1019,6 +1019,7 @@ else if (stable[TRANS_SIZE] && !cflag) {
if (tables == null) tables = new TrTables();

byte[] buf = null; // lazy initialized
IntHashMap<Object> table = null, ptable = null;

int c;
while ((c = trNext(tr, runtime, enc)) != -1) {
@@ -1033,22 +1034,29 @@ else if (stable[TRANS_SIZE] && !cflag) {
buf[c & 0xff] = (byte) (cflag ? 0 : 1);
}
else {
final int key = c;

if (table == null && (first || tables.del != null || stable[TRANS_SIZE])) {
if (cflag) {
if ( table == null && (first || tables.del != null || stable[TRANS_SIZE]) ) {
if ( cflag ) {
ptable = tables.noDel;
table = ptable != null ? ptable : new IntHash<IRubyObject>();
table = ptable != null ? ptable : new IntHashMap<>(8);
tables.noDel = table;
}
else {
table = new IntHash<IRubyObject>();
table = new IntHashMap<>(8);
ptable = tables.del;
tables.del = table;
}
}
if (table != null && (ptable == null || (cflag ^ ptable.get(key) == null))) {
table.put(key, RubyBasicObject.NEVER);

if ( table != null ) {
final int key = c;
if ( ptable == null ) table.put(key, DUMMY_VALUE);
else {
if ( cflag ) table.put(key, DUMMY_VALUE);
else {
final boolean val = ptable.get(key) != null;
table.put(key, val ? DUMMY_VALUE : null);
}
}
}
}
}
@@ -1063,17 +1071,15 @@ else if (stable[TRANS_SIZE] && !cflag) {
}
}

if (table == null && !cflag) {
tables.del = null;
}
if ( table == null && ! cflag ) tables.del = null;

return tables;
}

public static boolean trFind(final int c, final boolean[] table, final TrTables tables) {
if (c < TRANS_SIZE) return table[c];

final IntHash<IRubyObject> del = tables.del, noDel = tables.noDel;
final IntHashMap<Object> del = tables.del, noDel = tables.noDel;

if (del != null) {
if (del.get(c) != null &&
9 changes: 9 additions & 0 deletions test/jruby/test_string.rb
Original file line number Diff line number Diff line change
@@ -87,6 +87,15 @@ def test_sub_utf8
do_sub "あ" + EOL + EOL + "あ", 6, 3, 1
end

# Regression test for String#count when the receiver and the selector
# arguments contain multi-byte (non-ASCII) characters.
# The receiver in every case is "abc" followed by U+3042, U+3044, U+3046.
def test_count
# a single multi-byte selector matches exactly one character of the receiver
assert_equal(1, "abc\u{3042 3044 3046}".count("\u3042"))
assert_equal(1, "abc\u{3042 3044 3046}".count("\u3044"))
# two selectors: only characters present in both sets ('b' and 'c') are counted
assert_equal(2, "abc\u{3042 3044 3046}".count("abc\u3044", 'bc'))
# three selectors with no character in common: nothing is counted
assert_equal(0, "abc\u{3042 3044 3046}".count("\u3042", "\u3044", "\u3046"))
# U+3042 is the only character shared by all three selectors
assert_equal(1, "abc\u{3042 3044 3046}".count("c\u3044\u3042", "\u3042\u3042\u3044", "\u3042"))
# each selector starts with '^' (negation); expected 2 corresponds to 'a' and 'b',
# the only receiver characters not excluded by any selector
assert_equal(2, "abc\u{3042 3044 3046}".count("^\u3042", "^\u3044", "^\u3046", "^c"))
end

private

def do_sub buf, e1, e2, e3
2 changes: 2 additions & 0 deletions test/mri/excludes/TestM17NComb.rb
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
exclude :test_str_intern, "needs investigation"
exclude :test_str_count, "does not raise compatibility error"
exclude :test_str_crypt_nonstrict, "#crypt failing: Errno::EINVAL: Invalid argument"
1 change: 0 additions & 1 deletion test/mri/excludes/TestString.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
exclude :test_count, "needs investigation"
exclude :test_partition, "needs investigation"
exclude :test_rpartition, "needs investigation"
exclude :test_rstrip, "needs investigation"