Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jruby
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: de0b131ae841
Choose a base ref
...
head repository: jruby/jruby
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 1e205ced9152
Choose a head ref
  • 4 commits
  • 8 files changed
  • 1 contributor

Commits on Apr 1, 2015

  1. Copy the full SHA
    3692562 View commit details
  2. Copy the full SHA
    efb1aed View commit details
  3. Copy the full SHA
    bb1c024 View commit details
  4. Copy the full SHA
    1e205ce View commit details
225 changes: 5 additions & 220 deletions core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
@@ -50,7 +50,6 @@
import org.jcodings.specific.UTF32LEEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.unicode.UnicodeEncoding;
import org.jcodings.util.IntHash;
import org.joni.Matcher;
import org.joni.Option;
import org.joni.Regex;
@@ -91,7 +90,6 @@
import static org.jruby.util.StringSupport.MBCLEN_CHARFOUND_P;
import static org.jruby.util.StringSupport.MBCLEN_INVALID_P;
import static org.jruby.util.StringSupport.MBCLEN_NEEDMORE_P;
import static org.jruby.util.StringSupport.TRANS_SIZE;
import static org.jruby.util.StringSupport.codeLength;
import static org.jruby.util.StringSupport.codePoint;
import static org.jruby.util.StringSupport.codeRangeScan;
@@ -914,6 +912,7 @@ public final void modify19() {
clearCodeRange();
}

@Override
public void modifyAndKeepCodeRange() {
modify();
keepCodeRange();
@@ -4850,231 +4849,17 @@ private IRubyObject trTrans19(ThreadContext context, IRubyObject src, IRubyObjec
RubyString replStr = repl.convertToString();
ByteList replList = replStr.value;
RubyString srcStr = src.convertToString();
ByteList srcList = srcStr.value;

if (value.getRealSize() == 0) return runtime.getNil();
if (replList.getRealSize() == 0) return delete_bang19(context, src);

int cr = getCodeRange();
Encoding e1 = checkEncoding(srcStr);
Encoding e2 = checkEncoding(replStr);
Encoding enc = e1 == e2 ? e1 : srcStr.checkEncoding(replStr);

final StringSupport.TR trSrc = new StringSupport.TR(srcList);
boolean cflag = false;
int[] l = {0};

if (value.getRealSize() > 1 &&
EncodingUtils.encAscget(trSrc.buf, trSrc.p, trSrc.pend, l, enc) == '^' &&
trSrc.p + 1 < trSrc.pend){
cflag = true;
trSrc.p++;
}

int c, c0, last = 0;
final int[]trans = new int[StringSupport.TRANS_SIZE];
final StringSupport.TR trRepl = new StringSupport.TR(replList);
boolean modify = false;
IntHash<Integer> hash = null;
boolean singlebyte = singleByteOptimizable();

if (cflag) {
for (int i=0; i< StringSupport.TRANS_SIZE; i++) {
trans[i] = 1;
}

while ((c = StringSupport.trNext(trSrc, runtime, enc)) != -1) {
if (c < StringSupport.TRANS_SIZE) {
trans[c] = -1;
} else {
if (hash == null) hash = new IntHash<Integer>();
hash.put(c, 1); // QTRUE
}
}
while ((c = StringSupport.trNext(trRepl, runtime, enc)) != -1) {} /* retrieve last replacer */
last = trRepl.now;
for (int i=0; i< StringSupport.TRANS_SIZE; i++) {
if (trans[i] != -1) {
trans[i] = last;
}
}
} else {
for (int i=0; i< StringSupport.TRANS_SIZE; i++) {
trans[i] = -1;
}

while ((c = StringSupport.trNext(trSrc, runtime, enc)) != -1) {
int r = StringSupport.trNext(trRepl, runtime, enc);
if (r == -1) r = trRepl.now;
if (c < StringSupport.TRANS_SIZE) {
trans[c] = r;
if (codeLength(enc, r) != 1) singlebyte = false;
} else {
if (hash == null) hash = new IntHash<Integer>();
hash.put(c, r);
}
}
}

if (cr == CR_VALID) {
cr = CR_7BIT;
}
modifyAndKeepCodeRange();
int s = value.getBegin();
int send = s + value.getRealSize();
byte sbytes[] = value.getUnsafeBytes();

if (sflag) {
int clen, tlen;
int max = value.getRealSize();
int save = -1;
byte[] buf = new byte[max];
int t = 0;
while (s < send) {
boolean mayModify = false;
c0 = c = codePoint(runtime, e1, sbytes, s, send);
clen = codeLength(e1, c);
tlen = enc == e1 ? clen : codeLength(enc, c);
s += clen;

if (c < TRANS_SIZE) {
c = trCode(c, trans, hash, cflag, last, false);
} else if (hash != null) {
Integer tmp = hash.get(c);
if (tmp == null) {
if (cflag) {
c = last;
} else {
c = -1;
}
} else if (cflag) {
c = -1;
} else {
c = tmp;
}
} else {
c = -1;
}
CodeRangeable ret = StringSupport.trTransHelper(runtime, this, srcStr, replStr, sflag);

if (c != -1) {
if (save == c) {
if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
continue;
}
save = c;
tlen = codeLength(enc, c);
modify = true;
} else {
save = -1;
c = c0;
if (enc != e1) mayModify = true;
}

while (t + tlen >= max) {
max *= 2;
buf = Arrays.copyOf(buf, max);
}
enc.codeToMbc(c, buf, t);
// MRI does not check s < send again because their null terminator can still be compared
if (mayModify && (s >= send || ByteList.memcmp(sbytes, s, buf, t, tlen) != 0)) modify = true;
if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
t += tlen;
}
value.setUnsafeBytes(buf);
value.setRealSize(t);
} else if (enc.isSingleByte() || (singlebyte && hash == null)) {
while (s < send) {
c = sbytes[s] & 0xff;
if (trans[c] != -1) {
if (!cflag) {
c = trans[c];
sbytes[s] = (byte)c;
} else {
sbytes[s] = (byte)last;
}
modify = true;
}
if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
s++;
}
} else {
int clen, tlen, max = (int)(value.realSize() * 1.2);
byte[] buf = new byte[max];
int t = 0;

while (s < send) {
boolean mayModify = false;
c0 = c = codePoint(runtime, e1, sbytes, s, send);
clen = codeLength(e1, c);
tlen = enc == e1 ? clen : codeLength(enc, c);

if (c < TRANS_SIZE) {
c = trans[c];
} else if (hash != null) {
Integer tmp = hash.get(c);
if (tmp == null) {
if (cflag) {
c = last;
} else {
c = -1;
}
} else if (cflag) {
c = -1;
} else {
c = tmp;
}
}
else {
c = cflag ? last : -1;
}
if (c != -1) {
tlen = codeLength(enc, c);
modify = true;
} else {
c = c0;
if (enc != e1) mayModify = true;
}
while (t + tlen >= max) {
max <<= 1;
buf = Arrays.copyOf(buf, max);
}
// headius: I don't see how s and t could ever be the same, since they refer to different buffers
// if (s != t) {
enc.codeToMbc(c, buf, t);
if (mayModify && ByteList.memcmp(sbytes, s, buf, t, tlen) != 0) {
modify = true;
}
// }

if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
s += clen;
t += tlen;
}
value.setUnsafeBytes(buf);
value.setRealSize(t);
}

if (modify) {
if (cr != CR_BROKEN) setCodeRange(cr);
associateEncoding(enc);
return this;
if (ret == null) {
return runtime.getNil();
}
return runtime.getNil();
}

private int trCode(int c, int[]trans, IntHash<Integer> hash, boolean cflag, int last, boolean set) {
if (c < StringSupport.TRANS_SIZE) {
return trans[c];
} else if (hash != null) {
Integer tmp = hash.get(c);
if (tmp == null) {
return cflag ? last : -1;
} else {
return cflag ? -1 : tmp;
}
} else {
return cflag && set ? last : -1;
}
return (IRubyObject) ret;
}

/** rb_str_tr_s / rb_str_tr_s_bang
1 change: 1 addition & 0 deletions core/src/main/java/org/jruby/util/CodeRangeable.java
Original file line number Diff line number Diff line change
@@ -35,5 +35,6 @@ public interface CodeRangeable extends ByteListHolder {
public void setCodeRange(int codeRange);
public void clearCodeRange();
public void keepCodeRange();
public void modifyAndKeepCodeRange();
public Encoding checkEncoding(CodeRangeable other);
}
233 changes: 233 additions & 0 deletions core/src/main/java/org/jruby/util/StringSupport.java
Original file line number Diff line number Diff line change
@@ -1720,6 +1720,239 @@ else if (enc == UTF8Encoding.INSTANCE){
}
}

/**
 * rb_str_tr / rb_str_tr_bang
 *
 * Translates the characters of {@code self} in place: every character listed in {@code srcStr} is
 * replaced by the character at the corresponding position of {@code replStr}. When {@code srcStr}
 * is longer than one byte and starts with {@code '^'}, the set is complemented and every character
 * NOT listed is replaced by the last character of {@code replStr}.
 *
 * This method does not handle the cases where either {@code srcStr} or {@code replStr} are empty;
 * the caller must take the appropriate action in those cases.
 *
 * @param runtime the runtime handed to the code point / tr-iterator helpers
 * @param self    the string being translated; its byte list is modified (or replaced) in place
 * @param srcStr  the set of characters to translate, optionally prefixed by {@code '^'}
 * @param replStr the replacement characters
 * @param sflag   when true, consecutive characters that translate to the same replacement are
 *                emitted only once (the squeezing variant)
 * @return {@code self} if at least one character was changed, {@code null} otherwise
 */
public static CodeRangeable trTransHelper(Ruby runtime, CodeRangeable self, CodeRangeable srcStr, CodeRangeable replStr, boolean sflag) {
    final ByteList srcList = srcStr.getByteList();
    final ByteList replList = replStr.getByteList();

    int cr = self.getCodeRange();
    Encoding e1 = self.checkEncoding(srcStr);
    Encoding e2 = self.checkEncoding(replStr);
    Encoding enc = e1 == e2 ? e1 : srcStr.checkEncoding(replStr);

    final StringSupport.TR trSrc = new StringSupport.TR(srcList);
    boolean cflag = false;
    int[] l = {0};

    // The complement marker is only meaningful when the SOURCE SET has more than one byte
    // (MRI checks RSTRING_LEN(src) > 1). Checking the receiver's length instead would make a
    // one-byte receiver treat a leading '^' as a literal character.
    // NOTE(review): MRI advances by the decoded length of '^'; this port advances by one byte --
    // confirm this is adequate for non-ASCII-compatible encodings.
    if (srcList.getRealSize() > 1 &&
            EncodingUtils.encAscget(trSrc.buf, trSrc.p, trSrc.pend, l, enc) == '^' &&
            trSrc.p + 1 < trSrc.pend) {
        cflag = true;
        trSrc.p++;
    }

    int c, c0, last = 0;
    // Translation table for code points below TRANS_SIZE; -1 means "leave untouched".
    final int[] trans = new int[StringSupport.TRANS_SIZE];
    final StringSupport.TR trRepl = new StringSupport.TR(replList);
    boolean modify = false;
    // Lazily created table for code points at or above TRANS_SIZE.
    IntHash<Integer> hash = null;
    boolean singlebyte = StringSupport.isSingleByteOptimizable(self, EncodingUtils.STR_ENC_GET(self));

    if (cflag) {
        // Complemented set: start with "everything is translated", then knock out listed characters.
        Arrays.fill(trans, 1);

        while ((c = StringSupport.trNext(trSrc, runtime, enc)) != -1) {
            if (c < StringSupport.TRANS_SIZE) {
                trans[c] = -1;
            } else {
                if (hash == null) hash = new IntHash<Integer>();
                hash.put(c, 1); // QTRUE
            }
        }
        while ((c = StringSupport.trNext(trRepl, runtime, enc)) != -1) {} /* retrieve last replacer */
        last = trRepl.now;
        for (int i = 0; i < StringSupport.TRANS_SIZE; i++) {
            if (trans[i] != -1) {
                trans[i] = last;
            }
        }
    } else {
        Arrays.fill(trans, -1);

        while ((c = StringSupport.trNext(trSrc, runtime, enc)) != -1) {
            int r = StringSupport.trNext(trRepl, runtime, enc);
            // Replacement list exhausted: keep reusing its last character.
            if (r == -1) r = trRepl.now;
            if (c < StringSupport.TRANS_SIZE) {
                trans[c] = r;
                if (codeLength(enc, r) != 1) singlebyte = false;
            } else {
                if (hash == null) hash = new IntHash<Integer>();
                hash.put(c, r);
            }
        }
    }

    // Start from 7bit and upgrade to VALID as soon as a non-ASCII character is written.
    if (cr == CR_VALID) {
        cr = CR_7BIT;
    }
    self.modifyAndKeepCodeRange();
    int s = self.getByteList().getBegin();
    int send = s + self.getByteList().getRealSize();
    byte sbytes[] = self.getByteList().getUnsafeBytes();

    if (sflag) {
        // Squeezing variant: build the result in a fresh buffer, skipping repeated replacements.
        int clen, tlen;
        int max = self.getByteList().getRealSize();
        int save = -1;
        byte[] buf = new byte[max];
        int t = 0;
        while (s < send) {
            boolean mayModify = false;
            c0 = c = codePoint(runtime, e1, sbytes, s, send);
            clen = codeLength(e1, c);
            tlen = enc == e1 ? clen : codeLength(enc, c);
            s += clen;

            if (c < TRANS_SIZE) {
                c = trCode(c, trans, hash, cflag, last, false);
            } else if (hash != null) {
                Integer tmp = hash.get(c);
                if (tmp == null) {
                    if (cflag) {
                        c = last;
                    } else {
                        c = -1;
                    }
                } else if (cflag) {
                    c = -1;
                } else {
                    c = tmp;
                }
            } else {
                c = -1;
            }

            if (c != -1) {
                if (save == c) {
                    // Same replacement as the previous character: squeeze it away.
                    if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
                    continue;
                }
                save = c;
                tlen = codeLength(enc, c);
                modify = true;
            } else {
                save = -1;
                c = c0;
                if (enc != e1) mayModify = true;
            }

            while (t + tlen >= max) {
                max *= 2;
                buf = Arrays.copyOf(buf, max);
            }
            enc.codeToMbc(c, buf, t);
            // MRI does not check s < send again because their null terminator can still be compared
            if (mayModify && (s >= send || ByteList.memcmp(sbytes, s, buf, t, tlen) != 0)) modify = true;
            if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
            t += tlen;
        }
        self.getByteList().setUnsafeBytes(buf);
        self.getByteList().setRealSize(t);
    } else if (enc.isSingleByte() || (singlebyte && hash == null)) {
        // Single-byte fast path: every character and replacement is one byte, so rewrite in place.
        while (s < send) {
            c = sbytes[s] & 0xff;
            if (trans[c] != -1) {
                if (!cflag) {
                    c = trans[c];
                    sbytes[s] = (byte)c;
                } else {
                    sbytes[s] = (byte)last;
                }
                modify = true;
            }
            if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
            s++;
        }
    } else {
        // General multi-byte path: re-encode every character into a fresh, growable buffer.
        int clen, tlen, max = (int)(self.getByteList().realSize() * 1.2);
        byte[] buf = new byte[max];
        int t = 0;

        while (s < send) {
            boolean mayModify = false;
            c0 = c = codePoint(runtime, e1, sbytes, s, send);
            clen = codeLength(e1, c);
            tlen = enc == e1 ? clen : codeLength(enc, c);

            if (c < TRANS_SIZE) {
                c = trans[c];
            } else if (hash != null) {
                Integer tmp = hash.get(c);
                if (tmp == null) {
                    if (cflag) {
                        c = last;
                    } else {
                        c = -1;
                    }
                } else if (cflag) {
                    c = -1;
                } else {
                    c = tmp;
                }
            }
            else {
                c = cflag ? last : -1;
            }
            if (c != -1) {
                tlen = codeLength(enc, c);
                modify = true;
            } else {
                c = c0;
                if (enc != e1) mayModify = true;
            }
            while (t + tlen >= max) {
                max <<= 1;
                buf = Arrays.copyOf(buf, max);
            }
            // headius: I don't see how s and t could ever be the same, since they refer to different buffers
            // if (s != t) {
            enc.codeToMbc(c, buf, t);
            if (mayModify && ByteList.memcmp(sbytes, s, buf, t, tlen) != 0) {
                modify = true;
            }
            // }

            if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
            s += clen;
            t += tlen;
        }
        self.getByteList().setUnsafeBytes(buf);
        self.getByteList().setRealSize(t);
    }

    if (modify) {
        if (cr != CR_BROKEN) self.setCodeRange(cr);
        StringSupport.associateEncoding(self, enc);
        return self;
    }
    // Nothing changed: signal "no modification" so callers can return nil.
    return null;
}

/**
 * Resolves the translation of a single code point.
 *
 * Code points below TRANS_SIZE are answered straight from the {@code trans} table. Larger code
 * points consult {@code hash} when one exists; for a complemented set ({@code cflag}) a missing
 * entry yields {@code last} and a present entry yields -1, while for a plain set a missing entry
 * yields -1 and a present entry yields its mapped value. Without a hash the result is
 * {@code last} only when both {@code cflag} and {@code set} are true, otherwise -1.
 */
private static int trCode(int c, int[]trans, IntHash<Integer> hash, boolean cflag, int last, boolean set) {
    if (c < StringSupport.TRANS_SIZE) return trans[c];

    if (hash == null) {
        return (cflag && set) ? last : -1;
    }

    final Integer mapped = hash.get(c);
    if (cflag) {
        return mapped == null ? last : -1;
    }
    return mapped == null ? -1 : mapped;
}

private static int rb_memsearch_ss(byte[] xsBytes, int xs, int m, byte[] ysBytes, int ys, int n) {
int y;

4 changes: 2 additions & 2 deletions core/src/main/java/org/jruby/util/io/EncodingUtils.java
Original file line number Diff line number Diff line change
@@ -1863,7 +1863,7 @@ public static Encoding getActualEncoding(Encoding enc, ByteList byteList) {
return enc;
}

public static Encoding STR_ENC_GET(RubyString str) {
public static Encoding STR_ENC_GET(ByteListHolder str) {
return getEncoding(str.getByteList());
}
}
}
17 changes: 0 additions & 17 deletions spec/truffle/tags/core/string/tr_tags.txt

This file was deleted.

Original file line number Diff line number Diff line change
@@ -65,6 +65,7 @@
import org.jruby.truffle.runtime.rubinius.RubiniusByteArray;
import org.jruby.util.ByteList;
import org.jruby.util.CodeRangeSupport;
import org.jruby.util.CodeRangeable;
import org.jruby.util.Pack;
import org.jruby.util.StringSupport;
import org.jruby.util.io.EncodingUtils;
@@ -862,7 +863,7 @@ public DeleteBangNode(DeleteBangNode prev) {
}

@Specialization
public Object deleteBang(VirtualFrame frame, RubyString string, Object[] otherStrings) {
public Object deleteBang(VirtualFrame frame, RubyString string, Object... otherStrings) {
if (string.getBytes().length() == 0) {
return nil();
}
@@ -876,7 +877,7 @@ public Object deleteBang(VirtualFrame frame, RubyString string, Object[] otherSt
}

@CompilerDirectives.TruffleBoundary
private Object deleteBangSlow(VirtualFrame frame, RubyString string, Object[] args) {
private Object deleteBangSlow(VirtualFrame frame, RubyString string, Object... args) {
RubyString[] otherStrings = new RubyString[args.length];

for (int i = 0; i < args.length; i++) {
@@ -2019,6 +2020,58 @@ public static boolean reverseIsEqualToSelf(RubyString string) {
}
}

/**
 * Core method node registered under the name "tr!". It takes two required arguments, both of
 * which are coerced through a ToStr node before the specialization runs.
 */
@CoreMethod(names = "tr!", required = 2, raiseIfFrozenSelf = true)
@NodeChildren({
@NodeChild(value = "self"),
@NodeChild(value = "fromStr"),
@NodeChild(value = "toStr")
})
public abstract static class TrNode extends RubyNode {

// Created lazily, the first time an empty replacement string is seen.
@Child private DeleteBangNode deleteBangNode;

public TrNode(RubyContext context, SourceSection sourceSection) {
super(context, sourceSection);
}

// Copy constructor: carries the lazily created child node over from the previous node.
public TrNode(TrNode prev) {
super(prev);
deleteBangNode = prev.deleteBangNode;
}

// Wraps the "fromStr" child in a ToStr coercion node.
@CreateCast("fromStr") public RubyNode coerceFromStrToString(RubyNode fromStr) {
return ToStrNodeFactory.create(getContext(), getSourceSection(), fromStr);
}

// Wraps the "toStr" child in a ToStr coercion node.
@CreateCast("toStr") public RubyNode coerceToStrToString(RubyNode toStr) {
return ToStrNodeFactory.create(getContext(), getSourceSection(), toStr);
}

/**
 * Translates self in place. Returns nil when self is empty or when the translation helper
 * reports that nothing was modified; otherwise returns the helper's result.
 */
@Specialization
public Object tr(VirtualFrame frame, RubyString self, RubyString fromStr, RubyString toStr) {
// Nothing to translate in an empty receiver.
if (self.getByteList().getRealSize() == 0) {
return nil();
}

// An empty replacement is handed to DeleteBangNode instead of the translation helper,
// which does not handle empty replacement strings itself.
if (toStr.getByteList().getRealSize() == 0) {
if (deleteBangNode == null) {
CompilerDirectives.transferToInterpreter();
deleteBangNode = insert(StringNodesFactory.DeleteBangNodeFactory.create(getContext(), getSourceSection(), new RubyNode[] {}));
}

return deleteBangNode.deleteBang(frame, self, fromStr);
}

// sflag is false: plain translation, no squeezing of repeated replacements.
final CodeRangeable ret = StringSupport.trTransHelper(getContext().getRuntime(), self, fromStr, toStr, false);

// The helper returns null when no character was changed.
if (ret == null) {
return nil();
}

return ret;
}
}

@CoreMethod(names = "unpack", required = 1)
public abstract static class UnpackNode extends ArrayCoreMethodNode {

Original file line number Diff line number Diff line change
@@ -150,6 +150,12 @@ public final void modify(int length) {
bytes.invalidate();
}

// Prepares this object for modification via modify(), then calls keepCodeRange().
@Override
public final void modifyAndKeepCodeRange() {
modify();
keepCodeRange();
}

@Override
@TruffleBoundary
public Encoding checkEncoding(CodeRangeable other) {
Original file line number Diff line number Diff line change
@@ -137,6 +137,12 @@ public final void modify(int length) {
throw new UnsupportedOperationException();
}

// Calls modify() followed by keepCodeRange(), in that order.
@Override
public final void modifyAndKeepCodeRange() {
modify();
keepCodeRange();
}

@Override
public Encoding checkEncoding(CodeRangeable other) {
// TODO (nirvdrum Jan. 13, 2015): This should check if the encodings are compatible rather than just always succeeding.