Skip to content

Commit

Permalink
[Truffle] Fixed an infinite loop in RubyRegexp#gsub and got some more specs passing.
Browse files Browse the repository at this point in the history
  • Loading branch information
nirvdrum committed Dec 24, 2014
1 parent 4e40732 commit a5f607a
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 22 deletions.
Expand Up @@ -611,7 +611,7 @@ public RubyString gsub(VirtualFrame frame, RubyString string, RubyString regexpS
public RubyString gsub(RubyString string, RubyRegexp regexp, RubyString replacement) {
notDesignedForCompilation();

return regexp.gsub(string.toString(), replacement.toString());
return regexp.gsub(string, replacement.toString());
}
}

Expand Down
37 changes: 27 additions & 10 deletions core/src/main/java/org/jruby/truffle/runtime/core/RubyRegexp.java
Expand Up @@ -260,28 +260,42 @@ private void setThread(String name, Object value) {
}

@CompilerDirectives.TruffleBoundary
public RubyString gsub(String string, String replacement) {
public RubyString gsub(RubyString string, String replacement) {
final RubyContext context = getContext();

final byte[] stringBytes = string.getBytes(StandardCharsets.UTF_8);
final byte[] stringBytes = string.getBytes().bytes();
final Encoding encoding = string.getBytes().getEncoding();
final Matcher matcher = regex.matcher(stringBytes);

final StringBuilder builder = new StringBuilder();
int p = string.getBytes().getBegin();
int end = 0;
int range = p + string.getBytes().getRealSize();
int lastMatchEnd = 0;

int p = 0;
final StringBuilder builder = new StringBuilder();

while (true) {
final int match = matcher.search(p, stringBytes.length, Option.DEFAULT);
Object matchData = matchCommon(string.getBytes(), false, false, matcher, p + end, range);

if (matchData == context.getCoreLibrary().getNilObject()) {
builder.append(StandardCharsets.UTF_8.decode(ByteBuffer.wrap(stringBytes, lastMatchEnd, range - lastMatchEnd)));

if (match == -1) {
builder.append(StandardCharsets.UTF_8.decode(ByteBuffer.wrap(stringBytes, p, stringBytes.length - p)));
break;
} else {
builder.append(StandardCharsets.UTF_8.decode(ByteBuffer.wrap(stringBytes, p, matcher.getBegin() - p)));
}

Region region = matcher.getEagerRegion();

for (int i = 0; i < region.numRegs; i++) {
int regionStart = region.beg[i];
int regionEnd = region.end[i];

builder.append(StandardCharsets.UTF_8.decode(ByteBuffer.wrap(stringBytes, lastMatchEnd, regionStart - lastMatchEnd)));
builder.append(StandardCharsets.UTF_8.decode(ByteBuffer.wrap(replacement.getBytes(StandardCharsets.UTF_8))));

lastMatchEnd = regionEnd;
}

p = matcher.getEnd();
end = StringSupport.positionEndForScan(string.getBytes(), matcher, encoding, p, range);
}

return context.makeString(builder.toString());
Expand Down Expand Up @@ -382,6 +396,9 @@ public Object scan(RubyString string) {
final List<RubyString> parts = new ArrayList<>();

Object[] values = md.getValues();

// The first element is the entire matched string, so skip over it because we're only interested in
// the constituent matched parts.
for (int i = 1; i < values.length; i++) {
parts.add((RubyString) values[i]);
}
Expand Down
11 changes: 0 additions & 11 deletions spec/truffle/tags/core/string/gsub_tags.txt
@@ -1,11 +1,3 @@
fails:String#gsub with pattern and replacement inserts the replacement around every character when the pattern collapses
fails:String#gsub with pattern and replacement respects $KCODE when the pattern collapses
fails:String#gsub with pattern and replacement doesn't freak out when replacing ^
fails:String#gsub with pattern and replacement returns a copy of self with all occurrences of pattern replaced with replacement
fails:String#gsub with pattern and replacement ignores a block if supplied
fails:String#gsub with pattern and replacement supports \G which matches at the beginning of the remaining (non-matched) string
fails:String#gsub with pattern and replacement supports /i for ignoring case
fails:String#gsub with pattern and replacement doesn't interpret regexp metacharacters if pattern is a string
fails:String#gsub with pattern and replacement replaces \1 sequences with the regexp's corresponding capture
fails:String#gsub with pattern and replacement treats \1 sequences without corresponding captures as empty strings
fails:String#gsub with pattern and replacement replaces \& and \0 with the complete match
Expand All @@ -14,11 +6,8 @@ fails:String#gsub with pattern and replacement replaces \' with everything after
fails:String#gsub with pattern and replacement replaces \+ with the last paren that actually matched
fails:String#gsub with pattern and replacement treats \+ as an empty string if there was no captures
fails:String#gsub with pattern and replacement maps \\ in replacement to \
fails:String#gsub with pattern and replacement leaves unknown \x escapes in replacement untouched
fails:String#gsub with pattern and replacement leaves \ at the end of replacement untouched
fails:String#gsub with pattern and replacement replaces \k named backreferences with the regexp's corresponding capture
fails:String#gsub with pattern and replacement taints the result if the original string or replacement is tainted
fails:String#gsub with pattern and replacement handles pattern collapse without $KCODE
fails:String#gsub with pattern and replacement untrusts the result if the original string or replacement is untrusted
fails:String#gsub with pattern and replacement tries to convert pattern to a string using to_str
fails:String#gsub with pattern and replacement raises a TypeError when pattern can't be converted to a string
Expand Down

0 comments on commit a5f607a

Please sign in to comment.