Skip to content

Commit

Permalink
2.5 String#grapheme_clusters and String#each_grapheme_cluster
Browse files Browse the repository at this point in the history
lopex committed Mar 27, 2018

Verified

This commit was signed with the committer’s verified signature.
headius Charles Oliver Nutter
1 parent dc63be1 commit 25df05d
Showing 1 changed file with 85 additions and 0 deletions.
85 changes: 85 additions & 0 deletions core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
@@ -5309,6 +5309,91 @@ public IRubyObject size(IRubyObject[] args) {
};
}

/**
 * Regexp source consisting of the two bytes {@code \X}. It is handed to
 * {@code RubyRegexp.getRegexpFromCache} to obtain the regex that is matched
 * repeatedly to step through a string one grapheme cluster at a time.
 * Declared {@code final} because it is a shared constant that is only read.
 */
private static final ByteList GRAPHEME_CLUSTER_PATTERN = new ByteList(new byte[] {(byte)'\\', (byte)'X'});

/**
 * Builds the size callback used by the enumerator returned from
 * {@code each_grapheme_cluster} when no block is given.
 *
 * @return a {@link SizeFn} that reports how many grapheme clusters this string holds
 */
private SizeFn eachGraphemeClusterSizeFn() {
    final RubyString receiver = this;
    return new SizeFn() {
        /**
         * Counts the grapheme clusters of the receiver.
         *
         * @param args ignored
         * @return the receiver's {@code length()} when its encoding is not Unicode or the
         *         string is single-byte optimizable; otherwise the number of successful
         *         cluster matches as a fixnum
         */
        @Override
        public IRubyObject size(IRubyObject[] args) {
            final Ruby runtime = receiver.getRuntime();
            final ByteList byteList = receiver.getByteList();
            final Encoding encoding = byteList.getEncoding();

            // Only Unicode strings that are not single-byte optimizable need the regex scan.
            final boolean needsClusterScan = encoding.isUnicode() && !isSingleByteOptimizable(receiver, encoding);
            if (!needsClusterScan) {
                return receiver.length();
            }

            final Regex clusterRegex =
                    RubyRegexp.getRegexpFromCache(runtime, GRAPHEME_CLUSTER_PATTERN, encoding, RegexpOptions.NULL_OPTIONS);
            final int start = byteList.getBegin();
            final int limit = start + byteList.getRealSize();
            final Matcher matcher = clusterRegex.matcher(byteList.getUnsafeBytes(), start, limit);

            int clusters = 0;
            // Each anchored match result is used as the byte length of one cluster;
            // a non-positive result stops the scan.
            for (int pos = start; pos < limit; ) {
                final int matched = matcher.match(pos, limit, Option.DEFAULT);
                if (matched <= 0) {
                    break;
                }
                clusters++;
                pos += matched;
            }
            return RubyFixnum.newFixnum(runtime, clusters);
        }
    };
}

/**
 * Shared implementation behind {@code grapheme_clusters} and {@code each_grapheme_cluster}.
 *
 * <p>Strings whose encoding is not Unicode, or that are single-byte optimizable, are
 * delegated to {@code enumerateChars}. Otherwise the string is walked by repeatedly
 * matching {@code GRAPHEME_CLUSTER_PATTERN} at the current byte offset.
 *
 * @param context   the current thread context
 * @param name      the Ruby method name, passed on to the enumerator / char enumeration
 * @param block     the block supplied by the caller, if any
 * @param wantarray {@code true} to collect the clusters into an array, {@code false} to yield them
 * @return an enumerator when no block is given and no array is wanted; the array of
 *         clusters when collecting; otherwise this string
 */
private IRubyObject enumerateGraphemeClusters(ThreadContext context, String name, Block block, boolean wantarray) {
    final RubyString self = this;
    RubyArray collected = null;
    final Ruby runtime = context.getRuntime();
    final Encoding encoding = value.getEncoding();

    // Only Unicode strings that are not single-byte optimizable need the regex scan.
    final boolean needsClusterScan = encoding.isUnicode() && !isSingleByteOptimizable(self, encoding);
    if (!needsClusterScan) {
        return enumerateChars(context, name, block, wantarray);
    }

    if (!block.isGiven()) {
        if (!wantarray) {
            return enumeratorizeWithSize(context, self, name, eachGraphemeClusterSizeFn());
        }
        collected = RubyArray.newBlankArray(runtime, self.size());
    } else if (wantarray) {
        // this code should be live in 3.0
        if (false) {
            runtime.getWarnings().warn("given block not used");
            collected = RubyArray.newArray(runtime);
        } else {
            // A block was passed to the array-returning form: warn and yield instead of collecting.
            runtime.getWarnings().warning("passing a block to String#grapheme_clusters is deprecated");
            wantarray = false;
        }
    }

    final Regex clusterRegex =
            RubyRegexp.getRegexpFromCache(runtime, GRAPHEME_CLUSTER_PATTERN, encoding, RegexpOptions.NULL_OPTIONS);

    final int start = value.getBegin();
    final int limit = start + value.getRealSize();
    final byte[] rawBytes = value.getUnsafeBytes();
    final Matcher matcher = clusterRegex.matcher(rawBytes, start, limit);

    // Each anchored match result is used as the byte length of one cluster;
    // a non-positive result stops the scan.
    for (int pos = start; pos < limit; ) {
        final int matched = matcher.match(pos, limit, Option.DEFAULT);
        if (matched <= 0) {
            break;
        }
        final RubyString cluster = newStringShared(runtime, rawBytes, pos, matched, encoding);
        if (wantarray) {
            collected.push(cluster);
        } else {
            block.yield(context, cluster);
        }
        pos += matched;
    }

    if (wantarray) {
        return collected;
    }
    return self;
}

/**
 * Ruby {@code String#grapheme_clusters}: requests the grapheme clusters of this
 * string as an array. If a block is given, a deprecation warning is issued and the
 * clusters are yielded to the block instead.
 *
 * @param context the current thread context
 * @param block   the optional (deprecated) block
 * @return the result of {@code enumerateGraphemeClusters} in array-collecting mode
 */
@JRubyMethod
public IRubyObject grapheme_clusters(ThreadContext context, Block block) {
    final boolean collectIntoArray = true;
    return enumerateGraphemeClusters(context, "grapheme_clusters", block, collectIntoArray);
}

/**
 * Ruby {@code String#each_grapheme_cluster}: yields each grapheme cluster of this
 * string to the block, or returns a sized enumerator when no block is given.
 *
 * @param context the current thread context
 * @param block   the block to yield clusters to, if any
 * @return the result of {@code enumerateGraphemeClusters} in yielding mode
 */
@JRubyMethod
public IRubyObject each_grapheme_cluster(ThreadContext context, Block block) {
    final boolean collectIntoArray = false;
    return enumerateGraphemeClusters(context, "each_grapheme_cluster", block, collectIntoArray);
}

/** rb_str_intern
*
*/

0 comments on commit 25df05d

Please sign in to comment.