Skip to content

Commit 22d9d36

Browse files
committed Apr 8, 2018
String/Symbol#upcase/downcase/capitalize/swapcase for general case encodings
1 parent bc2a772 commit 22d9d36

File tree

3 files changed

+222
-266
lines changed

3 files changed

+222
-266
lines changed
 

Diff for: ‎core/src/main/java/org/jruby/RubyString.java

+116 -145
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,10 @@
4040
package org.jruby;
4141

4242
import jnr.posix.POSIX;
43+
44+
import org.jcodings.Config;
4345
import org.jcodings.Encoding;
46+
import org.jcodings.IntHolder;
4447
import org.jcodings.exception.EncodingException;
4548
import org.jcodings.specific.ASCIIEncoding;
4649
import org.jcodings.specific.USASCIIEncoding;
@@ -1632,8 +1635,8 @@ public IRubyObject casecmp_p(ThreadContext context, IRubyObject other) {
16321635
Encoding enc = StringSupport.areCompatible(this, otherStr);
16331636
if (enc == null) return context.nil;
16341637

1635-
RubyString downcasedString = this.downcase(context);
1636-
RubyString otherDowncasedString = otherStr.downcase(context);
1638+
RubyString downcasedString = this.downcase(context, RubyObject.NULL_ARRAY);
1639+
RubyString otherDowncasedString = otherStr.downcase(context, RubyObject.NULL_ARRAY);
16371640
return downcasedString.equals(otherDowncasedString) ? context.runtime.getTrue() : context.runtime.getFalse();
16381641
}
16391642

@@ -1703,60 +1706,6 @@ public IRubyObject match_p(ThreadContext context, IRubyObject pattern, IRubyObje
17031706
return result;
17041707
}
17051708

1706-
/** rb_str_capitalize / rb_str_capitalize_bang
1707-
*
1708-
*/
1709-
public IRubyObject capitalize(ThreadContext context) {
1710-
return capitalize19(context);
1711-
}
1712-
1713-
public IRubyObject capitalize_bang(ThreadContext context) {
1714-
return capitalize_bang19(context);
1715-
}
1716-
1717-
@JRubyMethod(name = "capitalize")
1718-
public IRubyObject capitalize19(ThreadContext context) {
1719-
RubyString str = strDup(context.runtime);
1720-
str.capitalize_bang19(context);
1721-
return str;
1722-
}
1723-
1724-
@JRubyMethod(name = "capitalize!")
1725-
public IRubyObject capitalize_bang19(ThreadContext context) {
1726-
Ruby runtime = context.runtime;
1727-
Encoding enc = checkDummyEncoding();
1728-
1729-
if (value.getRealSize() == 0) {
1730-
modifyCheck();
1731-
return runtime.getNil();
1732-
}
1733-
1734-
modifyAndKeepCodeRange();
1735-
1736-
int s = value.getBegin();
1737-
int end = s + value.getRealSize();
1738-
byte[]bytes = value.getUnsafeBytes();
1739-
boolean modify = false;
1740-
1741-
int c = codePoint(runtime, enc, bytes, s, end);
1742-
if (enc.isLower(c)) {
1743-
enc.codeToMbc(toUpper(enc, c), bytes, s);
1744-
modify = true;
1745-
}
1746-
1747-
s += codeLength(enc, c);
1748-
while (s < end) {
1749-
c = codePoint(runtime, enc, bytes, s, end);
1750-
if (enc.isUpper(c)) {
1751-
enc.codeToMbc(toLower(enc, c), bytes, s);
1752-
modify = true;
1753-
}
1754-
s += codeLength(enc, c);
1755-
}
1756-
1757-
return modify ? this : runtime.getNil();
1758-
}
1759-
17601709
public IRubyObject op_ge(ThreadContext context, IRubyObject other) {
17611710
return op_ge19(context, other);
17621711
}
@@ -1828,162 +1777,184 @@ public IRubyObject str_eql_p19(ThreadContext context, IRubyObject other) {
18281777
*/
18291778
@Deprecated
18301779
public RubyString upcase(ThreadContext context) {
1831-
return upcase19(context);
1780+
return upcase19(context, RubyObject.NULL_ARRAY);
18321781
}
18331782

18341783
@Deprecated
18351784
public IRubyObject upcase_bang(ThreadContext context) {
1836-
return upcase_bang19(context);
1785+
return upcase_bang19(context, RubyObject.NULL_ARRAY);
18371786
}
18381787

1839-
@JRubyMethod(name = "upcase")
1840-
public RubyString upcase19(ThreadContext context) {
1788+
@JRubyMethod(name = "upcase", rest = true)
1789+
public RubyString upcase19(ThreadContext context, IRubyObject[] args) {
18411790
RubyString str = strDup(context.runtime);
1842-
str.upcase_bang19(context);
1791+
str.upcase_bang19(context, args);
18431792
return str;
18441793
}
18451794

1846-
@JRubyMethod(name = "upcase!")
1847-
public IRubyObject upcase_bang19(ThreadContext context) {
1795+
@JRubyMethod(name = "upcase!", rest = true)
1796+
public IRubyObject upcase_bang19(ThreadContext context, IRubyObject[] args) {
18481797
Ruby runtime = context.runtime;
1849-
Encoding enc = checkDummyEncoding();
1850-
1851-
if (value.getRealSize() == 0) {
1852-
modifyCheck();
1853-
return runtime.getNil();
1854-
}
1855-
1798+
int flags = StringSupport.checkCaseOptions(runtime, args, Config.CASE_UPCASE);
18561799
modifyAndKeepCodeRange();
1800+
Encoding enc = checkDummyEncoding();
18571801

1858-
int s = value.getBegin();
1859-
int end = s + value.getRealSize();
1860-
byte[]bytes = value.getUnsafeBytes();
1861-
1862-
if (singleByteOptimizable(enc)) {
1863-
return singleByteUpcase(runtime, bytes, s, end);
1802+
if (((flags & Config.CASE_ASCII_ONLY) != 0 && (enc.isUTF8() || enc.maxLength() == 1)) ||
1803+
(flags & Config.CASE_FOLD_TURKISH_AZERI) == 0 && getCodeRange() == CR_7BIT) {
1804+
int s = value.getBegin();
1805+
int end = s + value.getRealSize();
1806+
byte[]bytes = value.getUnsafeBytes();
1807+
while (s < end) {
1808+
int c = bytes[s] & 0xff;
1809+
if (Encoding.isAscii(c) && 'a' <= c && c <= 'z') {
1810+
bytes[s] = (byte)('A' + (c - 'a'));
1811+
flags |= Config.CASE_MODIFIED;
1812+
}
1813+
s++;
1814+
}
1815+
} else if ((flags & Config.CASE_ASCII_ONLY) != 0) {
1816+
flags = StringSupport.asciiOnlyCaseMap(runtime, this, flags, enc);
18641817
} else {
1865-
return multiByteUpcase(runtime, enc, bytes, s, end);
1818+
IntHolder flagsP = new IntHolder();
1819+
flagsP.value = flags;
1820+
value = StringSupport.caseMap(runtime, value, flagsP);
1821+
flags = flagsP.value;
18661822
}
1867-
}
1868-
1869-
private IRubyObject singleByteUpcase(Ruby runtime, byte[]bytes, int s, int end) {
1870-
boolean modify = StringSupport.singleByteUpcase(bytes, s, end);
1871-
1872-
return modify ? this : runtime.getNil();
1873-
}
18741823

1875-
private IRubyObject multiByteUpcase(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
1876-
try {
1877-
boolean modify = StringSupport.multiByteUpcase(enc, bytes, s, end);
1878-
1879-
return modify ? this : runtime.getNil();
1880-
} catch (IllegalArgumentException e) {
1881-
throw runtime.newArgumentError(e.getMessage());
1882-
}
1824+
return ((flags & Config.CASE_MODIFIED) != 0) ? this : context.nil;
18831825
}
18841826

18851827
@Deprecated
18861828
public RubyString downcase19(ThreadContext context) {
1887-
return downcase(context);
1829+
return downcase(context, RubyObject.NULL_ARRAY);
18881830
}
18891831

18901832
@Deprecated
18911833
public IRubyObject downcase_bang19(ThreadContext context) {
1892-
return downcase_bang(context);
1834+
return downcase_bang(context, RubyObject.NULL_ARRAY);
18931835
}
18941836

18951837
/** rb_str_downcase / rb_str_downcase_bang
18961838
*
18971839
*/
18981840

1899-
@JRubyMethod(name = "downcase")
1900-
public RubyString downcase(ThreadContext context) {
1841+
@JRubyMethod(name = "downcase", rest = true)
1842+
public RubyString downcase(ThreadContext context, IRubyObject[] args) {
19011843
RubyString str = strDup(context.runtime);
1902-
str.downcase_bang(context);
1844+
str.downcase_bang(context, args);
19031845
return str;
19041846
}
19051847

1906-
@JRubyMethod(name = "downcase!")
1907-
public IRubyObject downcase_bang(ThreadContext context) {
1848+
@JRubyMethod(name = "downcase!", rest = true)
1849+
public IRubyObject downcase_bang(ThreadContext context, IRubyObject[] args) {
1850+
Ruby runtime = context.runtime;
1851+
int flags = StringSupport.checkCaseOptions(runtime, args, Config.CASE_DOWNCASE);
1852+
modifyAndKeepCodeRange();
19081853
Encoding enc = checkDummyEncoding();
19091854

1910-
if (value.getRealSize() == 0) {
1911-
modifyCheck();
1912-
return context.nil;
1855+
if (((flags & Config.CASE_ASCII_ONLY) != 0 && (enc.isUTF8() || enc.maxLength() == 1)) ||
1856+
(flags & Config.CASE_FOLD_TURKISH_AZERI) == 0 && getCodeRange() == CR_7BIT) {
1857+
int s = value.getBegin();
1858+
int end = s + value.getRealSize();
1859+
byte[]bytes = value.getUnsafeBytes();
1860+
while (s < end) {
1861+
int c = bytes[s] & 0xff;
1862+
if (Encoding.isAscii(c) && 'A' <= c && c <= 'Z') {
1863+
bytes[s] = (byte)('a' + (c - 'A'));
1864+
flags |= Config.CASE_MODIFIED;
1865+
}
1866+
s++;
1867+
}
1868+
} else if ((flags & Config.CASE_ASCII_ONLY) != 0) {
1869+
flags = StringSupport.asciiOnlyCaseMap(runtime, this, flags, enc);
1870+
} else {
1871+
IntHolder flagsP = new IntHolder();
1872+
flagsP.value = flags;
1873+
value = StringSupport.caseMap(runtime, value, flagsP);
1874+
flags = flagsP.value;
19131875
}
19141876

1915-
modifyAndKeepCodeRange();
1916-
1917-
int s = value.getBegin();
1918-
int end = s + value.getRealSize();
1919-
byte[] bytes = value.getUnsafeBytes();
1877+
return ((flags & Config.CASE_MODIFIED) != 0) ? this : context.nil;
1878+
}
19201879

1921-
if (singleByteOptimizable(enc)) {
1922-
return singleByteDowncase(context, bytes, s, end);
1923-
}
1924-
return multiByteDowncase(context, enc, bytes, s, end);
1880+
/** rb_str_swapcase / rb_str_swapcase_bang
1881+
*
1882+
*/
1883+
public RubyString swapcase(ThreadContext context) {
1884+
return swapcase19(context, RubyObject.NULL_ARRAY);
19251885
}
19261886

1927-
private IRubyObject singleByteDowncase(ThreadContext context, byte[] bytes, int s, int end) {
1928-
boolean modify = StringSupport.singleByteDowncase(bytes, s, end);
1887+
public IRubyObject swapcase_bang(ThreadContext context) {
1888+
return swapcase_bang19(context, RubyObject.NULL_ARRAY);
1889+
}
19291890

1930-
return modify ? this : context.nil;
1891+
@JRubyMethod(name = "swapcase", rest = true)
1892+
public RubyString swapcase19(ThreadContext context, IRubyObject[] args) {
1893+
RubyString str = strDup(context.runtime);
1894+
str.swapcase_bang19(context, args);
1895+
return str;
19311896
}
19321897

1933-
private IRubyObject multiByteDowncase(ThreadContext context, Encoding enc, byte[] bytes, int s, int end) {
1934-
try {
1935-
boolean modify = StringSupport.multiByteDowncase(enc, bytes, s, end);
1898+
@JRubyMethod(name = "swapcase!", rest = true)
1899+
public IRubyObject swapcase_bang19(ThreadContext context, IRubyObject[] args) {
1900+
Ruby runtime = context.runtime;
1901+
int flags = StringSupport.checkCaseOptions(runtime, args, Config.CASE_UPCASE | Config.CASE_DOWNCASE);
1902+
modifyAndKeepCodeRange();
1903+
Encoding enc = checkDummyEncoding();
19361904

1937-
return modify ? this : context.nil;
1938-
} catch (IllegalArgumentException e) {
1939-
throw context.runtime.newArgumentError(e.getMessage());
1905+
if ((flags & Config.CASE_ASCII_ONLY) != 0) {
1906+
flags = StringSupport.asciiOnlyCaseMap(runtime, this, flags, enc); // keep the returned flags: CASE_MODIFIED decides between returning this and nil below
1907+
} else {
1908+
IntHolder flagsP = new IntHolder();
1909+
flagsP.value = flags;
1910+
value = StringSupport.caseMap(runtime, value, flagsP);
1911+
flags = flagsP.value;
19401912
}
1941-
}
19421913

1914+
return ((flags & Config.CASE_MODIFIED) != 0) ? this : context.nil;
1915+
}
19431916

1944-
/** rb_str_swapcase / rb_str_swapcase_bang
1945-
*
1946-
*/
1947-
public RubyString swapcase(ThreadContext context) {
1948-
return swapcase19(context);
1917+
/** rb_str_capitalize / rb_str_capitalize_bang
1918+
*
1919+
*/
1920+
public IRubyObject capitalize(ThreadContext context) {
1921+
return capitalize19(context, RubyObject.NULL_ARRAY);
19491922
}
19501923

1951-
public IRubyObject swapcase_bang(ThreadContext context) {
1952-
return swapcase_bang19(context);
1924+
public IRubyObject capitalize_bang(ThreadContext context) {
1925+
return capitalize_bang19(context, RubyObject.NULL_ARRAY);
19531926
}
19541927

1955-
@JRubyMethod(name = "swapcase")
1956-
public RubyString swapcase19(ThreadContext context) {
1928+
@JRubyMethod(name = "capitalize", rest = true)
1929+
public IRubyObject capitalize19(ThreadContext context, IRubyObject[] args) {
19571930
RubyString str = strDup(context.runtime);
1958-
str.swapcase_bang19(context);
1931+
str.capitalize_bang19(context, args);
19591932
return str;
19601933
}
19611934

1962-
@JRubyMethod(name = "swapcase!")
1963-
public IRubyObject swapcase_bang19(ThreadContext context) {
1935+
@JRubyMethod(name = "capitalize!", rest = true)
1936+
public IRubyObject capitalize_bang19(ThreadContext context, IRubyObject[] args) {
19641937
Ruby runtime = context.runtime;
1938+
int flags = StringSupport.checkCaseOptions(runtime, args, Config.CASE_UPCASE | Config.CASE_TITLECASE);
19651939
Encoding enc = checkDummyEncoding();
1940+
19661941
if (value.getRealSize() == 0) {
19671942
modifyCheck();
19681943
return runtime.getNil();
19691944
}
1970-
modifyAndKeepCodeRange();
19711945

1972-
int s = value.getBegin();
1973-
int end = s + value.getRealSize();
1974-
byte[]bytes = value.getUnsafeBytes();
1946+
modifyAndKeepCodeRange();
19751947

1976-
if (singleByteOptimizable(enc)) {
1977-
if (StringSupport.singleByteSwapcase(bytes, s, end)) {
1978-
return this;
1979-
}
1948+
if ((flags & Config.CASE_ASCII_ONLY) != 0) {
1949+
flags = StringSupport.asciiOnlyCaseMap(runtime, this, flags, enc); // keep the returned flags: CASE_MODIFIED decides between returning this and nil below
19801950
} else {
1981-
if (StringSupport.multiByteSwapcase(runtime, enc, bytes, s, end)) {
1982-
return this;
1983-
}
1951+
IntHolder flagsP = new IntHolder();
1952+
flagsP.value = flags;
1953+
value = StringSupport.caseMap(runtime, value, flagsP);
1954+
flags = flagsP.value;
19841955
}
19851956

1986-
return runtime.getNil();
1957+
return ((flags & Config.CASE_MODIFIED) != 0) ? this : context.nil;
19871958
}
19881959

19891960
/** rb_str_dump

Diff for: ‎core/src/main/java/org/jruby/RubySymbol.java

+12 -16
Original file line number | Diff line number | Diff line change
@@ -457,32 +457,28 @@ public IRubyObject empty_p(ThreadContext context) {
457457
return newShared(context.runtime).empty_p(context);
458458
}
459459

460-
@JRubyMethod
461-
public IRubyObject upcase(ThreadContext context) {
460+
@JRubyMethod(rest = true)
461+
public IRubyObject upcase(ThreadContext context, IRubyObject[] args) {
Has conversations. Original line has conversations.
462462
Ruby runtime = context.runtime;
463-
464-
return newSymbol(runtime, newShared(runtime).upcase19(context).getByteList());
463+
return newSymbol(runtime, newShared(runtime).upcase19(context, args).getByteList());
465464
}
466465

467-
@JRubyMethod
468-
public IRubyObject downcase(ThreadContext context) {
466+
@JRubyMethod(rest = true)
467+
public IRubyObject downcase(ThreadContext context, IRubyObject[] args) {
469468
Ruby runtime = context.runtime;
470-
471-
return newSymbol(runtime, newShared(runtime).downcase(context).getByteList());
469+
return newSymbol(runtime, newShared(runtime).downcase(context, args).getByteList());
472470
}
473471

474-
@JRubyMethod
475-
public IRubyObject capitalize(ThreadContext context) {
472+
@JRubyMethod(rest = true)
473+
public IRubyObject capitalize(ThreadContext context, IRubyObject[] args) {
476474
Ruby runtime = context.runtime;
477-
478-
return newSymbol(runtime, ((RubyString) newShared(runtime).capitalize19(context)).getByteList());
475+
return newSymbol(runtime, ((RubyString) newShared(runtime).capitalize19(context, args)).getByteList());
479476
}
480477

481-
@JRubyMethod
482-
public IRubyObject swapcase(ThreadContext context) {
478+
@JRubyMethod(rest = true)
479+
public IRubyObject swapcase(ThreadContext context, IRubyObject[] args) {
483480
Ruby runtime = context.runtime;
484-
485-
return newSymbol(runtime, newShared(runtime).swapcase19(context).getByteList());
481+
return newSymbol(runtime, newShared(runtime).swapcase19(context, args).getByteList());
486482
}
487483

488484
@JRubyMethod

Diff for: ‎core/src/main/java/org/jruby/util/StringSupport.java

+94 -105
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,9 @@
2929
import static org.jcodings.Encoding.CHAR_INVALID;
3030
import static org.jruby.RubyEnumerator.enumeratorize;
3131

32+
import org.jcodings.Config;
3233
import org.jcodings.Encoding;
34+
import org.jcodings.IntHolder;
3335
import org.jcodings.ascii.AsciiTables;
3436
import org.jcodings.constants.CharacterType;
3537
import org.jcodings.exception.EncodingError;
@@ -607,10 +609,12 @@ public static int offset(RubyString str, int pos) {
607609
return offset(str.getEncoding(), value.getUnsafeBytes(), value.getBegin(), value.getBegin() + value.getRealSize(), pos);
608610
}
609611

612+
@Deprecated
610613
public static int toLower(Encoding enc, int c) {
611614
return Encoding.isAscii(c) ? AsciiTables.ToLowerCaseTable[c] : c;
612615
}
613616

617+
@Deprecated
614618
public static int toUpper(Encoding enc, int c) {
615619
return Encoding.isAscii(c) ? AsciiTables.ToUpperCaseTable[c] : c;
616620
}
@@ -1035,7 +1039,7 @@ public static int rindex(ByteList source, int sourceChars, int subChars, int pos
10351039
}
10361040

10371041
int s = nth(enc, srcBytes, srcBeg, srcBeg + srcLen, pos);
1038-
1042+
10391043
return strRindex(srcBytes, srcBeg, srcLen, subString.getUnsafeBytes(), subString.getBegin(), subLen, s, pos, enc);
10401044
}
10411045

@@ -2294,52 +2298,6 @@ public static boolean multiByteSqueeze(Ruby runtime, ByteList value, boolean squ
22942298
}
22952299
}
22962300

2297-
/**
2298-
* rb_str_swapcase / rb_str_swapcase_bang
2299-
*/
2300-
2301-
public static boolean singleByteSwapcase(byte[] bytes, int s, int end) {
2302-
boolean modify = false;
2303-
while (s < end) {
2304-
int c = bytes[s] & 0xff;
2305-
if (ASCIIEncoding.INSTANCE.isUpper(c)) {
2306-
bytes[s] = AsciiTables.ToLowerCaseTable[c];
2307-
modify = true;
2308-
} else if (ASCIIEncoding.INSTANCE.isLower(c)) {
2309-
bytes[s] = AsciiTables.ToUpperCaseTable[c];
2310-
modify = true;
2311-
}
2312-
s++;
2313-
}
2314-
2315-
return modify;
2316-
}
2317-
2318-
public static boolean multiByteSwapcase(Encoding enc, byte[] bytes, int s, int end) {
2319-
boolean modify = false;
2320-
while (s < end) {
2321-
int c = codePoint(enc, bytes, s, end);
2322-
if (enc.isUpper(c)) {
2323-
enc.codeToMbc(toLower(enc, c), bytes, s);
2324-
modify = true;
2325-
} else if (enc.isLower(c)) {
2326-
enc.codeToMbc(toUpper(enc, c), bytes, s);
2327-
modify = true;
2328-
}
2329-
s += codeLength(enc, c);
2330-
}
2331-
2332-
return modify;
2333-
}
2334-
2335-
public static boolean multiByteSwapcase(Ruby runtime, Encoding enc, byte[] bytes, int s, int end) {
2336-
try {
2337-
return multiByteSwapcase(enc, bytes, s, end);
2338-
} catch (IllegalArgumentException e) {
2339-
throw runtime.newArgumentError(e.getMessage());
2340-
}
2341-
}
2342-
23432301
private static int rb_memsearch_ss(byte[] xsBytes, int xs, int m, byte[] ysBytes, int ys, int n) {
23442302
int y;
23452303

@@ -2451,81 +2409,112 @@ private static int rb_memsearch_qs_utf8(byte[] xsBytes, int xs, int m, byte[] ys
24512409
return -1;
24522410
}
24532411

2454-
public static boolean singleByteDowncase(byte[] bytes, int s, int end) {
2455-
boolean modify = false;
2412+
public static int checkCaseOptions(Ruby runtime, IRubyObject[]args, int flags) {
2413+
if (args.length == 0) return flags;
2414+
if (args.length > 2) throw runtime.newArgumentError("too many options");
24562415

2457-
while (s < end) {
2458-
int c = bytes[s] & 0xff;
2459-
if (ASCIIEncoding.INSTANCE.isUpper(c)) {
2460-
bytes[s] = AsciiTables.ToLowerCaseTable[c];
2461-
modify = true;
2416+
if (args[0] == runtime.newSymbol("turkic")) {
2417+
flags |= Config.CASE_FOLD_TURKISH_AZERI;
2418+
if (args.length == 2) {
2419+
if (args[1] == runtime.newSymbol("lithuanian")) {
2420+
flags |= Config.CASE_FOLD_LITHUANIAN;
2421+
} else {
2422+
throw runtime.newArgumentError("invalid second option");
2423+
}
24622424
}
2463-
s++;
2464-
}
2465-
2466-
return modify;
2467-
}
2468-
2469-
public static boolean multiByteDowncase(Encoding enc, byte[] bytes, int s, int end) {
2470-
boolean modify = false;
2471-
int c;
2472-
while (s < end) {
2473-
if (enc.isAsciiCompatible() && Encoding.isAscii(c = bytes[s] & 0xff)) {
2474-
if (ASCIIEncoding.INSTANCE.isUpper(c)) {
2475-
bytes[s] = AsciiTables.ToLowerCaseTable[c];
2476-
modify = true;
2425+
} else if (args[0] == runtime.newSymbol("lithuanian")) {
2426+
flags |= Config.CASE_FOLD_LITHUANIAN;
2427+
if (args.length == 2) {
2428+
if (args[1] == runtime.newSymbol("turkic")) {
2429+
flags |= Config.CASE_FOLD_TURKISH_AZERI;
2430+
} else {
2431+
throw runtime.newArgumentError("invalid second option");
24772432
}
2478-
s++;
2433+
}
2434+
} else if (args.length > 1) {
2435+
throw runtime.newArgumentError("too many options");
2436+
} else if (args[0] == runtime.newSymbol("ascii")) {
2437+
flags |= Config.CASE_ASCII_ONLY;
2438+
} else if (args[0] == runtime.newSymbol("fold")) {
2439+
if ((flags & (Config.CASE_UPCASE | Config.CASE_DOWNCASE)) == Config.CASE_DOWNCASE) {
2440+
flags ^= Config.CASE_FOLD | Config.CASE_DOWNCASE;
24792441
} else {
2480-
c = codePoint(enc, bytes, s, end);
2481-
if (enc.isUpper(c)) {
2482-
enc.codeToMbc(toLower(enc, c), bytes, s);
2483-
modify = true;
2484-
}
2485-
s += codeLength(enc, c);
2442+
throw runtime.newArgumentError("option :fold only allowed for downcasing");
24862443
}
2444+
} else {
2445+
throw runtime.newArgumentError("invalid option");
24872446
}
2488-
2489-
return modify;
2447+
return flags;
24902448
}
24912449

2492-
public static boolean singleByteUpcase(byte[] bytes, int s, int end) {
2493-
boolean modify = false;
2450+
private static final class MappingBuffer {
2451+
MappingBuffer next;
2452+
byte[] bytes;
2453+
int used;
24942454

2495-
while (s < end) {
2496-
int c = bytes[s] & 0xff;
2497-
if (ASCIIEncoding.INSTANCE.isLower(c)) {
2498-
bytes[s] = AsciiTables.ToUpperCaseTable[c];
2499-
modify = true;
2500-
}
2501-
s++;
2455+
MappingBuffer() {
2456+
}
2457+
2458+
MappingBuffer(int size) {
2459+
bytes = new byte[size];
25022460
}
25032461

2504-
return modify;
25052462
}
25062463

2507-
public static boolean multiByteUpcase(Encoding enc, byte[] bytes, int s, int end) {
2508-
boolean modify = false;
2509-
int c;
2464+
private static final int CASE_MAPPING_ADDITIONAL_LENGTH = 20;
25102465

2511-
while (s < end) {
2512-
if (enc.isAsciiCompatible() && Encoding.isAscii(c = bytes[s] & 0xff)) {
2513-
if (ASCIIEncoding.INSTANCE.isLower(c)) {
2514-
bytes[s] = AsciiTables.ToUpperCaseTable[c];
2515-
modify = true;
2516-
}
2517-
s++;
2518-
} else {
2519-
c = codePoint(enc, bytes, s, end);
2520-
if (enc.isLower(c)) {
2521-
enc.codeToMbc(toUpper(enc, c), bytes, s);
2522-
modify = true;
2523-
}
2524-
s += codeLength(enc, c);
2466+
public static ByteList caseMap(Ruby runtime, ByteList src, IntHolder flags) {
2467+
IntHolder pp = new IntHolder();
2468+
Encoding enc = src.getEncoding();
2469+
pp.value = src.getBegin();
2470+
int end = src.getRealSize() + pp.value;
2471+
byte[]bytes = src.getUnsafeBytes();
2472+
int tgtLen = 0;
2473+
2474+
int buffers = 0;
2475+
MappingBuffer root = new MappingBuffer();
2476+
MappingBuffer buffer = root;
2477+
while (pp.value < end) {
2478+
buffer.next = new MappingBuffer((end - pp.value) * ++buffers + CASE_MAPPING_ADDITIONAL_LENGTH);
2479+
buffer = buffer.next;
2480+
int len = enc.caseMap(flags, bytes, pp, end, buffer.bytes, 0, buffer.bytes.length);
2481+
if (len < 0) throw runtime.newArgumentError("input string invalid");
2482+
buffer.used = len;
2483+
tgtLen += len;
2484+
}
2485+
2486+
final ByteList tgt;
2487+
if (buffers == 1) {
2488+
tgt = new ByteList(buffer.bytes, 0, buffer.used, enc, false);
2489+
} else {
2490+
tgt = new ByteList(tgtLen);
2491+
tgt.setEncoding(enc);
2492+
buffer = root.next;
2493+
int tgtPos = 0;
2494+
while (buffer != null) {
2495+
System.arraycopy(buffer.bytes, 0, tgt.getUnsafeBytes(), tgtPos, buffer.used);
2496+
tgtPos += buffer.used;
2497+
buffer = buffer.next;
25252498
}
25262499
}
25272500

2528-
return modify;
2501+
return tgt;
2502+
}
2503+
2504+
public static int asciiOnlyCaseMap(Ruby runtime, RubyString source, int flags, Encoding enc) {
2505+
ByteList value = source.getByteList();
2506+
if (value.getRealSize() == 0) return flags;
2507+
int s = value.getBegin();
2508+
int end = s + value.getRealSize();
2509+
byte[]bytes = value.getUnsafeBytes();
2510+
2511+
IntHolder flagsP = new IntHolder();
2512+
flagsP.value = flags;
2513+
IntHolder pp = new IntHolder();
2514+
pp.value = s;
2515+
int len = ASCIIEncoding.INSTANCE.caseMap(flagsP, bytes, pp, end, bytes, s, end);
2516+
if (len < 0) throw runtime.newArgumentError("input string invalid");
2517+
return flagsP.value;
25292518
}
25302519

25312520
public static int encCoderangeClean(int cr) {

0 commit comments

Comments
 (0)
Please sign in to comment.