Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jcodings
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 6775574485f6
Choose a base ref
...
head repository: jruby/jcodings
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 84ca5cd460da
Choose a head ref
  • 5 commits
  • 4 files changed
  • 1 contributor

Commits on Dec 23, 2017

  1. set surefire encoding

    lopex committed Dec 23, 2017
    Copy the full SHA
    9a533bd View commit details
  2. UnicodeEncoding.caseMap

    lopex committed Dec 23, 2017
    Copy the full SHA
    f9020f6 View commit details
  3. tests for unicode casemapping

    lopex committed Dec 23, 2017
    Copy the full SHA
    bf24c1c View commit details
  4. make javadoc happier for now

    lopex committed Dec 23, 2017
    Copy the full SHA
    88d8b1c View commit details
  5. Copy the full SHA
    84ca5cd View commit details
Showing with 145 additions and 11 deletions.
  1. +15 −4 pom.xml
  2. +1 −0 src/org/jcodings/Config.java
  3. +98 −4 src/org/jcodings/unicode/UnicodeEncoding.java
  4. +31 −3 test/org/jcodings/specific/TestUnicode.java
19 changes: 15 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -58,10 +58,10 @@
<finalName>jcodings</finalName>
<extensions>
<extension>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-webdav-jackrabbit</artifactId>
<version>2.1</version>
</extension>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-webdav-jackrabbit</artifactId>
<version>2.1</version>
</extension>
</extensions>
<plugins>
<plugin>
@@ -87,6 +87,9 @@
</goals>
</execution>
</executions>
<configuration>
<additionalparam>-Xdoclint:none</additionalparam>
</configuration>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
@@ -110,6 +113,14 @@
<artifactId>maven-resources-plugin</artifactId>
<version>2.6</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.20.1</version>
<configuration>
<argLine>-Dfile.encoding=UTF-8</argLine>
</configuration>
</plugin>
</plugins>
<resources>
<resource>
1 change: 1 addition & 0 deletions src/org/jcodings/Config.java
Original file line number Diff line number Diff line change
@@ -56,6 +56,7 @@ public interface Config {
final int CASE_FOLD_LITHUANIAN = (1<<21); /* needs Lithuanian-specific mapping */
final int CASE_ASCII_ONLY = (1<<22); /* only modify ASCII range */
final int CASE_IS_TITLECASE = (1<<23); /* character itself is already titlecase */
final int CASE_SPECIALS = (CASE_TITLECASE | CASE_IS_TITLECASE | CASE_UP_SPECIAL | CASE_DOWN_SPECIAL); /* union of the titlecase and up/down "special" flags; used as a mask to detect special case mappings */

final int INTERNAL_ENC_CASE_FOLD_MULTI_CHAR = (1<<30); /* better not change original value! */
final int ENC_CASE_FOLD_MIN = INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
102 changes: 98 additions & 4 deletions src/org/jcodings/unicode/UnicodeEncoding.java
Original file line number Diff line number Diff line change
@@ -85,16 +85,14 @@ public boolean isCodeCType(int code, int ctype) {
@Override
public int propertyNameToCType(byte[]name, int p, int end) {
byte[]buf = new byte[PROPERTY_NAME_MAX_SIZE];

int p_ = p;
int len = 0;

while(p_ < end) {
for(int p_ = p; p_ < end; p_+= length(name, p_, end)) {
int code = mbcToCode(name, p_, end);
if (code == ' ' || code == '-' || code == '_') continue;
if (code >= 0x80) throw new CharacterPropertyException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME);
buf[len++] = (byte)code;
if (len >= PROPERTY_NAME_MAX_SIZE) throw new CharacterPropertyException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
p_ += length(name, p_, end);
}

Integer ctype = CTypeName.CTypeNameHash.get(buf, 0, len);
@@ -419,6 +417,102 @@ public void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg)
}
}

/* Number of bytes caseMap subtracts from the end of the output buffer before it starts writing. */
static final int CASE_MAPPING_SLACK = 12;
/* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE */
static final int I_WITH_DOT_ABOVE = 0x0130;
/* U+0131 LATIN SMALL LETTER DOTLESS I */
static final int DOTLESS_i = 0x0131;
/* U+0307 COMBINING DOT ABOVE */
static final int DOT_ABOVE = 0x0307;

/**
 * Case-maps the characters in {@code bytes[pp.value, end)} into {@code to}, starting at
 * {@code toP}, as directed by the {@code Config.CASE_*} bits in {@code flagP.value}.
 *
 * <p>On normal return {@code pp.value} points just past the last consumed input character and
 * {@code flagP.value} holds the working flags, with {@code Config.CASE_MODIFIED} set if any
 * character was changed. The loop stops when the input is exhausted or when the write position
 * passes {@code toEnd - CASE_MAPPING_SLACK}, so the caller may have to call again to finish.
 *
 * @param flagP  in/out case-mapping flags
 * @param bytes  input bytes
 * @param pp     in/out read position in {@code bytes}
 * @param end    end (exclusive) of the input range
 * @param to     output buffer
 * @param toP    first write position in {@code to}
 * @param toEnd  end of the usable output range
 * @return the number of bytes written to {@code to}; if {@code length(...)} reports a negative
 *         value for the next character, that value is returned immediately (and
 *         {@code flagP.value} is not updated)
 */
@Override
public final int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
int flags = flagP.value;
int toStart = toP;
// Stop early enough that one more iteration still fits in the buffer.
// NOTE(review): presumably 12 bytes covers the widest multi-code-point expansion - confirm.
toEnd -= CASE_MAPPING_SLACK;
// Replicate the UPCASE/DOWNCASE request bits CASE_SPECIAL_OFFSET positions higher.
// NOTE(review): presumably these land on CASE_UP_SPECIAL/CASE_DOWN_SPECIAL - confirm against Config.
flags |= (flags & (Config.CASE_UPCASE | Config.CASE_DOWNCASE)) << Config.CASE_SPECIAL_OFFSET;


while (pp.value < end && toP <= toEnd) {
int length = length(bytes, pp.value, end);
if (length < 0) return length;
int code = mbcToCode(bytes, pp.value, end);
pp.value += length;

// Fast path for code points up to 'z': only the ASCII letters are touched here.
if (code <= 'z') {
if (code >= 'a' && code <= 'z') {
if ((flags & Config.CASE_UPCASE) != 0) {
flags |= Config.CASE_MODIFIED;
// With the Turkish/Azeri flag, 'i' upcases to U+0130 instead of 'I'.
if ((flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 && code == 'i') code = I_WITH_DOT_ABOVE; else code += 'A' - 'a';
}
} else if (code >= 'A' && code <= 'Z') {
if ((flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
flags |= Config.CASE_MODIFIED;
// With the Turkish/Azeri flag, 'I' downcases to U+0131 instead of 'i'.
if ((flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 && code == 'I') code = DOTLESS_i; else code += 'a' - 'A';
}
}
// Code points above 'z' are only examined when CASE_ASCII_ONLY is not set and they are
// at least U+00B5; everything else falls through and is copied unchanged.
} else if ((flags & Config.CASE_ASCII_ONLY) == 0 && code >= 0x00B5) {
CodeList folded;
if (code == I_WITH_DOT_ABOVE) {
if ((flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
flags |= Config.CASE_MODIFIED;
code = 'i';
// Without the Turkish/Azeri flag the result is 'i' followed by U+0307: the 'i' is
// written here and the combining dot by the shared write at the bottom of the loop.
if ((flags & Config.CASE_FOLD_TURKISH_AZERI) == 0) {
toP += codeToMbc(code, to, toP);
code = DOT_ABOVE;
}
}
} else if (code == DOTLESS_i) {
if ((flags & Config.CASE_UPCASE) != 0) {
flags |= Config.CASE_MODIFIED;
code = 'I';
}
} else if ((folded = CaseFold.Hash.get(code)) != null) {
if ((flags & Config.CASE_TITLECASE) != 0 && (folded.flags & Config.CASE_IS_TITLECASE) != 0) {
// Titlecase requested and the character is flagged as already titlecase: keep it as is.

} else if ((flags & folded.flags) != 0) {
// The requested operation matches something this table entry provides.
int[]codes;
boolean specialCopy = false;
flags |= Config.CASE_MODIFIED;
if ((flags & folded.flags & Config.CASE_SPECIALS) != 0) {
// Special mapping: the entry's flags carry an index into CaseMappingSpecials.Values.
// Each stage below either selects the entry at specialStart (specialCopy) or
// advances specialStart by the length of the entry currently found there.
int specialStart = (folded.flags & Config.SpecialIndexMask) >>> Config.SpecialIndexShift;
if ((folded.flags & Config.CASE_IS_TITLECASE) != 0) {
// Selected only when both UPCASE and DOWNCASE are requested together.
if ((flags & (Config.CASE_UPCASE | Config.CASE_DOWNCASE)) == (Config.CASE_UPCASE | Config.CASE_DOWNCASE))
specialCopy = true;
else
specialStart += CaseMappingSpecials.Values.get(specialStart).length;
}
if (!specialCopy && (folded.flags & Config.CASE_TITLECASE) != 0) {
// Selected only when titlecasing is requested.
if ((flags & Config.CASE_TITLECASE) != 0)
specialCopy = true;
else
specialStart += CaseMappingSpecials.Values.get(specialStart).length;
}
if (!specialCopy && (folded.flags & Config.CASE_DOWN_SPECIAL) != 0) {
// Skipped when the down-special bit is not part of the request.
if ((flags & Config.CASE_DOWN_SPECIAL) == 0)
specialStart += CaseMappingSpecials.Values.get(specialStart).length;
}
codes = CaseMappingSpecials.Values.get(specialStart);
} else {
codes = folded.codes;
}
code = codes[0];

// Write every code point except the last; the last one is left in `code` for the
// shared write at the bottom of the loop.
for (int i = 1; i < codes.length; i++) {
toP += codeToMbc(code, to, toP);
code = codes[i];
}
}
} else if ((folded = CaseFold11.Hash.get(code)) != null && (flags & folded.flags) != 0) {
flags |= Config.CASE_MODIFIED;
// Index 1 is used when the matching bits include TITLECASE, index 0 otherwise.
code = folded.codes[(flags & folded.flags & Config.CASE_TITLECASE) != 0 ? 1 : 0];
}
}
// Shared write: emits the (possibly mapped) code point, or the last one of a sequence.
toP += codeToMbc(code, to, toP);
// After the first character written under TITLECASE, toggle the case bits. This clears
// TITLECASE, so the toggle happens only once per call.
// NOTE(review): presumably this switches the rest of the input from upcase to downcase - confirm.
if ((flags & Config.CASE_TITLECASE) != 0) {
flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE | Config.CASE_UP_SPECIAL | Config.CASE_DOWN_SPECIAL);}

} // while
flagP.value = flags;
return toP - toStart;
}
static final short UNICODE_ISO_8859_1_CTypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
34 changes: 31 additions & 3 deletions test/org/jcodings/specific/TestUnicode.java
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
package org.jcodings.specific;

import org.jcodings.Config;
import org.jcodings.Encoding;
import org.jcodings.IntHolder;
import org.junit.Test;

import static junit.framework.Assert.*;

public class TestUnicode {
final Encoding enc = UTF8Encoding.INSTANCE;

/**
 * Checks byte length, character count, lead-byte length and decoding for a string that
 * mixes one-byte and two-byte UTF-8 characters.
 */
@Test
public void testUnicodeLength() throws Exception {
    byte[] utf8Bytes = "mØØse".getBytes("UTF-8");

    // 'm', 's', 'e' take one byte each and each 'Ø' takes two: 3 + 2 * 2 = 7.
    assertEquals(7, utf8Bytes.length);
    // enc is UTF8Encoding.INSTANCE, so each property needs to be asserted only once.
    assertEquals(5, enc.strLength(utf8Bytes, 0, 7));
    assertEquals(2, enc.length(utf8Bytes[1]));
    assertEquals('Ø', enc.mbcToCode(utf8Bytes, 1, 3));
}

@Test
@@ -25,4 +29,28 @@ public void testUnicodeProperties() throws Exception {
int ctype = enc.propertyNameToCType(prop, 0, prop.length);
assertTrue(enc.isCodeCType(code, ctype));
}

/**
 * Runs {@code enc.caseMap} over the UTF-8 bytes of {@code fromS} with the given flags and
 * returns the mapped bytes decoded back into a String.
 *
 * @param fromS input text
 * @param flags {@code Config.CASE_*} flags passed to the encoding
 * @return the case-mapped text
 */
String caseMap(String fromS, int flags) throws Exception {
    // Extra room in the output buffer for mappings that grow the text.
    final int caseMappingAdditionalLength = 20;
    byte[] from = fromS.getBytes("utf-8");
    IntHolder fromP = new IntHolder();
    fromP.value = 0;
    byte[] to = new byte[from.length + caseMappingAdditionalLength];
    IntHolder flagP = new IntHolder();
    flagP.value = flags;
    int len = enc.caseMap(flagP, from, fromP, from.length, to, 0, to.length);
    // Decode with the same charset used for encoding; the platform default charset is not
    // guaranteed to be UTF-8, which would corrupt every non-ASCII character in the result.
    return new String(to, 0, len, "utf-8");
}

/**
 * Exercises upcasing and downcasing of mixed ASCII / non-ASCII text, with and without
 * {@code Config.CASE_ASCII_ONLY}.
 */
@Test
public void testCaseMap() throws Exception {
    // assertEquals reports both the expected and the actual string when a mapping is wrong.
    assertEquals("ÄÖÜ", caseMap("äöü", Config.CASE_UPCASE));
    assertEquals("ÄÖÜ", caseMap("ÄÖÜ", Config.CASE_UPCASE));
    assertEquals("äöü", caseMap("ÄÖÜ", Config.CASE_DOWNCASE));
    assertEquals("äöü", caseMap("äöü", Config.CASE_DOWNCASE));
    assertEquals("aäböcü", caseMap("aÄbÖcÜ", Config.CASE_DOWNCASE));
    assertEquals("AÄBÖCÜ", caseMap("aäböcü", Config.CASE_UPCASE));
    // With CASE_ASCII_ONLY only the ASCII letters change case.
    assertEquals("AäBöCü", caseMap("aäböcü", Config.CASE_UPCASE | Config.CASE_ASCII_ONLY));
    assertEquals("aÄbÖcÜ", caseMap("AÄBÖCÜ", Config.CASE_DOWNCASE | Config.CASE_ASCII_ONLY));
}
}