Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jruby/jcodings
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 7f60a392bc36
Choose a base ref
...
head repository: jruby/jcodings
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: c226beeefe87
Choose a head ref
  • 3 commits
  • 4 files changed
  • 1 contributor

Commits on Jan 28, 2018

  1. iso8859-3 caseMap

    lopex committed Jan 28, 2018
    Copy the full SHA
    351d6f1 View commit details
  2. fix caseMap

    lopex committed Jan 28, 2018
    Copy the full SHA
    8546494 View commit details
  3. add sharp-s iso caseMap test

    lopex committed Jan 28, 2018
    Copy the full SHA
    c226bee View commit details
4 changes: 2 additions & 2 deletions src/org/jcodings/specific/ISO8859_2Encoding.java
Original file line number Diff line number Diff line change
@@ -49,14 +49,14 @@ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[]
}
} else if ((ISO8859_2CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
flags |= Config.CASE_MODIFIED;
code += LowerCaseTable[code];
code = LowerCaseTable[code];
} else if ((ISO8859_2CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
flags |= Config.CASE_MODIFIED;
if (code >= 0xB1 && code <= 0xBF) {
code -= 0x10;
} else {
code -= 0x20;
}
flags |= Config.CASE_MODIFIED;
}
to[toP++] = (byte)code;
if ((flags & Config.CASE_TITLECASE) != 0) {
53 changes: 53 additions & 0 deletions src/org/jcodings/specific/ISO8859_3Encoding.java
Original file line number Diff line number Diff line change
@@ -19,14 +19,67 @@
*/
package org.jcodings.specific;

import org.jcodings.Config;
import org.jcodings.ISOEncoding;
import org.jcodings.IntHolder;
import org.jcodings.constants.CharacterType;

public final class ISO8859_3Encoding extends ISOEncoding {

/**
 * Creates the encoding by handing the name "ISO-8859-3" together with this
 * class's ctype, to-lower-case and case-fold tables to the ISOEncoding
 * constructor.
 */
protected ISO8859_3Encoding() {
super("ISO-8859-3", ISO8859_3CtypeTable, ISO8859_3ToLowerCaseTable, ISO8859_3CaseFoldMap);
}

// Byte positions of the Turkish letters in ISO-8859-3. (0xFD / 0xDD are the
// ISO-8859-9 positions; in ISO-8859-3 those bytes are u-breve / U-breve.)
static final int DOTLESS_i = 0xB9;
static final int I_WITH_DOT_ABOVE = 0xA9;

/**
 * Applies the case mapping selected by {@code flagP} to single-byte
 * ISO-8859-3 text.
 *
 * @param flagP in/out mapping flags ({@code Config.CASE_*}); on return it holds
 *              the flags as updated during mapping, including
 *              {@code Config.CASE_MODIFIED} when any byte was changed
 * @param bytes source bytes
 * @param pp    in/out read position in {@code bytes}; advanced past every
 *              consumed byte
 * @param end   exclusive end of the source range
 * @param to    destination buffer
 * @param toP   first write position in {@code to}
 * @param toEnd exclusive write limit in {@code to}
 * @return number of bytes written to {@code to}
 */
@Override
public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
    int toStart = toP;
    int flags = flagP.value;

    while (pp.value < end && toP < toEnd) {
        int code = bytes[pp.value++] & 0xff;
        if (code == SHARP_s) {
            // Sharp s expands to two bytes when upcased or folded; the first is
            // written here, the second by the common store below.
            // NOTE(review): the loop guard only guarantees one free slot, so the
            // caller is presumably expected to leave headroom in `to` - confirm.
            if ((flags & Config.CASE_UPCASE) != 0) {
                flags |= Config.CASE_MODIFIED;
                to[toP++] = 'S';
                code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
            } else if ((flags & Config.CASE_FOLD) != 0) {
                flags |= Config.CASE_MODIFIED;
                to[toP++] = 's';
                code = 's';
            }
        } else if (code == 0xB5) {
            // 0xB5 is deliberately copied through unchanged.
        } else if ((ISO8859_3CtypeTable[code] & CharacterType.BIT_UPPER) != 0
                && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
            flags |= Config.CASE_MODIFIED;
            if (code == 'I') {
                // Turkic rule: 'I' lower-cases to dotless i.
                code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? DOTLESS_i : 'i';
            } else {
                code = LowerCaseTable[code];
            }
        } else if ((ISO8859_3CtypeTable[code] & CharacterType.BIT_LOWER) != 0
                && (flags & Config.CASE_UPCASE) != 0) {
            flags |= Config.CASE_MODIFIED;
            if (code == 'i') {
                // Turkic rule: 'i' upper-cases to I-with-dot-above (previously this
                // wrongly produced the lower-case dotless i).
                code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? I_WITH_DOT_ABOVE : 'I';
            } else if (code == DOTLESS_i) {
                code = 'I';
            } else if (code >= 0xB0 && code <= 0xBF) {
                // Lower-case letters in 0xB0..0xBF sit 0x10 above their upper-case form.
                code -= 0x10;
            } else {
                code -= 0x20;
            }
        }
        to[toP++] = (byte) code;
        if ((flags & Config.CASE_TITLECASE) != 0) {
            // After the first character of a title-case run, toggle the
            // upcase/downcase/titlecase bits so the remaining bytes are mapped
            // with the flipped setting.
            flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
        }
    }
    flagP.value = flags;
    return toP - toStart;
}

static final short ISO8859_3CtypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
47 changes: 47 additions & 0 deletions test/org/jcodings/specific/TestCaseMap.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package org.jcodings.specific;

import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;

import java.util.Arrays;
import java.util.List;

import org.jcodings.Config;
import org.jcodings.Encoding;
import org.jcodings.IntHolder;
import org.junit.Test;

/**
 * Exercises {@code Encoding.caseMap} for UTF-8 and for several ISO-8859
 * encodings.
 */
public class TestCaseMap {
    /** Extra room in the output buffer, since a mapping can grow the text (e.g. sharp s to "SS"). */
    private static final int CASE_MAPPING_ADDITIONAL_LENGTH = 20;

    /**
     * Encodes {@code fromS} in {@code enc}, runs {@code enc.caseMap} over the
     * bytes with the given flags and decodes the result back to a String.
     *
     * @param enc   encoding under test; its {@code toString()} is used as the
     *              Java charset name
     * @param fromS text to map
     * @param flags {@code Config.CASE_*} flags
     * @return the case-mapped text
     */
    String caseMap(Encoding enc, String fromS, int flags) throws Exception {
        String charsetName = enc.toString();
        byte[] from = fromS.getBytes(charsetName);
        byte[] to = new byte[from.length + CASE_MAPPING_ADDITIONAL_LENGTH];

        IntHolder fromP = new IntHolder();
        fromP.value = 0;
        IntHolder flagP = new IntHolder();
        flagP.value = flags;

        int len = enc.caseMap(flagP, from, fromP, from.length, to, 0, to.length);
        return new String(to, 0, len, charsetName);
    }

    @Test
    public void testUnicodeCaseMap() throws Exception {
        Encoding enc = UTF8Encoding.INSTANCE;
        // assertEquals (rather than assertTrue(a.equals(b))) reports both values on failure.
        assertEquals("ÄÖÜ", caseMap(enc, "äöü", Config.CASE_UPCASE));
        assertEquals("ÄÖÜ", caseMap(enc, "ÄÖÜ", Config.CASE_UPCASE));
        assertEquals("äöü", caseMap(enc, "ÄÖÜ", Config.CASE_DOWNCASE));
        assertEquals("äöü", caseMap(enc, "äöü", Config.CASE_DOWNCASE));
        assertEquals("aäböcü", caseMap(enc, "aÄbÖcÜ", Config.CASE_DOWNCASE));
        assertEquals("AÄBÖCÜ", caseMap(enc, "aäböcü", Config.CASE_UPCASE));
        assertEquals("AäBöCü", caseMap(enc, "aäböcü", Config.CASE_UPCASE | Config.CASE_ASCII_ONLY));
        assertEquals("aÄbÖcÜ", caseMap(enc, "AÄBÖCÜ", Config.CASE_DOWNCASE | Config.CASE_ASCII_ONLY));
    }

    @Test
    public void testISOCaseMap() throws Exception {
        List<Encoding> list = Arrays.<Encoding>asList(
                ISO8859_1Encoding.INSTANCE, ISO8859_2Encoding.INSTANCE, ISO8859_3Encoding.INSTANCE);
        for (Encoding enc : list) {
            // Sharp s upcases to two letters and is left alone when downcasing.
            assertEquals("SS", caseMap(enc, "ß", Config.CASE_UPCASE));
            assertEquals("ß", caseMap(enc, "ß", Config.CASE_DOWNCASE));
        }
    }
}
31 changes: 3 additions & 28 deletions test/org/jcodings/specific/TestUnicode.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package org.jcodings.specific;

import org.jcodings.Config;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;

import org.jcodings.Encoding;
import org.jcodings.IntHolder;
import org.jcodings.constants.CharacterType;
import org.junit.Test;

import static junit.framework.Assert.*;

public class TestUnicode {
final Encoding enc = UTF8Encoding.INSTANCE;

@@ -36,30 +35,6 @@ public void testUnicodeProperties() throws Exception {
assertEquals(a_ctype, CharacterType.ASCII);
}

/**
 * Runs {@code enc.caseMap} over the UTF-8 bytes of {@code fromS} with the given
 * flags and returns the mapped bytes decoded as UTF-8.
 */
String caseMap(String fromS, int flags) throws Exception {
    final int headroom = 20;
    final byte[] source = fromS.getBytes("utf-8");
    final byte[] target = new byte[source.length + headroom];

    final IntHolder position = new IntHolder();
    position.value = 0;

    final IntHolder flagHolder = new IntHolder();
    flagHolder.value = flags;

    final int written = enc.caseMap(flagHolder, source, position, source.length, target, 0, target.length);
    return new String(target, 0, written, "utf-8");
}

/**
 * Checks upcasing and downcasing of mixed ASCII / non-ASCII text, with and
 * without {@code Config.CASE_ASCII_ONLY}.
 */
@Test
public void testCaseMap() throws Exception {
    // assertEquals (rather than assertTrue(a.equals(b))) reports both values on failure.
    assertEquals("ÄÖÜ", caseMap("äöü", Config.CASE_UPCASE));
    assertEquals("ÄÖÜ", caseMap("ÄÖÜ", Config.CASE_UPCASE));
    assertEquals("äöü", caseMap("ÄÖÜ", Config.CASE_DOWNCASE));
    assertEquals("äöü", caseMap("äöü", Config.CASE_DOWNCASE));
    assertEquals("aäböcü", caseMap("aÄbÖcÜ", Config.CASE_DOWNCASE));
    assertEquals("AÄBÖCÜ", caseMap("aäböcü", Config.CASE_UPCASE));
    assertEquals("AäBöCü", caseMap("aäböcü", Config.CASE_UPCASE | Config.CASE_ASCII_ONLY));
    assertEquals("aÄbÖcÜ", caseMap("AÄBÖCÜ", Config.CASE_DOWNCASE | Config.CASE_ASCII_ONLY));
}

@Test
public void testCodeToMbcLength() throws Exception {
assertEquals(enc.codeToMbcLength(0x01), 1);