Skip to content

Commit 4196dba

Browse files
committed Feb 13, 2018
add euc-jp code ranges
1 parent 12dec8f commit 4196dba

File tree

1 file changed

+42
-8
lines changed

1 file changed

+42
-8
lines changed
 

‎src/org/jcodings/specific/BaseEUCJPEncoding.java

+42-8
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
package org.jcodings.specific;
2121

2222
import org.jcodings.CodeRange;
23-
import org.jcodings.Config;
2423
import org.jcodings.EucEncoding;
2524
import org.jcodings.IntHolder;
2625
import org.jcodings.ascii.AsciiTables;
@@ -95,25 +94,60 @@ public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
9594
private static final int CR_Hiragana[] = {
9695
1,
9796
0xa4a1, 0xa4f3
98-
}; /* CR_Hiragana */
97+
};
9998

10099
private static final int CR_Katakana[] = {
101100
3,
101+
0x8ea6, 0x8eaf, /* JIS X 0201 Katakana */
102+
0x8eb1, 0x8edd, /* JIS X 0201 Katakana */
102103
0xa5a1, 0xa5f6,
103-
0xaaa6, 0xaaaf,
104-
0xaab1, 0xaadd
105-
}; /* CR_Katakana */
104+
};
105+
106+
private static final int CR_Han[] = {
107+
/* EUC-JP (JIS X 0208 based) */
108+
4,
109+
0xa1b8, 0xa1b8,
110+
0xb0a1, 0xcfd3, /* Kanji level 1 */
111+
0xd0a1, 0xf4a6, /* Kanji level 2 */
112+
0x8fb0a1, 0x8fedf3 /* JIS X 0212 Supplemental Kanji (row 16 .. 77) */
113+
};
114+
115+
private static final int CR_Latin[] = {
116+
4,
117+
0x0041, 0x005a,
118+
0x0061, 0x007a,
119+
0xa3c1, 0xa3da,
120+
0xa3e1, 0xa3fa,
121+
};
122+
123+
private static final int CR_Greek[] = {
124+
2,
125+
0xa6a1, 0xa6b8,
126+
0xa6c1, 0xa6d8,
127+
};
128+
129+
private static final int CR_Cyrillic[] = {
130+
2,
131+
0xa7a1, 0xa7c1,
132+
0xa7d1, 0xa7f1,
133+
};
106134

107135
private static final int PropertyList[][] = new int[][] {
108136
CR_Hiragana,
109-
CR_Katakana
137+
CR_Katakana,
138+
CR_Han,
139+
CR_Latin,
140+
CR_Greek,
141+
CR_Cyrillic
110142
};
111143

112144
private static final CaseInsensitiveBytesHash<Integer> CTypeNameHash = new CaseInsensitiveBytesHash<Integer>();
113145

114146
static {
115-
CTypeNameHash.put("Hiragana".getBytes(), 1 + CharacterType.MAX_STD_CTYPE);
116-
CTypeNameHash.put("Katakana".getBytes(), 2 + CharacterType.MAX_STD_CTYPE);
147+
String[] names = new String[] {"Hiragana", "Katakana", "Han", "Latin", "Greek", "Cyrillic"};
148+
for (int i = 0; i < names.length; i++) {
149+
CTypeNameHash.put(names[i].getBytes(), i + 1 + CharacterType.MAX_STD_CTYPE);
150+
}
117151
}
118152

119153
@Override

0 commit comments

Comments
 (0)
Please sign in to comment.