|
20 | 20 | package org.jcodings.specific;
|
21 | 21 |
|
22 | 22 | import org.jcodings.CodeRange;
|
23 |
| -import org.jcodings.Config; |
24 | 23 | import org.jcodings.EucEncoding;
|
25 | 24 | import org.jcodings.IntHolder;
|
26 | 25 | import org.jcodings.ascii.AsciiTables;
|
@@ -95,25 +94,60 @@ public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
|
95 | 94 | private static final int CR_Hiragana[] = {
|
96 | 95 | 1,
|
97 | 96 | 0xa4a1, 0xa4f3
|
98 |
| - }; /* CR_Hiragana */ |
| 97 | + }; |
99 | 98 |
|
100 | 99 | private static final int CR_Katakana[] = {
|
101 | 100 | 3,
|
| 101 | + 0x8ea6, 0x8eaf, /* JIS X 0201 Katakana */ |
| 102 | + 0x8eb1, 0x8edd, /* JIS X 0201 Katakana */ |
102 | 103 | 0xa5a1, 0xa5f6,
|
103 |
| - 0xaaa6, 0xaaaf, |
104 |
| - 0xaab1, 0xaadd |
105 |
| - }; /* CR_Katakana */ |
| 104 | + }; |
| 105 | + |
| 106 | + private static final int CR_Han[] = { |
| 107 | + /* EUC-JP (JIS X 0208 based) */ |
| 108 | + 4, |
| 109 | + 0xa1b8, 0xa1b8, |
| 110 | + 0xb0a1, 0xcfd3, /* Kanji level 1 */ |
| 111 | + 0xd0a1, 0xf4a6, /* Kanji level 2 */ |
| 112 | + 0x8fb0a1, 0x8fedf3 /* JIS X 0212 Supplemental Kanji (row 16 .. 77) */ |
| 113 | + }; |
| 114 | + |
| 115 | + private static final int CR_Latin[] = { |
| 116 | + 4, |
| 117 | + 0x0041, 0x005a, |
| 118 | + 0x0061, 0x007a, |
| 119 | + 0xa3c1, 0xa3da, |
| 120 | + 0xa3e1, 0xa3fa, |
| 121 | + }; |
| 122 | + |
| 123 | + private static final int CR_Greek[] = { |
| 124 | + 2, |
| 125 | + 0xa6a1, 0xa6b8, |
| 126 | + 0xa6c1, 0xa6d8, |
| 127 | + }; |
| 128 | + |
| 129 | + private static final int CR_Cyrillic[] = { |
| 130 | + 2, |
| 131 | + 0xa7a1, 0xa7c1, |
| 132 | + 0xa7d1, 0xa7f1, |
| 133 | + }; |
106 | 134 |
|
107 | 135 | private static final int PropertyList[][] = new int[][] {
|
108 | 136 | CR_Hiragana,
|
109 |
| - CR_Katakana |
| 137 | + CR_Katakana, |
| 138 | + CR_Han, |
| 139 | + CR_Latin, |
| 140 | + CR_Greek, |
| 141 | + CR_Cyrillic |
110 | 142 | };
|
111 | 143 |
|
112 | 144 | private static final CaseInsensitiveBytesHash<Integer> CTypeNameHash = new CaseInsensitiveBytesHash<Integer>();
|
113 | 145 |
|
114 | 146 | static {
|
115 |
| - CTypeNameHash.put("Hiragana".getBytes(), 1 + CharacterType.MAX_STD_CTYPE); |
116 |
| - CTypeNameHash.put("Katakana".getBytes(), 2 + CharacterType.MAX_STD_CTYPE); |
| 147 | + String[] names = new String[] {"Hiragana", "Katakana", "Han", "Latin", "Greek", "Cyrillic"}; |
| 148 | + for (int i = 0; i < names.length; i++) { |
| 149 | + CTypeNameHash.put(names[i].getBytes(), i + 1 + CharacterType.MAX_STD_CTYPE); |
| 150 | + } |
117 | 151 | }
|
118 | 152 |
|
119 | 153 | @Override
|
|
0 commit comments