Added
Link Here
|
1 |
/******************************************************************************* |
2 |
* Copyright (c) 2000, 2005 IBM Corporation and others. |
3 |
* All rights reserved. This program and the accompanying materials |
4 |
* are made available under the terms of the Eclipse Public License v1.0 |
5 |
* which accompanies this distribution, and is available at |
6 |
* http://www.eclipse.org/legal/epl-v10.html |
7 |
* |
8 |
* Contributors: |
9 |
* IBM Corporation - initial API and implementation |
10 |
*******************************************************************************/ |
11 |
|
12 |
package org.eclipse.ui.internal.texteditor.spelling.engine; |
13 |
|
14 |
/** |
15 |
* Default phonetic hash provider for the English language. |
16 |
* <p> |
17 |
* This algorithm uses an adapted version of the double metaphone algorithm by |
18 |
* Lawrence Philips. |
19 |
* <p> |
20 |
* |
21 |
* @since 3.0 |
22 |
*/ |
23 |
public final class DefaultPhoneticHashProvider implements IPhoneticHashProvider { |
24 |
|
25 |
private static final String[] meta01= { "ACH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
26 |
private static final String[] meta02= { "BACHER", "MACHER", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
27 |
private static final String[] meta03= { "CAESAR", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
28 |
private static final String[] meta04= { "CHIA", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
29 |
private static final String[] meta05= { "CH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
30 |
private static final String[] meta06= { "CHAE", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
31 |
private static final String[] meta07= { "HARAC", "HARIS", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
32 |
private static final String[] meta08= { "HOR", "HYM", "HIA", "HEM", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ |
33 |
private static final String[] meta09= { "CHORE", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
34 |
private static final String[] meta10= { "VAN ", "VON ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
35 |
private static final String[] meta11= { "SCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
36 |
private static final String[] meta12= { "ORCHES", "ARCHIT", "ORCHID", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
37 |
private static final String[] meta13= { "T", "S", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
38 |
private static final String[] meta14= { "A", "O", "U", "E", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ |
39 |
private static final String[] meta15= { "L", "R", "N", "M", "B", "H", "F", "V", "W", " ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ //$NON-NLS-9$ //$NON-NLS-10$ //$NON-NLS-11$ |
40 |
private static final String[] meta16= { "MC", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
41 |
private static final String[] meta17= { "CZ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
42 |
private static final String[] meta18= { "WICZ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
43 |
private static final String[] meta19= { "CIA", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
44 |
private static final String[] meta20= { "CC", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
45 |
private static final String[] meta21= { "I", "E", "H", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
46 |
private static final String[] meta22= { "HU", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
47 |
private static final String[] meta23= { "UCCEE", "UCCES", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
48 |
private static final String[] meta24= { "CK", "CG", "CQ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
49 |
private static final String[] meta25= { "CI", "CE", "CY", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
50 |
private static final String[] meta26= { "GN", "KN", "PN", "WR", "PS", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ |
51 |
private static final String[] meta27= { " C", " Q", " G", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
52 |
private static final String[] meta28= { "C", "K", "Q", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
53 |
private static final String[] meta29= { "CE", "CI", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
54 |
private static final String[] meta30= { "DG", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
55 |
private static final String[] meta31= { "I", "E", "Y", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
56 |
private static final String[] meta32= { "DT", "DD", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
57 |
private static final String[] meta33= { "B", "H", "D", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
58 |
private static final String[] meta34= { "B", "H", "D", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
59 |
private static final String[] meta35= { "B", "H", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
60 |
private static final String[] meta36= { "C", "G", "L", "R", "T", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ |
61 |
private static final String[] meta37= { "EY", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
62 |
private static final String[] meta38= { "LI", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
63 |
private static final String[] meta39= { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ //$NON-NLS-9$ //$NON-NLS-10$ //$NON-NLS-11$ //$NON-NLS-12$ |
64 |
private static final String[] meta40= { "ER", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
65 |
private static final String[] meta41= { "DANGER", "RANGER", "MANGER", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
66 |
private static final String[] meta42= { "E", "I", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
67 |
private static final String[] meta43= { "RGY", "OGY", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
68 |
private static final String[] meta44= { "E", "I", "Y", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
69 |
private static final String[] meta45= { "AGGI", "OGGI", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
70 |
private static final String[] meta46= { "VAN ", "VON ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
71 |
private static final String[] meta47= { "SCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
72 |
private static final String[] meta48= { "ET", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
73 |
private static final String[] meta49= { "C", "X", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
74 |
private static final String[] meta50= { "JOSE", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
75 |
private static final String[] meta51= { "SAN ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
76 |
private static final String[] meta52= { "SAN ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
77 |
private static final String[] meta53= { "JOSE", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
78 |
private static final String[] meta54= { "L", "T", "K", "S", "N", "M", "B", "Z", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ //$NON-NLS-9$ |
79 |
private static final String[] meta55= { "S", "K", "L", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
80 |
private static final String[] meta56= { "ILLO", "ILLA", "ALLE", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
81 |
private static final String[] meta57= { "AS", "OS", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
82 |
private static final String[] meta58= { "A", "O", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
83 |
private static final String[] meta59= { "ALLE", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
84 |
private static final String[] meta60= { "UMB", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
85 |
private static final String[] meta61= { "ER", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
86 |
private static final String[] meta62= { "P", "B", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
87 |
private static final String[] meta63= { "IE", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
88 |
private static final String[] meta64= { "ME", "MA", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
89 |
private static final String[] meta65= { "ISL", "YSL", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
90 |
private static final String[] meta66= { "SUGAR", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
91 |
private static final String[] meta67= { "SH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
92 |
private static final String[] meta68= { "HEIM", "HOEK", "HOLM", "HOLZ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ |
93 |
private static final String[] meta69= { "SIO", "SIA", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
94 |
private static final String[] meta70= { "SIAN", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
95 |
private static final String[] meta71= { "M", "N", "L", "W", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ |
96 |
private static final String[] meta72= { "Z", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
97 |
private static final String[] meta73= { "Z", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
98 |
private static final String[] meta74= { "SC", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
99 |
private static final String[] meta75= { "OO", "ER", "EN", "UY", "ED", "EM", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ |
100 |
private static final String[] meta76= { "ER", "EN", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
101 |
private static final String[] meta77= { "I", "E", "Y", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
102 |
private static final String[] meta78= { "AI", "OI", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
103 |
private static final String[] meta79= { "S", "Z", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
104 |
private static final String[] meta80= { "TION", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
105 |
private static final String[] meta81= { "TIA", "TCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
106 |
private static final String[] meta82= { "TH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
107 |
private static final String[] meta83= { "TTH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
108 |
private static final String[] meta84= { "OM", "AM", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
109 |
private static final String[] meta85= { "VAN ", "VON ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
110 |
private static final String[] meta86= { "SCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
111 |
private static final String[] meta87= { "T", "D", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
112 |
private static final String[] meta88= { "WR", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
113 |
private static final String[] meta89= { "WH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
114 |
private static final String[] meta90= { "EWSKI", "EWSKY", "OWSKI", "OWSKY", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ |
115 |
private static final String[] meta91= { "SCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ |
116 |
private static final String[] meta92= { "WICZ", "WITZ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
117 |
private static final String[] meta93= { "IAU", "EAU", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
118 |
private static final String[] meta94= { "AU", "OU", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
119 |
private static final String[] meta95= { "W", "K", "CZ", "WITZ" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
120 |
|
121 |
/** The mutator characters */ |
122 |
private static final char[] MUTATOR_CHARACTERS= { 'A', 'B', 'X', 'S', 'K', 'J', 'T', 'F', 'H', 'L', 'M', 'N', 'P', 'R', '0' }; |
123 |
|
124 |
/** The vowel characters */ |
125 |
private static final char[] VOWEL_CHARACTERS= new char[] { 'A', 'E', 'I', 'O', 'U', 'Y' }; |
126 |
|
127 |
/** |
128 |
* Test whether the specified string contains one of the candidates in the |
129 |
* list. |
130 |
* |
131 |
* @param candidates |
132 |
* Array of candidates to check |
133 |
* @param token |
134 |
* The token to check for occurrences of the candidates |
135 |
* @param offset |
136 |
* The offset where to begin checking in the string |
137 |
* @param length |
138 |
* The length of the range in the string to check |
139 |
* @return <code>true</code> iff the string contains one of the |
140 |
* candidates, <code>false</code> otherwise. |
141 |
*/ |
142 |
protected static final boolean hasOneOf(final String[] candidates, final char[] token, final int offset, final int length) { |
143 |
|
144 |
if (offset < 0 || offset >= token.length || candidates.length == 0) |
145 |
return false; |
146 |
|
147 |
final String checkable= new String(token, offset, length); |
148 |
for (int index= 0; index < candidates.length; index++) { |
149 |
|
150 |
if (candidates[index].equals(checkable)) |
151 |
return true; |
152 |
} |
153 |
return false; |
154 |
} |
155 |
|
156 |
/** |
157 |
* Test whether the specified token contains one of the candidates in the |
158 |
* list. |
159 |
* |
160 |
* @param candidates |
161 |
* Array of candidates to check |
162 |
* @param token |
163 |
* The token to check for occurrences of the candidates |
164 |
* @return <code>true</code> iff the string contains one of the |
165 |
* candidates, <code>false</code> otherwise. |
166 |
*/ |
167 |
protected static final boolean hasOneOf(final String[] candidates, final String token) { |
168 |
|
169 |
for (int index= 0; index < candidates.length; index++) { |
170 |
|
171 |
if (token.indexOf(candidates[index]) >= 0) |
172 |
return true; |
173 |
} |
174 |
return false; |
175 |
} |
176 |
|
177 |
/** |
178 |
* Tests whether the specified token contains a vowel at the specified |
179 |
* offset. |
180 |
* |
181 |
* @param token |
182 |
* The token to check for a vowel |
183 |
* @param offset |
184 |
* The offset where to begin checking in the token |
185 |
* @param length |
186 |
* The length of the range in the token to check |
187 |
* @return <code>true</code> iff the token contains a vowel, <code>false</code> |
188 |
* otherwise. |
189 |
*/ |
190 |
protected static final boolean hasVowel(final char[] token, final int offset, final int length) { |
191 |
|
192 |
if (offset >= 0 && offset < length) { |
193 |
|
194 |
final char character= token[offset]; |
195 |
for (int index= 0; index < VOWEL_CHARACTERS.length; index++) { |
196 |
|
197 |
if (VOWEL_CHARACTERS[index] == character) |
198 |
return true; |
199 |
} |
200 |
} |
201 |
return false; |
202 |
} |
203 |
|
204 |
/* |
205 |
* @see org.eclipse.spelling.done.IPhoneticHasher#getHash(java.lang.String) |
206 |
*/ |
207 |
public final String getHash(final String word) { |
208 |
|
209 |
final String input= word.toUpperCase() + " "; //$NON-NLS-1$ |
210 |
final char[] hashable= input.toCharArray(); |
211 |
|
212 |
final boolean has95= hasOneOf(meta95, input); |
213 |
final StringBuffer buffer= new StringBuffer(hashable.length); |
214 |
|
215 |
int offset= 0; |
216 |
if (hasOneOf(meta26, hashable, 0, 2)) |
217 |
offset += 1; |
218 |
|
219 |
if (hashable[0] == 'X') { |
220 |
buffer.append('S'); |
221 |
offset += 1; |
222 |
} |
223 |
|
224 |
while (offset < hashable.length) { |
225 |
|
226 |
switch (hashable[offset]) { |
227 |
case 'A' : |
228 |
case 'E' : |
229 |
case 'I' : |
230 |
case 'O' : |
231 |
case 'U' : |
232 |
case 'Y' : |
233 |
if (offset == 0) |
234 |
buffer.append('A'); |
235 |
offset += 1; |
236 |
break; |
237 |
case 'B' : |
238 |
buffer.append('P'); |
239 |
if (hashable[offset + 1] == 'B') |
240 |
offset += 2; |
241 |
else |
242 |
offset += 1; |
243 |
break; |
244 |
case 'C' : |
245 |
if ((offset > 1) && !hasVowel(hashable, offset - 2, hashable.length) && hasOneOf(meta01, hashable, (offset - 1), 3) && (hashable[offset + 2] != 'I') && (hashable[offset + 2] != 'E') || hasOneOf(meta02, hashable, (offset - 2), 6)) { |
246 |
buffer.append('K'); |
247 |
offset += 2; |
248 |
break; |
249 |
} |
250 |
if ((offset == 0) && hasOneOf(meta03, hashable, offset, 6)) { |
251 |
buffer.append('S'); |
252 |
offset += 2; |
253 |
break; |
254 |
} |
255 |
if (hasOneOf(meta04, hashable, offset, 4)) { |
256 |
buffer.append('K'); |
257 |
offset += 2; |
258 |
break; |
259 |
} |
260 |
if (hasOneOf(meta05, hashable, offset, 2)) { |
261 |
if ((offset > 0) && hasOneOf(meta06, hashable, offset, 4)) { |
262 |
buffer.append('K'); |
263 |
offset += 2; |
264 |
break; |
265 |
} |
266 |
if ((offset == 0) && hasOneOf(meta07, hashable, (offset + 1), 5) || hasOneOf(meta08, hashable, offset + 1, 3) && !hasOneOf(meta09, hashable, 0, 5)) { |
267 |
buffer.append('K'); |
268 |
offset += 2; |
269 |
break; |
270 |
} |
271 |
if (hasOneOf(meta10, hashable, 0, 4) || hasOneOf(meta11, hashable, 0, 3) || hasOneOf(meta12, hashable, offset - 2, 6) || hasOneOf(meta13, hashable, offset + 2, 1) || (hasOneOf(meta14, hashable, offset - 1, 1) || (offset == 0)) && hasOneOf(meta15, hashable, offset + 2, 1)) { |
272 |
buffer.append('K'); |
273 |
} else { |
274 |
if (offset > 0) { |
275 |
if (hasOneOf(meta16, hashable, 0, 2)) |
276 |
buffer.append('K'); |
277 |
else |
278 |
buffer.append('X'); |
279 |
} else { |
280 |
buffer.append('X'); |
281 |
} |
282 |
} |
283 |
offset += 2; |
284 |
break; |
285 |
} |
286 |
if (hasOneOf(meta17, hashable, offset, 2) && !hasOneOf(meta18, hashable, offset, 4)) { |
287 |
buffer.append('S'); |
288 |
offset += 2; |
289 |
break; |
290 |
} |
291 |
if (hasOneOf(meta19, hashable, offset, 2)) { |
292 |
buffer.append('X'); |
293 |
offset += 2; |
294 |
break; |
295 |
} |
296 |
if (hasOneOf(meta20, hashable, offset, 2) && !((offset == 1) && hashable[0] == 'M')) { |
297 |
if (hasOneOf(meta21, hashable, offset + 2, 1) && !hasOneOf(meta22, hashable, offset + 2, 2)) { |
298 |
if (((offset == 1) && (hashable[offset - 1] == 'A')) || hasOneOf(meta23, hashable, (offset - 1), 5)) |
299 |
buffer.append("KS"); //$NON-NLS-1$ |
300 |
else |
301 |
buffer.append('X'); |
302 |
offset += 3; |
303 |
break; |
304 |
} else { |
305 |
buffer.append('K'); |
306 |
offset += 2; |
307 |
break; |
308 |
} |
309 |
} |
310 |
if (hasOneOf(meta24, hashable, offset, 2)) { |
311 |
buffer.append('K'); |
312 |
offset += 2; |
313 |
break; |
314 |
} else if (hasOneOf(meta25, hashable, offset, 2)) { |
315 |
buffer.append('S'); |
316 |
offset += 2; |
317 |
break; |
318 |
} |
319 |
buffer.append('K'); |
320 |
if (hasOneOf(meta27, hashable, offset + 1, 2)) |
321 |
offset += 3; |
322 |
else if (hasOneOf(meta28, hashable, offset + 1, 1) && !hasOneOf(meta29, hashable, offset + 1, 2)) |
323 |
offset += 2; |
324 |
else |
325 |
offset += 1; |
326 |
break; |
327 |
case '\u00C7' : |
328 |
buffer.append('S'); |
329 |
offset += 1; |
330 |
break; |
331 |
case 'D' : |
332 |
if (hasOneOf(meta30, hashable, offset, 2)) { |
333 |
if (hasOneOf(meta31, hashable, offset + 2, 1)) { |
334 |
buffer.append('J'); |
335 |
offset += 3; |
336 |
break; |
337 |
} else { |
338 |
buffer.append("TK"); //$NON-NLS-1$ |
339 |
offset += 2; |
340 |
break; |
341 |
} |
342 |
} |
343 |
buffer.append('T'); |
344 |
if (hasOneOf(meta32, hashable, offset, 2)) { |
345 |
offset += 2; |
346 |
} else { |
347 |
offset += 1; |
348 |
} |
349 |
break; |
350 |
case 'F' : |
351 |
if (hashable[offset + 1] == 'F') |
352 |
offset += 2; |
353 |
else |
354 |
offset += 1; |
355 |
buffer.append('F'); |
356 |
break; |
357 |
case 'G' : |
358 |
if (hashable[offset + 1] == 'H') { |
359 |
if ((offset > 0) && !hasVowel(hashable, offset - 1, hashable.length)) { |
360 |
buffer.append('K'); |
361 |
offset += 2; |
362 |
break; |
363 |
} |
364 |
if (offset < 3) { |
365 |
if (offset == 0) { |
366 |
if (hashable[offset + 2] == 'I') |
367 |
buffer.append('J'); |
368 |
else |
369 |
buffer.append('K'); |
370 |
offset += 2; |
371 |
break; |
372 |
} |
373 |
} |
374 |
if ((offset > 1) && hasOneOf(meta33, hashable, offset - 2, 1) || ((offset > 2) && hasOneOf(meta34, hashable, offset - 3, 1)) || ((offset > 3) && hasOneOf(meta35, hashable, offset - 4, 1))) { |
375 |
offset += 2; |
376 |
break; |
377 |
} else { |
378 |
if ((offset > 2) && (hashable[offset - 1] == 'U') && hasOneOf(meta36, hashable, offset - 3, 1)) { |
379 |
buffer.append('F'); |
380 |
} else { |
381 |
if ((offset > 0) && (hashable[offset - 1] != 'I')) |
382 |
buffer.append('K'); |
383 |
} |
384 |
offset += 2; |
385 |
break; |
386 |
} |
387 |
} |
388 |
if (hashable[offset + 1] == 'N') { |
389 |
if ((offset == 1) && hasVowel(hashable, 0, hashable.length) && !has95) { |
390 |
buffer.append("KN"); //$NON-NLS-1$ |
391 |
} else { |
392 |
if (!hasOneOf(meta37, hashable, offset + 2, 2) && (hashable[offset + 1] != 'Y') && !has95) { |
393 |
buffer.append("N"); //$NON-NLS-1$ |
394 |
} else { |
395 |
buffer.append("KN"); //$NON-NLS-1$ |
396 |
} |
397 |
} |
398 |
offset += 2; |
399 |
break; |
400 |
} |
401 |
if (hasOneOf(meta38, hashable, offset + 1, 2) && !has95) { |
402 |
buffer.append("KL"); //$NON-NLS-1$ |
403 |
offset += 2; |
404 |
break; |
405 |
} |
406 |
if ((offset == 0) && ((hashable[offset + 1] == 'Y') || hasOneOf(meta39, hashable, offset + 1, 2))) { |
407 |
buffer.append('K'); |
408 |
offset += 2; |
409 |
break; |
410 |
} |
411 |
if ((hasOneOf(meta40, hashable, offset + 1, 2) || (hashable[offset + 1] == 'Y')) && !hasOneOf(meta41, hashable, 0, 6) && !hasOneOf(meta42, hashable, offset - 1, 1) && !hasOneOf(meta43, hashable, offset - 1, 3)) { |
412 |
buffer.append('K'); |
413 |
offset += 2; |
414 |
break; |
415 |
} |
416 |
if (hasOneOf(meta44, hashable, offset + 1, 1) || hasOneOf(meta45, hashable, offset - 1, 4)) { |
417 |
if (hasOneOf(meta46, hashable, 0, 4) || hasOneOf(meta47, hashable, 0, 3) || hasOneOf(meta48, hashable, offset + 1, 2)) { |
418 |
buffer.append('K'); |
419 |
} else { |
420 |
buffer.append('J'); |
421 |
} |
422 |
offset += 2; |
423 |
break; |
424 |
} |
425 |
if (hashable[offset + 1] == 'G') |
426 |
offset += 2; |
427 |
else |
428 |
offset += 1; |
429 |
buffer.append('K'); |
430 |
break; |
431 |
case 'H' : |
432 |
if (((offset == 0) || hasVowel(hashable, offset - 1, hashable.length)) && hasVowel(hashable, offset + 1, hashable.length)) { |
433 |
buffer.append('H'); |
434 |
offset += 2; |
435 |
} else { |
436 |
offset += 1; |
437 |
} |
438 |
break; |
439 |
case 'J' : |
440 |
if (hasOneOf(meta50, hashable, offset, 4) || hasOneOf(meta51, hashable, 0, 4)) { |
441 |
if ((offset == 0) && (hashable[offset + 4] == ' ') || hasOneOf(meta52, hashable, 0, 4)) { |
442 |
buffer.append('H'); |
443 |
} else { |
444 |
buffer.append('J'); |
445 |
} |
446 |
offset += 1; |
447 |
break; |
448 |
} |
449 |
if ((offset == 0) && !hasOneOf(meta53, hashable, offset, 4)) { |
450 |
buffer.append('J'); |
451 |
} else { |
452 |
if (hasVowel(hashable, offset - 1, hashable.length) && !has95 && ((hashable[offset + 1] == 'A') || hashable[offset + 1] == 'O')) { |
453 |
buffer.append('J'); |
454 |
} else { |
455 |
if (offset == (hashable.length - 1)) { |
456 |
buffer.append('J'); |
457 |
} else { |
458 |
if (!hasOneOf(meta54, hashable, offset + 1, 1) && !hasOneOf(meta55, hashable, offset - 1, 1)) { |
459 |
buffer.append('J'); |
460 |
} |
461 |
} |
462 |
} |
463 |
} |
464 |
if (hashable[offset + 1] == 'J') |
465 |
offset += 2; |
466 |
else |
467 |
offset += 1; |
468 |
break; |
469 |
case 'K' : |
470 |
if (hashable[offset + 1] == 'K') |
471 |
offset += 2; |
472 |
else |
473 |
offset += 1; |
474 |
buffer.append('K'); |
475 |
break; |
476 |
case 'L' : |
477 |
if (hashable[offset + 1] == 'L') { |
478 |
if (((offset == (hashable.length - 3)) && hasOneOf(meta56, hashable, offset - 1, 4)) || ((hasOneOf(meta57, hashable, (hashable.length - 1) - 1, 2) || hasOneOf(meta58, hashable, hashable.length - 1, 1)) && hasOneOf(meta59, hashable, offset - 1, 4))) { |
479 |
buffer.append('L'); |
480 |
offset += 2; |
481 |
break; |
482 |
} |
483 |
offset += 2; |
484 |
} else |
485 |
offset += 1; |
486 |
buffer.append('L'); |
487 |
break; |
488 |
case 'M' : |
489 |
if ((hasOneOf(meta60, hashable, offset - 1, 3) && (((offset + 1) == (hashable.length - 1)) || hasOneOf(meta61, hashable, offset + 2, 2))) || (hashable[offset + 1] == 'M')) |
490 |
offset += 2; |
491 |
else |
492 |
offset += 1; |
493 |
buffer.append('M'); |
494 |
break; |
495 |
case 'N' : |
496 |
if (hashable[offset + 1] == 'N') |
497 |
offset += 2; |
498 |
else |
499 |
offset += 1; |
500 |
buffer.append('N'); |
501 |
break; |
502 |
case '\u00D1' : |
503 |
offset += 1; |
504 |
buffer.append('N'); |
505 |
break; |
506 |
case 'P' : |
507 |
if (hashable[offset + 1] == 'N') { |
508 |
buffer.append('F'); |
509 |
offset += 2; |
510 |
break; |
511 |
} |
512 |
if (hasOneOf(meta62, hashable, offset + 1, 1)) |
513 |
offset += 2; |
514 |
else |
515 |
offset += 1; |
516 |
buffer.append('P'); |
517 |
break; |
518 |
case 'Q' : |
519 |
if (hashable[offset + 1] == 'Q') |
520 |
offset += 2; |
521 |
else |
522 |
offset += 1; |
523 |
buffer.append('K'); |
524 |
break; |
525 |
case 'R' : |
526 |
if (!((offset == (hashable.length - 1)) && !has95 && hasOneOf(meta63, hashable, offset - 2, 2) && !hasOneOf(meta64, hashable, offset - 4, 2))) |
527 |
buffer.append('R'); |
528 |
if (hashable[offset + 1] == 'R') |
529 |
offset += 2; |
530 |
else |
531 |
offset += 1; |
532 |
break; |
533 |
case 'S' : |
534 |
if (hasOneOf(meta65, hashable, offset - 1, 3)) { |
535 |
offset += 1; |
536 |
break; |
537 |
} |
538 |
if ((offset == 0) && hasOneOf(meta66, hashable, offset, 5)) { |
539 |
buffer.append('X'); |
540 |
offset += 1; |
541 |
break; |
542 |
} |
543 |
if (hasOneOf(meta67, hashable, offset, 2)) { |
544 |
if (hasOneOf(meta68, hashable, offset + 1, 4)) |
545 |
buffer.append('S'); |
546 |
else |
547 |
buffer.append('X'); |
548 |
offset += 2; |
549 |
break; |
550 |
} |
551 |
if (hasOneOf(meta69, hashable, offset, 3) || hasOneOf(meta70, hashable, offset, 4)) { |
552 |
buffer.append('S'); |
553 |
offset += 3; |
554 |
break; |
555 |
} |
556 |
if (((offset == 0) && hasOneOf(meta71, hashable, offset + 1, 1)) || hasOneOf(meta72, hashable, offset + 1, 1)) { |
557 |
buffer.append('S'); |
558 |
if (hasOneOf(meta73, hashable, offset + 1, 1)) |
559 |
offset += 2; |
560 |
else |
561 |
offset += 1; |
562 |
break; |
563 |
} |
564 |
if (hasOneOf(meta74, hashable, offset, 2)) { |
565 |
if (hashable[offset + 2] == 'H') |
566 |
if (hasOneOf(meta75, hashable, offset + 3, 2)) { |
567 |
if (hasOneOf(meta76, hashable, offset + 3, 2)) { |
568 |
buffer.append("X"); //$NON-NLS-1$ |
569 |
} else { |
570 |
buffer.append("SK"); //$NON-NLS-1$ |
571 |
} |
572 |
offset += 3; |
573 |
break; |
574 |
} else { |
575 |
buffer.append('X'); |
576 |
offset += 3; |
577 |
break; |
578 |
} |
579 |
if (hasOneOf(meta77, hashable, offset + 2, 1)) { |
580 |
buffer.append('S'); |
581 |
offset += 3; |
582 |
break; |
583 |
} |
584 |
buffer.append("SK"); //$NON-NLS-1$ |
585 |
offset += 3; |
586 |
break; |
587 |
} |
588 |
if (!((offset == (hashable.length - 1)) && hasOneOf(meta78, hashable, offset - 2, 2))) |
589 |
buffer.append('S'); |
590 |
if (hasOneOf(meta79, hashable, offset + 1, 1)) |
591 |
offset += 2; |
592 |
else |
593 |
offset += 1; |
594 |
break; |
595 |
case 'T' : |
596 |
if (hasOneOf(meta80, hashable, offset, 4)) { |
597 |
buffer.append('X'); |
598 |
offset += 3; |
599 |
break; |
600 |
} |
601 |
if (hasOneOf(meta81, hashable, offset, 3)) { |
602 |
buffer.append('X'); |
603 |
offset += 3; |
604 |
break; |
605 |
} |
606 |
if (hasOneOf(meta82, hashable, offset, 2) || hasOneOf(meta83, hashable, offset, 3)) { |
607 |
if (hasOneOf(meta84, hashable, (offset + 2), 2) || hasOneOf(meta85, hashable, 0, 4) || hasOneOf(meta86, hashable, 0, 3)) { |
608 |
buffer.append('T'); |
609 |
} else { |
610 |
buffer.append('0'); |
611 |
} |
612 |
offset += 2; |
613 |
break; |
614 |
} |
615 |
if (hasOneOf(meta87, hashable, offset + 1, 1)) { |
616 |
offset += 2; |
617 |
} else |
618 |
offset += 1; |
619 |
buffer.append('T'); |
620 |
break; |
621 |
case 'V' : |
622 |
if (hashable[offset + 1] == 'V') |
623 |
offset += 2; |
624 |
else |
625 |
offset += 1; |
626 |
buffer.append('F'); |
627 |
break; |
628 |
case 'W' : |
629 |
if (hasOneOf(meta88, hashable, offset, 2)) { |
630 |
buffer.append('R'); |
631 |
offset += 2; |
632 |
break; |
633 |
} |
634 |
if ((offset == 0) && (hasVowel(hashable, offset + 1, hashable.length) || hasOneOf(meta89, hashable, offset, 2))) { |
635 |
buffer.append('A'); |
636 |
} |
637 |
if (((offset == (hashable.length - 1)) && hasVowel(hashable, offset - 1, hashable.length)) || hasOneOf(meta90, hashable, offset - 1, 5) || hasOneOf(meta91, hashable, 0, 3)) { |
638 |
buffer.append('F'); |
639 |
offset += 1; |
640 |
break; |
641 |
} |
642 |
if (hasOneOf(meta92, hashable, offset, 4)) { |
643 |
buffer.append("TS"); //$NON-NLS-1$ |
644 |
offset += 4; |
645 |
break; |
646 |
} |
647 |
offset += 1; |
648 |
break; |
649 |
case 'X' : |
650 |
if (!((offset == (hashable.length - 1)) && (hasOneOf(meta93, hashable, offset - 3, 3) || hasOneOf(meta94, hashable, offset - 2, 2)))) |
651 |
buffer.append("KS"); //$NON-NLS-1$ |
652 |
if (hasOneOf(meta49, hashable, offset + 1, 1)) |
653 |
offset += 2; |
654 |
else |
655 |
offset += 1; |
656 |
break; |
657 |
case 'Z' : |
658 |
if (hashable[offset + 1] == 'H') { |
659 |
buffer.append('J'); |
660 |
offset += 2; |
661 |
break; |
662 |
} else { |
663 |
buffer.append('S'); |
664 |
} |
665 |
if (hashable[offset + 1] == 'Z') |
666 |
offset += 2; |
667 |
else |
668 |
offset += 1; |
669 |
break; |
670 |
default : |
671 |
offset += 1; |
672 |
} |
673 |
} |
674 |
return buffer.toString(); |
675 |
} |
676 |
|
677 |
/* |
678 |
* @see org.eclipse.spelling.done.IPhoneticHasher#getMutators() |
679 |
*/ |
680 |
public final char[] getMutators() { |
681 |
return MUTATOR_CHARACTERS; |
682 |
} |
683 |
} |