Removed
Link Here
|
1 |
/******************************************************************************* |
2 |
* Copyright (c) 2010, 2011 IBM Corporation and others. |
3 |
* All rights reserved. This program and the accompanying materials |
4 |
* are made available under the terms of the Eclipse Public License v1.0 |
5 |
* which accompanies this distribution, and is available at |
6 |
* http://www.eclipse.org/legal/epl-v10.html |
7 |
* |
8 |
* Contributors: |
9 |
* IBM Corporation - initial API and implementation |
10 |
******************************************************************************/ |
11 |
package org.eclipse.equinox.bidi.internal; |
12 |
|
13 |
import org.eclipse.equinox.bidi.BidiComplexEngine; |
14 |
import org.eclipse.equinox.bidi.BidiComplexEnvironment; |
15 |
import org.eclipse.equinox.bidi.custom.*; |
16 |
|
17 |
/** |
18 |
* <code>BidiComplexImpl</code> provides the code which implements the API in |
19 |
* {@link BidiComplexEngine}. All its public methods are shadows of similarly |
20 |
* signed methods of <code>BidiComplexEngine</code>, and their documentation |
21 |
* is by reference to the methods in <code>BidiComplexEngine</code>. |
22 |
* |
23 |
* @author Matitiahu Allouche |
24 |
*/ |
25 |
public class BidiComplexImpl { |
26 |
|
27 |
static final String EMPTY_STRING = ""; //$NON-NLS-1$ |
28 |
static final byte B = Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR; |
29 |
static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT; |
30 |
static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT; |
31 |
static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC; |
32 |
static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER; |
33 |
static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER; |
34 |
static final char LRM = 0x200E; |
35 |
static final char RLM = 0x200F; |
36 |
static final char LRE = 0x202A; |
37 |
static final char RLE = 0x202B; |
38 |
static final char PDF = 0x202C; |
39 |
static final char[] MARKS = {LRM, RLM}; |
40 |
static final char[] EMBEDS = {LRE, RLE}; |
41 |
static final byte[] STRONGS = {L, R}; |
42 |
static final int PREFIX_LENGTH = 2; |
43 |
static final int SUFFIX_LENGTH = 2; |
44 |
static final int FIXES_LENGTH = PREFIX_LENGTH + SUFFIX_LENGTH; |
45 |
static final int DIRPROPS_ADD = 2; |
46 |
static final int OFFSETS_SHIFT = 3; |
47 |
static final int[] EMPTY_INT_ARRAY = new int[0]; |
48 |
static final BidiComplexEnvironment IGNORE_ENVIRONMENT = new BidiComplexEnvironment(null, false, BidiComplexEnvironment.ORIENT_IGNORE); |
49 |
|
50 |
/**
 * Prevents creation of a <code>BidiComplexImpl</code> instance: all the
 * methods of this class are static.
 */
private BidiComplexImpl() {
    // nothing to do
}
56 |
|
57 |
/* |
58 |
// keep private copy of specialsCount to avoid later modification |
59 |
specialsCount = features.getSpecialsCount(); |
60 |
locations = new int[features.getSeparators().length() + specialsCount]; |
61 |
} |
62 |
*/ |
63 |
static long computeNextLocation(IBidiComplexProcessor processor, BidiComplexFeatures features, String text, byte[] dirProps, int[] offsets, int[] locations, int[] state, int curPos) { |
64 |
String separators = features.getSeparators(); |
65 |
int separCount = separators.length(); |
66 |
int specialsCount = features.getSpecialsCount(); |
67 |
int len = text.length(); |
68 |
int nextLocation = len; |
69 |
int idxLocation = 0; |
70 |
// Start with special sequences to give them precedence over simple |
71 |
// separators. This may apply to cases like slash+asterisk versus slash. |
72 |
for (int i = 0; i < specialsCount; i++) { |
73 |
int location = locations[separCount + i]; |
74 |
if (location < curPos) { |
75 |
offsets = ensureRoomInOffsets(offsets); |
76 |
location = processor.indexOfSpecial(features, text, dirProps, offsets, i + 1, curPos); |
77 |
if (location < 0) |
78 |
location = len; |
79 |
locations[separCount + i] = location; |
80 |
} |
81 |
if (location < nextLocation) { |
82 |
nextLocation = location; |
83 |
idxLocation = separCount + i; |
84 |
} |
85 |
} |
86 |
for (int i = 0; i < separCount; i++) { |
87 |
int location = locations[i]; |
88 |
if (location < curPos) { |
89 |
location = text.indexOf(separators.charAt(i), curPos); |
90 |
if (location < 0) |
91 |
location = len; |
92 |
locations[i] = location; |
93 |
} |
94 |
if (location < nextLocation) { |
95 |
nextLocation = location; |
96 |
idxLocation = i; |
97 |
} |
98 |
} |
99 |
return nextLocation + (((long) idxLocation) << 32); |
100 |
} |
101 |
|
102 |
static int getCurOrient(BidiComplexEnvironment environment, String text, byte[] dirProps) { |
103 |
int orient = environment.getOrientation(); |
104 |
if ((orient & BidiComplexEnvironment.ORIENT_CONTEXTUAL_LTR) == 0) { |
105 |
// absolute orientation |
106 |
return orient; |
107 |
} |
108 |
// contextual orientation |
109 |
int len = text.length(); |
110 |
byte dirProp; |
111 |
for (int i = 0; i < len; i++) { |
112 |
// In the following lines, B, L, R and AL represent bidi categories |
113 |
// as defined in the Unicode Bidirectional Algorithm |
114 |
// ( http://www.unicode.org/reports/tr9/ ). |
115 |
// B represents the category Block Separator. |
116 |
// L represents the category Left to Right character. |
117 |
// R represents the category Right to Left character. |
118 |
// AL represents the category Arabic Letter. |
119 |
dirProp = dirProps[i]; |
120 |
if (dirProp == 0) { |
121 |
dirProp = Character.getDirectionality(text.charAt(i)); |
122 |
if (dirProp == B) // B char resolves to L or R depending on orientation |
123 |
continue; |
124 |
dirProps[i] = (byte) (dirProp + DIRPROPS_ADD); |
125 |
} else { |
126 |
dirProp -= DIRPROPS_ADD; |
127 |
} |
128 |
if (dirProp == L) |
129 |
return BidiComplexEnvironment.ORIENT_LTR; |
130 |
if (dirProp == R || dirProp == AL) |
131 |
return BidiComplexEnvironment.ORIENT_RTL; |
132 |
} |
133 |
// return the default orientation corresponding to the contextual orientation |
134 |
return orient & 1; |
135 |
} |
136 |
|
137 |
/** |
138 |
* @see BidiComplexEngine#getCurDirection BidiComplexEngine.getCurDirection |
139 |
*/ |
140 |
public static int getCurDirection(Object _processor, BidiComplexFeatures features, BidiComplexEnvironment environment, String text, byte[] dirProps) { |
141 |
if (environment == null) |
142 |
environment = BidiComplexEnvironment.DEFAULT; |
143 |
if (features == null) { |
144 |
if (_processor == null) |
145 |
return BidiComplexFeatures.DIR_LTR; |
146 |
IBidiComplexProcessor processor; |
147 |
if (_processor instanceof java.lang.String) { |
148 |
processor = BidiComplexStringProcessor.getProcessor((String) _processor); |
149 |
if (processor == null) |
150 |
throw new IllegalArgumentException("Invalid processor type!"); //$NON-NLS-1$ |
151 |
} else if (_processor instanceof IBidiComplexProcessor) |
152 |
processor = (IBidiComplexProcessor) _processor; |
153 |
else |
154 |
throw new IllegalArgumentException("Invalid processor argument!"); //$NON-NLS-1$ |
155 |
features = processor.getFeatures(environment); |
156 |
} |
157 |
int dirArabic = features.getDirArabic(); |
158 |
int dirHebrew = features.getDirHebrew(); |
159 |
// same direction for Arabic and Hebrew? |
160 |
if (dirArabic == dirHebrew) |
161 |
return dirArabic; |
162 |
// check if Arabic or Hebrew letter comes first |
163 |
int len = text.length(); |
164 |
if (dirProps == null) |
165 |
dirProps = new byte[len + 1]; |
166 |
byte dirProp; |
167 |
for (int i = 0; i < len; i++) { |
168 |
// In the following lines, R and AL represent bidi categories |
169 |
// as defined in the Unicode Bidirectional Algorithm |
170 |
// ( http://www.unicode.org/reports/tr9/ ). |
171 |
// R represents the category Right to Left character. |
172 |
// AL represents the category Arabic Letter. |
173 |
byte saveOrient = dirProps[len]; |
174 |
dirProps[len] = -1; // make getDirProp return B |
175 |
dirProp = getDirProp(text, dirProps, i); |
176 |
dirProps[len] = saveOrient; |
177 |
if (dirProp == AL) |
178 |
return dirArabic; |
179 |
if (dirProp == R) |
180 |
return dirHebrew; |
181 |
} |
182 |
// found no Arabic or Hebrew character |
183 |
return BidiComplexFeatures.DIR_LTR; |
184 |
} |
185 |
|
186 |
/** |
187 |
* @see BidiComplexProcessor#getDirProp BidiComplexProcessor.getDirProp |
188 |
*/ |
189 |
public static byte getDirProp(String text, byte[] dirProps, int index) { |
190 |
byte dirProp = dirProps[index]; |
191 |
if (dirProp == 0) { |
192 |
// In the following lines, B, L and R represent bidi categories |
193 |
// as defined in the Unicode Bidirectional Algorithm |
194 |
// ( http://www.unicode.org/reports/tr9/ ). |
195 |
// B represents the category Block Separator. |
196 |
// L represents the category Left to Right character. |
197 |
// R represents the category Right to Left character. |
198 |
dirProp = Character.getDirectionality(text.charAt(index)); |
199 |
if (dirProp == B) { |
200 |
// the last entry of dirProps contains the current component orientation |
201 |
byte orient = dirProps[dirProps.length - 1]; |
202 |
if (orient == -1) |
203 |
return B; |
204 |
dirProp = orient == BidiComplexEnvironment.ORIENT_RTL ? R : L; |
205 |
} |
206 |
dirProps[index] = (byte) (dirProp + DIRPROPS_ADD); |
207 |
return dirProp; |
208 |
} |
209 |
return (byte) (dirProp - DIRPROPS_ADD); |
210 |
} |
211 |
|
212 |
/** |
213 |
* @see BidiComplexProcessor#setDirProp BidiComplexProcessor.setDirProp |
214 |
*/ |
215 |
public static void setDirProp(byte[] dirProps, int index, byte dirProp) { |
216 |
dirProps[index] = (byte) (dirProp + DIRPROPS_ADD); |
217 |
} |
218 |
|
219 |
/** |
220 |
* @see BidiComplexProcessor#processSeparator BidiComplexProcessor.processSeparator |
221 |
*/ |
222 |
public static void processSeparator(BidiComplexFeatures features, String text, byte[] dirProps, int[] offsets, int separLocation) { |
223 |
// In this method, L, R, AL, AN and EN represent bidi categories |
224 |
// as defined in the Unicode Bidirectional Algorithm |
225 |
// ( http://www.unicode.org/reports/tr9/ ). |
226 |
// L represents the category Left to Right character. |
227 |
// R represents the category Right to Left character. |
228 |
// AL represents the category Arabic Letter. |
229 |
// AN represents the category Arabic Number. |
230 |
// EN represents the category European Number. |
231 |
int len = text.length(); |
232 |
// offsets[2] contains the complex expression direction |
233 |
if (offsets[2] == BidiComplexFeatures.DIR_RTL) { |
234 |
// the expression base direction is RTL |
235 |
for (int i = separLocation - 1; i >= 0; i--) { |
236 |
byte dirProp = getDirProp(text, dirProps, i); |
237 |
if (dirProp == R || dirProp == AL) |
238 |
return; |
239 |
if (dirProp == L) { |
240 |
for (int j = separLocation; j < len; j++) { |
241 |
dirProp = getDirProp(text, dirProps, j); |
242 |
if (dirProp == R || dirProp == AL) |
243 |
return; |
244 |
if (dirProp == L || dirProp == EN) { |
245 |
insertMark(text, dirProps, offsets, separLocation); |
246 |
return; |
247 |
} |
248 |
} |
249 |
return; |
250 |
} |
251 |
} |
252 |
return; |
253 |
} |
254 |
|
255 |
// the expression base direction is LTR |
256 |
boolean doneAN = false; |
257 |
boolean ignoreArabic = features.getIgnoreArabic(); |
258 |
boolean ignoreHebrew = features.getIgnoreHebrew(); |
259 |
if (ignoreArabic && ignoreHebrew) |
260 |
return; |
261 |
byte _R, _AL, _AN; |
262 |
if (ignoreArabic) { |
263 |
_AL = Byte.MIN_VALUE; // not a real value |
264 |
_AN = Byte.MIN_VALUE; |
265 |
} else { |
266 |
_AL = AL; |
267 |
_AN = AN; |
268 |
} |
269 |
if (ignoreHebrew) |
270 |
_R = Byte.MIN_VALUE; |
271 |
else |
272 |
_R = R; |
273 |
for (int i = separLocation - 1; i >= 0; i--) { |
274 |
byte dirProp = getDirProp(text, dirProps, i); |
275 |
if (dirProp == L) |
276 |
return; |
277 |
if (dirProp == _R || dirProp == _AL) { |
278 |
for (int j = separLocation; j < len; j++) { |
279 |
dirProp = getDirProp(text, dirProps, j); |
280 |
if (dirProp == L) |
281 |
return; |
282 |
if (dirProp == _R || dirProp == EN || dirProp == _AL || dirProp == _AN) { |
283 |
insertMark(text, dirProps, offsets, separLocation); |
284 |
return; |
285 |
} |
286 |
} |
287 |
return; |
288 |
} |
289 |
if (dirProp == _AN && !doneAN) { |
290 |
for (int j = separLocation; j < len; j++) { |
291 |
dirProp = getDirProp(text, dirProps, j); |
292 |
if (dirProp == L) |
293 |
return; |
294 |
if (dirProp == _AL || dirProp == _AN || dirProp == _R) { |
295 |
insertMark(text, dirProps, offsets, separLocation); |
296 |
return; |
297 |
} |
298 |
} |
299 |
doneAN = true; |
300 |
} |
301 |
} |
302 |
} |
303 |
|
304 |
/** |
305 |
* @see BidiComplexEngine#leanToFullText BidiComplexEngine.leanToFullText |
306 |
*/ |
307 |
public static String leanToFullText(Object processor, BidiComplexFeatures features, BidiComplexEnvironment environment, String text, int[] state) { |
308 |
int len = text.length(); |
309 |
if (len == 0) |
310 |
return text; |
311 |
byte[] dirProps = new byte[len + 1]; |
312 |
int[] offsets = leanToFullCommon(processor, features, environment, text, state, dirProps); |
313 |
int prefixLength = offsets[1]; |
314 |
int count = offsets[0] - OFFSETS_SHIFT; |
315 |
if (count == 0 && prefixLength == 0) |
316 |
return text; |
317 |
int newLen = len + count; |
318 |
if (prefixLength == 1) |
319 |
newLen++; /* +1 for a mark char */ |
320 |
else if (prefixLength == 2) |
321 |
newLen += FIXES_LENGTH; |
322 |
char[] fullChars = new char[newLen]; |
323 |
int added = prefixLength; |
324 |
// add marks at offsets |
325 |
int direction = offsets[2]; |
326 |
char curMark = MARKS[direction]; |
327 |
for (int i = 0, j = OFFSETS_SHIFT; i < len; i++) { |
328 |
char c = text.charAt(i); |
329 |
// offsets[0] contains the number of used entries |
330 |
if (j < offsets[0] && i == offsets[j]) { |
331 |
fullChars[i + added] = curMark; |
332 |
added++; |
333 |
j++; |
334 |
} |
335 |
fullChars[i + added] = c; |
336 |
} |
337 |
if (prefixLength > 0) { /* add prefix/suffix ? */ |
338 |
if (prefixLength == 1) { /* contextual orientation */ |
339 |
fullChars[0] = curMark; |
340 |
} else { |
341 |
// When the orientation is RTL, we need to add EMBED at the |
342 |
// start of the text and PDF at its end. |
343 |
// However, because of a bug in Windows' handling of LRE/PDF, |
344 |
// we add EMBED_PREFIX at the start and EMBED_SUFFIX at the end. |
345 |
char curEmbed = EMBEDS[direction]; |
346 |
fullChars[0] = curEmbed; |
347 |
fullChars[1] = curMark; |
348 |
fullChars[newLen - 1] = PDF; |
349 |
fullChars[newLen - 2] = curMark; |
350 |
} |
351 |
} |
352 |
return new String(fullChars); |
353 |
} |
354 |
|
355 |
/** |
356 |
* @see BidiComplexEngine#leanToFullMap BidiComplexEngine.leanToFullMap |
357 |
*/ |
358 |
public static int[] leanToFullMap(Object processor, BidiComplexFeatures features, BidiComplexEnvironment environment, String text, int[] state) { |
359 |
int len = text.length(); |
360 |
if (len == 0) |
361 |
return EMPTY_INT_ARRAY; |
362 |
byte[] dirProps = new byte[len + 1]; |
363 |
int[] offsets = leanToFullCommon(processor, features, environment, text, state, dirProps); |
364 |
int prefixLength = offsets[1]; |
365 |
int[] map = new int[len]; |
366 |
int count = offsets[0]; // number of used entries |
367 |
int added = prefixLength; |
368 |
for (int pos = 0, i = OFFSETS_SHIFT; pos < len; pos++) { |
369 |
if (i < count && pos == offsets[i]) { |
370 |
added++; |
371 |
i++; |
372 |
} |
373 |
map[pos] = pos + added; |
374 |
} |
375 |
return map; |
376 |
} |
377 |
|
378 |
/** |
379 |
* @see BidiComplexEngine#leanBidiCharOffsets BidiComplexEngine.leanBidiCharOffsets |
380 |
*/ |
381 |
public static int[] leanBidiCharOffsets(Object processor, BidiComplexFeatures features, BidiComplexEnvironment environment, String text, int[] state) { |
382 |
int len = text.length(); |
383 |
if (len == 0) |
384 |
return EMPTY_INT_ARRAY; |
385 |
byte[] dirProps = new byte[len + 1]; |
386 |
int[] offsets = leanToFullCommon(processor, features, environment, text, state, dirProps); |
387 |
// offsets[0] contains the number of used entries |
388 |
int count = offsets[0] - OFFSETS_SHIFT; |
389 |
int[] result = new int[count]; |
390 |
System.arraycopy(offsets, OFFSETS_SHIFT, result, 0, count); |
391 |
return result; |
392 |
} |
393 |
|
394 |
static int[] leanToFullCommon(Object _processor, BidiComplexFeatures features, BidiComplexEnvironment environment, String text, int[] state, byte[] dirProps) { |
395 |
IBidiComplexProcessor processor; |
396 |
if (_processor instanceof java.lang.String) { |
397 |
processor = BidiComplexStringProcessor.getProcessor((String) _processor); |
398 |
if (processor == null) |
399 |
throw new IllegalArgumentException("Invalid processor type!"); //$NON-NLS-1$ |
400 |
} else if (_processor instanceof IBidiComplexProcessor) |
401 |
processor = (IBidiComplexProcessor) _processor; |
402 |
else |
403 |
throw new IllegalArgumentException("Invalid processor argument!"); //$NON-NLS-1$ |
404 |
if (environment == null) |
405 |
environment = BidiComplexEnvironment.DEFAULT; |
406 |
if (features == null) |
407 |
features = processor.getFeatures(environment); |
408 |
if (state == null) { |
409 |
state = new int[1]; |
410 |
state[0] = BidiComplexEngine.STATE_INITIAL; |
411 |
} |
412 |
int len = text.length(); |
413 |
// dirProps: 1 byte for each char in text, + 1 byte = current orientation |
414 |
int orient = getCurOrient(environment, text, dirProps); |
415 |
dirProps[len] = (byte) orient; |
416 |
int separCount = features.getSeparators().length(); |
417 |
int direction = getCurDirection(processor, features, environment, text, dirProps); |
418 |
// current position |
419 |
int curPos = 0; |
420 |
// offsets of marks to add. Entry 0 is the number of used slots; |
421 |
// entry 1 is reserved to pass prefixLength. |
422 |
// entry 2 is reserved to pass direction.. |
423 |
int[] offsets = new int[20]; |
424 |
offsets[0] = OFFSETS_SHIFT; |
425 |
offsets[2] = direction; |
426 |
// initialize locations |
427 |
int[] locations = new int[separCount + features.getSpecialsCount()]; |
428 |
for (int i = 0, k = locations.length; i < k; i++) { |
429 |
locations[i] = -1; |
430 |
} |
431 |
if (state[0] > BidiComplexEngine.STATE_INITIAL) { |
432 |
offsets = ensureRoomInOffsets(offsets); |
433 |
int initState = state[0]; |
434 |
state[0] = BidiComplexEngine.STATE_INITIAL; |
435 |
curPos = processor.processSpecial(features, text, dirProps, offsets, state, initState, -1); |
436 |
} |
437 |
while (true) { |
438 |
// location of next token to handle |
439 |
int nextLocation; |
440 |
// index of next token to handle (if < separCount, this is a separator; otherwise a special case |
441 |
int idxLocation; |
442 |
long res = computeNextLocation(processor, features, text, dirProps, offsets, locations, state, curPos); |
443 |
nextLocation = (int) (res & 0x00000000FFFFFFFF); /* low word */ |
444 |
if (nextLocation >= len) |
445 |
break; |
446 |
idxLocation = (int) (res >> 32); /* high word */ |
447 |
if (idxLocation < separCount) { |
448 |
offsets = ensureRoomInOffsets(offsets); |
449 |
processSeparator(features, text, dirProps, offsets, nextLocation); |
450 |
curPos = nextLocation + 1; |
451 |
} else { |
452 |
offsets = ensureRoomInOffsets(offsets); |
453 |
idxLocation -= (separCount - 1); // because caseNumber starts from 1 |
454 |
curPos = processor.processSpecial(features, text, dirProps, offsets, state, idxLocation, nextLocation); |
455 |
} |
456 |
} |
457 |
if (orient == BidiComplexEnvironment.ORIENT_IGNORE) |
458 |
offsets[1] = 0; |
459 |
else { |
460 |
// recompute orient since it may have changed if contextual |
461 |
orient = getCurOrient(environment, text, dirProps); |
462 |
dirProps[len] = (byte) orient; |
463 |
if (orient == direction && orient != BidiComplexEnvironment.ORIENT_UNKNOWN) |
464 |
offsets[1] = 0; |
465 |
else if ((environment.getOrientation() & BidiComplexEnvironment.ORIENT_CONTEXTUAL_LTR) != 0) |
466 |
offsets[1] = 1; |
467 |
else |
468 |
offsets[1] = 2; |
469 |
} |
470 |
return offsets; |
471 |
} |
472 |
|
473 |
/** |
474 |
* @see BidiComplexEngine#fullToLeanText BidiComplexEngine.fullToLeanText |
475 |
*/ |
476 |
public static String fullToLeanText(Object _processor, BidiComplexFeatures features, BidiComplexEnvironment environment, String text, int[] state) { |
477 |
if (text.length() == 0) |
478 |
return text; |
479 |
IBidiComplexProcessor processor; |
480 |
if (_processor instanceof java.lang.String) { |
481 |
processor = BidiComplexStringProcessor.getProcessor((String) _processor); |
482 |
if (processor == null) |
483 |
throw new IllegalArgumentException("Invalid processor type!"); //$NON-NLS-1$ |
484 |
} else if (_processor instanceof IBidiComplexProcessor) |
485 |
processor = (IBidiComplexProcessor) _processor; |
486 |
else |
487 |
throw new IllegalArgumentException("Invalid processor argument!"); //$NON-NLS-1$ |
488 |
if (environment == null) |
489 |
environment = BidiComplexEnvironment.DEFAULT; |
490 |
if (features == null) |
491 |
features = processor.getFeatures(environment); |
492 |
if (state == null) { |
493 |
state = new int[1]; |
494 |
state[0] = BidiComplexEngine.STATE_INITIAL; |
495 |
} |
496 |
int dir = getCurDirection(processor, features, environment, text, null); |
497 |
char curMark = MARKS[dir]; |
498 |
char curEmbed = EMBEDS[dir]; |
499 |
int i; // used as loop index |
500 |
// remove any prefix and leading mark |
501 |
int lenText = text.length(); |
502 |
for (i = 0; i < lenText; i++) { |
503 |
char c = text.charAt(i); |
504 |
if (c != curEmbed && c != curMark) |
505 |
break; |
506 |
} |
507 |
if (i > 0) { // found at least one prefix or leading mark |
508 |
text = text.substring(i); |
509 |
lenText = text.length(); |
510 |
} |
511 |
// remove any suffix and trailing mark |
512 |
for (i = lenText - 1; i >= 0; i--) { |
513 |
char c = text.charAt(i); |
514 |
if (c != PDF && c != curMark) |
515 |
break; |
516 |
} |
517 |
if (i < 0) // only suffix and trailing marks, no real data |
518 |
return EMPTY_STRING; |
519 |
if (i < (lenText - 1)) { // found at least one suffix or trailing mark |
520 |
text = text.substring(0, i + 1); |
521 |
lenText = text.length(); |
522 |
} |
523 |
char[] chars = text.toCharArray(); |
524 |
// remove marks from chars |
525 |
int cnt = 0; |
526 |
for (i = 0; i < lenText; i++) { |
527 |
char c = chars[i]; |
528 |
if (c == curMark) |
529 |
cnt++; |
530 |
else if (cnt > 0) |
531 |
chars[i - cnt] = c; |
532 |
} |
533 |
String lean = new String(chars, 0, lenText - cnt); |
534 |
String full = leanToFullText(processor, features, IGNORE_ENVIRONMENT, lean, state); |
535 |
if (full.equals(text)) |
536 |
return lean; |
537 |
|
538 |
// There are some marks in full which are not in text and/or vice versa. |
539 |
// We need to add to lean any mark appearing in text and not in full. |
540 |
// The completed lean can never be longer than text itself. |
541 |
char[] newChars = new char[lenText]; |
542 |
char cFull, cText; |
543 |
int idxFull, idxText, idxLean, newCharsPos; |
544 |
int lenFull = full.length(); |
545 |
idxFull = idxText = idxLean = newCharsPos = 0; |
546 |
while (idxText < lenText && idxFull < lenFull) { |
547 |
cFull = full.charAt(idxFull); |
548 |
cText = text.charAt(idxText); |
549 |
if (cFull == cText) { /* chars are equal, proceed */ |
550 |
if (cFull != curMark) |
551 |
newChars[newCharsPos++] = chars[idxLean++]; |
552 |
idxText++; |
553 |
idxFull++; |
554 |
continue; |
555 |
} |
556 |
if (cFull == curMark) { /* extra Mark in full text */ |
557 |
idxFull++; |
558 |
continue; |
559 |
} |
560 |
if (cText == curMark) { /* extra Mark in source full text */ |
561 |
idxText++; |
562 |
// idxText-2 always >= 0 since leading Marks were removed from text |
563 |
if (text.charAt(idxText - 2) == curMark) |
564 |
continue; // ignore successive Marks in text after the first one |
565 |
newChars[newCharsPos++] = curMark; |
566 |
continue; |
567 |
} |
568 |
// we should never get here (extra char which is not a Mark) |
569 |
throw new IllegalStateException("Internal error: extra character not a Mark."); //$NON-NLS-1$ |
570 |
} |
571 |
if (idxText < lenText) /* full ended before text - this should never happen since |
572 |
we removed all marks and PDFs at the end of text */ |
573 |
throw new IllegalStateException("Internal error: unexpected EOL."); //$NON-NLS-1$ |
574 |
|
575 |
lean = new String(newChars, 0, newCharsPos); |
576 |
return lean; |
577 |
} |
578 |
|
579 |
/** |
580 |
* @see BidiComplexEngine#fullToLeanMap BidiComplexEngine.fullToLeanMap |
581 |
*/ |
582 |
public static int[] fullToLeanMap(Object processor, BidiComplexFeatures features, BidiComplexEnvironment environment, String full, int[] state) { |
583 |
int lenFull = full.length(); |
584 |
if (lenFull == 0) |
585 |
return EMPTY_INT_ARRAY; |
586 |
String lean = fullToLeanText(processor, features, environment, full, state); |
587 |
int lenLean = lean.length(); |
588 |
int dir = getCurDirection(processor, features, environment, lean, null); |
589 |
char curMark = MARKS[dir]; |
590 |
char curEmbed = EMBEDS[dir]; |
591 |
int[] map = new int[lenFull]; |
592 |
int idxFull, idxLean; |
593 |
// skip any prefix and leading mark |
594 |
for (idxFull = 0; idxFull < lenFull; idxFull++) { |
595 |
char c = full.charAt(idxFull); |
596 |
if (c != curEmbed && c != curMark) |
597 |
break; |
598 |
map[idxFull] = -1; |
599 |
} |
600 |
// lean must be a subset of Full, so we only check on iLean < leanLen |
601 |
for (idxLean = 0; idxLean < lenLean; idxFull++) { |
602 |
if (full.charAt(idxFull) == lean.charAt(idxLean)) { |
603 |
map[idxFull] = idxLean; |
604 |
idxLean++; |
605 |
} else |
606 |
map[idxFull] = -1; |
607 |
} |
608 |
for (; idxFull < lenFull; idxFull++) |
609 |
map[idxFull] = -1; |
610 |
return map; |
611 |
} |
612 |
|
613 |
/** |
614 |
* @see BidiComplexEngine#fullBidiCharOffsets BidiComplexEngine.fullBidiCharOffsets |
615 |
*/ |
616 |
public static int[] fullBidiCharOffsets(Object processor, BidiComplexFeatures features, BidiComplexEnvironment environment, String full, int[] state) { |
617 |
int lenFull = full.length(); |
618 |
if (lenFull == 0) |
619 |
return EMPTY_INT_ARRAY; |
620 |
String lean = fullToLeanText(processor, features, environment, full, state); |
621 |
int[] offsets = new int[20]; |
622 |
offsets[0] = OFFSETS_SHIFT; |
623 |
int lenLean = lean.length(); |
624 |
int idxLean, idxFull; |
625 |
// lean must be a subset of Full, so we only check on iLean < leanLen |
626 |
for (idxLean = idxFull = 0; idxLean < lenLean; idxFull++) { |
627 |
if (full.charAt(idxFull) == lean.charAt(idxLean)) |
628 |
idxLean++; |
629 |
else { |
630 |
offsets = ensureRoomInOffsets(offsets); |
631 |
insertMark(lean, null, offsets, idxFull); |
632 |
} |
633 |
} |
634 |
for (; idxFull < lenFull; idxFull++) { |
635 |
offsets = ensureRoomInOffsets(offsets); |
636 |
insertMark(lean, null, offsets, idxFull); |
637 |
} |
638 |
int[] result = new int[offsets[0] - OFFSETS_SHIFT]; |
639 |
System.arraycopy(offsets, OFFSETS_SHIFT, result, 0, result.length); |
640 |
return result; |
641 |
} |
642 |
|
643 |
static int[] ensureRoomInOffsets(int[] offsets) { |
644 |
// make sure |
645 |
if ((offsets.length - offsets[0]) < 3) { |
646 |
int[] newOffsets = new int[offsets.length * 2]; |
647 |
System.arraycopy(offsets, 0, newOffsets, 0, offsets[0]); |
648 |
return newOffsets; |
649 |
} |
650 |
return offsets; |
651 |
} |
652 |
|
653 |
/** |
654 |
* @see BidiComplexProcessor#insertMark BidiComplexProcessor.insertMark |
655 |
*/ |
656 |
public static void insertMark(String text, byte[] dirProps, int[] offsets, int offset) { |
657 |
int count = offsets[0];// number of used entries |
658 |
int index = count - 1; // index of greatest member <= offset |
659 |
// look up after which member the new offset should be inserted |
660 |
while (index >= OFFSETS_SHIFT) { |
661 |
int wrkOffset = offsets[index]; |
662 |
if (offset > wrkOffset) |
663 |
break; |
664 |
if (offset == wrkOffset) |
665 |
return; // avoid duplicates |
666 |
index--; |
667 |
} |
668 |
index++; // index now points at where to insert |
669 |
int length = count - index; // number of members to move up |
670 |
if (length > 0) // shift right all members greater than offset |
671 |
System.arraycopy(offsets, index, offsets, index + 1, length); |
672 |
offsets[index] = offset; |
673 |
offsets[0]++; // number of used entries |
674 |
// if the offset is 0, adding a mark does not change anything |
675 |
if (dirProps == null || offset < 1) |
676 |
return; |
677 |
|
678 |
byte dirProp = getDirProp(text, dirProps, offset); |
679 |
// if the current char is a strong one or a digit, we change the |
680 |
// dirProp of the previous char to account for the inserted mark. |
681 |
// In the following lines, L, R, AL, AN and EN represent bidi categories |
682 |
// as defined in the Unicode Bidirectional Algorithm |
683 |
// ( http://www.unicode.org/reports/tr9/ ). |
684 |
// L represents the category Left to Right character. |
685 |
// R represents the category Right to Left character. |
686 |
// AL represents the category Arabic Letter. |
687 |
// AN represents the category Arabic Number. |
688 |
// EN represents the category European Number. |
689 |
if (dirProp == L || dirProp == R || dirProp == AL || dirProp == EN || dirProp == AN) |
690 |
index = offset - 1; |
691 |
else |
692 |
// if the current char is a neutral, we change its own dirProp |
693 |
index = offset; |
694 |
|
695 |
int dir = offsets[2]; // current expression direction |
696 |
setDirProp(dirProps, index, STRONGS[dir]); |
697 |
return; |
698 |
} |
699 |
|
700 |
} |