Added
Link Here
|
1 |
/******************************************************************************* |
2 |
* Copyright (c) 2010, 2011 IBM Corporation and others. |
3 |
* All rights reserved. This program and the accompanying materials |
4 |
* are made available under the terms of the Eclipse Public License v1.0 |
5 |
* which accompanies this distribution, and is available at |
6 |
* http://www.eclipse.org/legal/epl-v10.html |
7 |
* |
8 |
* Contributors: |
9 |
* IBM Corporation - initial API and implementation |
10 |
******************************************************************************/ |
11 |
package org.eclipse.equinox.bidi.custom; |
12 |
|
13 |
import org.eclipse.equinox.bidi.STextDirection; |
14 |
import org.eclipse.equinox.bidi.advanced.STextEnvironment; |
15 |
import org.eclipse.equinox.bidi.advanced.ISTextExpert; |
16 |
import org.eclipse.equinox.bidi.internal.STextImpl; |
17 |
|
18 |
/** |
19 |
* Generic processor to be used as superclass (base class) |
20 |
* for specific structured text processors. |
21 |
* <p> |
22 |
* Here are some guidelines about how to write structured text |
23 |
* processors. |
24 |
* <ul> |
25 |
* <li>Processor instances may be accessed simultaneously by |
26 |
* several threads. They should have no instance variables.</li> |
27 |
* <li>The common logic uses processor methods to query the |
28 |
* characteristics of the specific processor: |
29 |
* <ul> |
30 |
* <li>the separators which separate the structured text into |
31 |
* tokens. See {@link #getSeparators getSeparators}.</li> |
32 |
* <li>the direction which governs the display of tokens |
33 |
* one after the other. See {@link #getDirection getDirection}.</li> |
34 |
* <li>the number of special cases which need to be handled by |
35 |
* code specific to that processor. |
36 |
* See {@link #getSpecialsCount getSpecialsCount}.</li> |
37 |
* </ul></li> |
38 |
* <li>Before starting deeper analysis of the submitted text, the common |
39 |
* logic gives to the processor a chance to shorten the processus by |
40 |
* invoking its {@link #skipProcessing skipProcessing} method.</li> |
41 |
* <li>The common logic then analyzes the text to segment it into tokens |
42 |
* according to the appearance of separators (as retrieved using |
43 |
* {@link #getSeparators getSeparators}).</li> |
44 |
* <li>If the processor indicated a positive number of special cases as |
45 |
* return value from its {@link #getSpecialsCount getSpecialsCount} |
46 |
* method, the common logic will repeatedly invoke the processor's |
47 |
* {@link #indexOfSpecial indexOfSpecial} method to let it signal the |
48 |
* presence of special strings which may further delimit the source text.</li> |
49 |
* <li>When such a special case is signalled by the processor, the common |
50 |
* logic will call the processor's {@link #processSpecial processSpecial} |
51 |
* method to give it the opportunity to handle it as needed. Typical |
52 |
* actions that the processor may perform are to add directional marks |
53 |
* inconditionally (by calling {@link #insertMark insertMark} or |
54 |
* conditionally (by calling {@link #processSeparator processSeparator}).</li> |
55 |
* </ul> |
56 |
* |
57 |
* @author Matitiahu Allouche |
58 |
*/ |
59 |
public class STextProcessor { |
60 |
|
61 |
final private String separators; |
62 |
|
63 |
/** |
64 |
* Creates a new instance of the STextProcessor class. |
65 |
*/ |
66 |
public STextProcessor() { |
67 |
separators = ""; //$NON-NLS-1$ |
68 |
} |
69 |
|
70 |
/** |
71 |
* Creates a new instance of the STextProcessor class. |
72 |
* @param separators string consisting of characters that split the text into fragments |
73 |
*/ |
74 |
public STextProcessor(String separators) { |
75 |
this.separators = separators; |
76 |
} |
77 |
|
78 |
/** |
79 |
* Locate occurrences of special strings within a structured text |
80 |
* and return their indexes one after the other in successive calls. |
81 |
* <p> |
82 |
* This method is called repeatedly if the number of special cases |
83 |
* returned by {@link #getSpecialsCount} is greater than zero. |
84 |
* </p><p> |
85 |
* A processor handling special cases must override this method. |
86 |
* </p> |
87 |
* @param environment the current environment, which may affect the behavior of |
88 |
* the processor. This parameter may be specified as |
89 |
* <code>null</code>, in which case the |
90 |
* {@link STextEnvironment#DEFAULT DEFAULT} |
91 |
* environment should be assumed. |
92 |
* |
93 |
* @param text is the structured text string before |
94 |
* addition of any directional formatting characters. |
95 |
* |
96 |
* @param charTypes is a parameter received by <code>indexOfSpecial</code> |
97 |
* uniquely to be used as argument for calls to methods which |
98 |
* need it. |
99 |
* |
100 |
* @param offsets is a parameter received by <code>indexOfSpecial</code> |
101 |
* uniquely to be used as argument for calls to methods which |
102 |
* need it. |
103 |
* |
104 |
* @param caseNumber number of the special case to locate. |
105 |
* This number varies from 1 to the number of special cases |
106 |
* returned by {@link #getSpecialsCount getSpecialsCount} |
107 |
* for this processor. |
108 |
* The meaning of this number is internal to the class |
109 |
* implementing <code>indexOfSpecial</code>. |
110 |
* |
111 |
* @param fromIndex the index within <code>text</code> to start |
112 |
* the search from. |
113 |
* |
114 |
* @return the position where the start of the special case |
115 |
* corresponding to <code>caseNumber</code> was located. |
116 |
* The method must return the first occurrence of whatever |
117 |
* identifies the start of the special case starting from |
118 |
* <code>fromIndex</code>. The method does not have to check if |
119 |
* this occurrence appears within the scope of another special |
120 |
* case (e.g. a comment starting delimiter within the scope of |
121 |
* a literal or vice-versa). |
122 |
* <br>If no occurrence is found, the method must return -1. |
123 |
* |
124 |
* @throws IllegalStateException If not overridden, this method throws an |
125 |
* <code>IllegalStateException</code>. This is appropriate behavior |
126 |
* (and does not need to be overridden) for processors whose |
127 |
* number of special cases is zero, which means that |
128 |
* <code>indexOfSpecial</code> should never be called for them. |
129 |
*/ |
130 |
public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int caseNumber, int fromIndex) { |
131 |
// This method must be overridden by all subclasses with special cases. |
132 |
throw new IllegalStateException("A processor with specialsCount > 0 must have an indexOfSpecial() method."); //$NON-NLS-1$ |
133 |
} |
134 |
|
135 |
/** |
136 |
* This method handles special cases specific to this processor. |
137 |
* It is called when a special case occurrence |
138 |
* is located by {@link #indexOfSpecial}. |
139 |
* <p> |
140 |
* If a special processing cannot be completed within a current call to |
141 |
* <code>processSpecial</code> (for instance, a comment has been started |
142 |
* in the current line but its end appears in a following line), |
143 |
* <code>processSpecial</code> should specify a final state by |
144 |
* putting its value in the first element of the <code>state</code> |
145 |
* parameter. |
146 |
* The meaning of this state is internal to the processor. |
147 |
* On a later call, <code>processSpecial</code> will be called with that value |
148 |
* for parameter <code>caseNumber</code> and <code>-1</code> for parameter |
149 |
* <code>separLocation</code> and should perform whatever initializations are required |
150 |
* depending on the state. |
151 |
* </p><p> |
152 |
* A processor handling special cases (with a number of |
153 |
* special cases greater than zero) must override this method. |
154 |
* </p> |
155 |
* @param environment the current environment, which may affect the behavior of |
156 |
* the processor. This parameter may be specified as |
157 |
* <code>null</code>, in which case the |
158 |
* {@link STextEnvironment#DEFAULT DEFAULT} |
159 |
* environment should be assumed. |
160 |
* |
161 |
* @param text is the structured text string before |
162 |
* addition of any directional formatting characters. |
163 |
* |
164 |
* @param charTypes is a parameter received by <code>processSpecial</code> |
165 |
* uniquely to be used as argument for calls to methods which |
166 |
* need it. |
167 |
* |
168 |
* @param offsets is a parameter received by <code>processSpecial</code> |
169 |
* uniquely to be used as argument for calls to methods which |
170 |
* need it. |
171 |
* |
172 |
* @param state is an integer array with at least one element. |
173 |
* If the processor needs to signal the occurrence of a |
174 |
* special case which must be passed to the next call to |
175 |
* <code>leanToFullText</code> (for instance, a comment or a |
176 |
* literal started but not closed in the current |
177 |
* <code>text</code>), it must put a value in the first element |
178 |
* of the <code>state</code> parameter. |
179 |
* This number must be >= 1 and less or equal to the number of special |
180 |
* cases returned by {@link #getSpecialsCount getSpecialsCount} |
181 |
* by this processor. |
182 |
* This number is passed back to the caller |
183 |
* and should be specified as <code>state</code> argument |
184 |
* in the next call to <code>leanToFullText</code> together |
185 |
* with the continuation text. |
186 |
* The meaning of this number is internal to the processor. |
187 |
* |
188 |
* @param caseNumber number of the special case to handle. |
189 |
* |
190 |
* @param separLocation the position returned by |
191 |
* {@link #indexOfSpecial indexOfSpecial}. In calls to |
192 |
* {@link ISTextExpert#leanToFullText leanToFullText} and other |
193 |
* methods of {@link ISTextExpert} specifying a non-null |
194 |
* <code>state</code> parameter, <code>processSpecial</code> is |
195 |
* called when initializing the processing with the value of |
196 |
* <code>caseNumber</code> equal to the value returned in the |
197 |
* first element of <code>state</code> and the value of |
198 |
* <code>separLocation</code> equal to <code>-1</code>. |
199 |
* |
200 |
* @return the position after the scope of the special case ends. |
201 |
* For instance, the position after the end of a comment, |
202 |
* the position after the end of a literal. |
203 |
* <br>A value greater or equal to the length of <code>text</code> |
204 |
* means that there is no further occurrence of this case in the |
205 |
* current structured text. |
206 |
* |
207 |
* @throws IllegalStateException If not overridden, this method throws an |
208 |
* <code>IllegalStateException</code>. This is appropriate behavior |
209 |
* (and does not need to be overridden) for processors whose |
210 |
* number of special cases is zero, which means that |
211 |
* <code>processSpecial</code> should never be called for them. |
212 |
*/ |
213 |
public int processSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int[] state, int caseNumber, int separLocation) { |
214 |
// This method must be overridden by all subclasses with any special case. |
215 |
throw new IllegalStateException("A processor with specialsCount > 0 must have a processSpecial() method."); //$NON-NLS-1$ |
216 |
} |
217 |
|
218 |
/** |
219 |
* This method can be called from within {@link #indexOfSpecial} or |
220 |
* {@link #processSpecial} in extensions of <code>STextProcessor</code> |
221 |
* to specify that a mark character must be added before the character |
222 |
* at the specified position of the <i>lean</i> text when generating the |
223 |
* <i>full</i> text. The mark character will be LRM for structured text |
224 |
* with a LTR base direction, and RLM for structured text with RTL |
225 |
* base direction. The mark character is not added physically by this |
226 |
* method, but its position is noted and will be used when generating |
227 |
* the <i>full</i> text. |
228 |
* |
229 |
* @param text is the structured text string received as |
230 |
* parameter to <code>indexOfSpecial</code> or |
231 |
* <code>processSpecial</code>. |
232 |
* |
233 |
* @param charTypes is a parameter received by <code>indexOfSpecial</code> |
234 |
* or <code>processSpecial</code>, uniquely to be used as argument |
235 |
* for calls to <code>insertMark</code> and other methods used |
236 |
* by processors. |
237 |
* |
238 |
* @param offsets is a parameter received by <code>indexOfSpecial</code> |
239 |
* or <code>processSpecial</code>, uniquely to be used as argument |
240 |
* for calls to <code>insertMark</code> and other methods used |
241 |
* by processors. |
242 |
* |
243 |
* @param offset position of the character in the <i>lean</i> text. |
244 |
* It must be a non-negative number smaller than the length |
245 |
* of the <i>lean</i> text. |
246 |
* For the benefit of efficiency, it is better to insert |
247 |
* multiple marks in ascending order of the offsets. |
248 |
*/ |
249 |
public static final void insertMark(String text, STextCharTypes charTypes, STextOffsets offsets, int offset) { |
250 |
offsets.insertOffset(charTypes, offset); |
251 |
} |
252 |
|
253 |
/** |
254 |
* This method can be called from within {@link #indexOfSpecial} or |
255 |
* {@link #processSpecial} in extensions of <code>STextProcessor</code> to add |
256 |
* a directional mark before a separator if needed for correct display, |
257 |
* depending on the base direction of the text and on the class of the |
258 |
* characters in the <i>lean</i> text preceding and following the separator itself. |
259 |
* <p> |
260 |
* The logic implemented in this method considers the text before |
261 |
* <code>separLocation</code> and the text following it. If, and only if, |
262 |
* a directional mark is needed to insure that the two parts of text |
263 |
* will be laid out according to the base direction, a mark will be |
264 |
* added when generating the <i>full</i> text. |
265 |
* </p> |
266 |
* @param text is the structured text string received as |
267 |
* parameter to <code>indexOfSpecial</code> or |
268 |
* <code>processSpecial</code>. |
269 |
* |
270 |
* @param charTypes is a parameter received by <code>indexOfSpecial</code> |
271 |
* or <code>processSpecial</code>, uniquely to be used as argument |
272 |
* for calls to <code>processSeparator</code> and other methods used |
273 |
* by processors. |
274 |
* |
275 |
* @param offsets is a parameter received by <code>indexOfSpecial</code> |
276 |
* or <code>processSpecial</code>, uniquely to be used as argument |
277 |
* for calls to <code>processSeparator</code> and other methods used |
278 |
* by processors. |
279 |
* |
280 |
* @param separLocation offset of the separator in the <i>lean</i> text. |
281 |
* It must be a non-negative number smaller than the length |
282 |
* of the <i>lean</i> text. |
283 |
*/ |
284 |
public static final void processSeparator(String text, STextCharTypes charTypes, STextOffsets offsets, int separLocation) { |
285 |
STextImpl.processSeparator(text, charTypes, offsets, separLocation); |
286 |
} |
287 |
|
288 |
/** |
289 |
* Indicate the separators to use for the current processor. |
290 |
* This method is invoked before starting the processing. |
291 |
* <p> |
292 |
* If no separators are specified, this method returns an empty string. |
293 |
* </p> |
294 |
* @param environment the current environment, which may affect the behavior of |
295 |
* the processor. This parameter may be specified as |
296 |
* <code>null</code>, in which case the |
297 |
* {@link STextEnvironment#DEFAULT DEFAULT} |
298 |
* environment should be assumed. |
299 |
* |
300 |
* @return a string grouping one-character separators which separate |
301 |
* the structured text into tokens. |
302 |
*/ |
303 |
public String getSeparators(STextEnvironment environment) { |
304 |
return separators; |
305 |
} |
306 |
|
307 |
/** |
308 |
* Indicate the base text direction appropriate for an instance of structured text. |
309 |
* This method is invoked before starting the processing. |
310 |
* <p> |
311 |
* If not overridden, this method returns <code>DIR_LTR</code>. |
312 |
* </p> |
313 |
* @param environment the current environment, which may affect the behavior of |
314 |
* the processor. This parameter may be specified as |
315 |
* <code>null</code>, in which case the |
316 |
* {@link STextEnvironment#DEFAULT DEFAULT} |
317 |
* environment should be assumed. |
318 |
* |
319 |
* @param text is the structured text string to process. |
320 |
* |
321 |
* @return the base direction of the structured text. This direction |
322 |
* may not be the same depending on the environment and on |
323 |
* whether the structured text contains Arabic or Hebrew |
324 |
* letters.<br> |
325 |
* The value returned is either |
326 |
* {@link STextDirection#DIR_LTR DIR_LTR} or {@link STextDirection#DIR_RTL DIR_RTL}. |
327 |
*/ |
328 |
public int getDirection(STextEnvironment environment, String text) { |
329 |
return STextDirection.DIR_LTR; |
330 |
} |
331 |
|
332 |
/** |
333 |
* Indicate the base text direction appropriate for an instance of structured text. |
334 |
* This method is invoked before starting the processing. |
335 |
* <p> |
336 |
* If not overridden, this method returns <code>DIR_LTR</code>. |
337 |
* </p> |
338 |
* @param environment the current environment, which may affect the behavior of |
339 |
* the processor. This parameter may be specified as |
340 |
* <code>null</code>, in which case the |
341 |
* {@link STextEnvironment#DEFAULT DEFAULT} |
342 |
* environment should be assumed. |
343 |
* |
344 |
* @param text is the structured text string to process. |
345 |
* |
346 |
* @param charTypes is a parameter received uniquely to be used as argument |
347 |
* for calls to <code>getCharType</code> and other methods used |
348 |
* by processors. |
349 |
* |
350 |
* @return the base direction of the structured text. This direction |
351 |
* may not be the same depending on the environment and on |
352 |
* whether the structured text contains Arabic or Hebrew |
353 |
* letters.<br> |
354 |
* The value returned is either |
355 |
* {@link STextDirection#DIR_LTR DIR_LTR} or {@link STextDirection#DIR_RTL DIR_RTL}. |
356 |
*/ |
357 |
public int getDirection(STextEnvironment environment, String text, STextCharTypes charTypes) { |
358 |
return STextDirection.DIR_LTR; |
359 |
} |
360 |
|
361 |
/** |
362 |
* Indicate the number of special cases handled by the current processor. |
363 |
* This method is invoked before starting the processing. |
364 |
* If the number returned is zero, {@link #indexOfSpecial} and |
365 |
* {@link #processSpecial} will not be invoked. |
366 |
* <p> |
367 |
* If not overridden, this method returns <code>zero</code>. |
368 |
* </p> |
369 |
* @param environment the current environment, which may affect the behavior of |
370 |
* the processor. This parameter may be specified as |
371 |
* <code>null</code>, in which case the |
372 |
* {@link STextEnvironment#DEFAULT DEFAULT} |
373 |
* environment should be assumed. |
374 |
* |
375 |
* @return the number of special cases for the associated processor. |
376 |
* Special cases exist for some types of structured text |
377 |
* processors. They are implemented by overriding methods |
378 |
* {@link STextProcessor#indexOfSpecial} and {@link STextProcessor#processSpecial}. |
379 |
* Examples of special cases are comments, literals, or |
380 |
* anything which is not identified by a one-character separator. |
381 |
* |
382 |
*/ |
383 |
public int getSpecialsCount(STextEnvironment environment) { |
384 |
return 0; |
385 |
} |
386 |
|
387 |
/** |
388 |
* Checks if there is a need for processing structured text. |
389 |
* This method is invoked before starting the processing. If the |
390 |
* processor returns <code>true</code>, no directional formatting |
391 |
* characters are added to the <i>lean</i> text and the processing |
392 |
* is shortened. |
393 |
* <p> |
394 |
* If not overridden, this method returns <code>false</code>. |
395 |
* </p> |
396 |
* @param environment the current environment, which may affect the behavior of |
397 |
* the processor. This parameter may be specified as |
398 |
* <code>null</code>, in which case the |
399 |
* {@link STextEnvironment#DEFAULT DEFAULT} |
400 |
* environment should be assumed. |
401 |
* |
402 |
* @param text is the structured text string to process. |
403 |
* |
404 |
* @param charTypes is a parameter received uniquely to be used as argument |
405 |
* for calls to <code>getCharType</code> and other methods used |
406 |
* by processors. |
407 |
* |
408 |
* @return a flag indicating if there is no need to process the structured |
409 |
* text to add directional formatting characters. |
410 |
* |
411 |
*/ |
412 |
public boolean skipProcessing(STextEnvironment environment, String text, STextCharTypes charTypes) { |
413 |
return false; |
414 |
} |
415 |
|
416 |
} |