diff --git src/org/eclipse/equinox/bidi/custom/STextCharTypes.java src/org/eclipse/equinox/bidi/custom/STextCharTypes.java index 50d768d..c2ce57f 100644 --- src/org/eclipse/equinox/bidi/custom/STextCharTypes.java +++ src/org/eclipse/equinox/bidi/custom/STextCharTypes.java @@ -33,86 +33,129 @@ static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER; static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER; - private static final int DIRPROPS_ADD = 2; + private static final int CHARTYPES_ADD = 2; + final protected STextProcessor processor; + final protected STextEnvironment environment; final protected String text; // 1 byte for each char in text - private byte[] dirProps; + private byte[] types; - // current orientation - private int orientation = -1; // "-1" means "unknown" + // structured text direction. -1 means not yet computed; -2 means within processor.getDirection + private int direction = -1; - public STextCharTypes(String text) { + /** + * Constructor + * + * @param processor is the processor handling this occurrence of + * structured text. + * + * @param environment the current environment, which may affect the behavior of + * the processor. This parameter may be specified as + * null, in which case the + * {@link STextEnvironment#DEFAULT DEFAULT} + * environment should be assumed. + * + * @param text is the text whose characters are analyzed. 
+ */ + public STextCharTypes(STextProcessor processor, STextEnvironment environment, String text) { + this.processor = processor; + this.environment = environment; this.text = text; - dirProps = new byte[text.length()]; + types = new byte[text.length()]; } - private byte getCachedDirectionAt(int index) { - return (byte) (dirProps[index] - DIRPROPS_ADD); + public int getDirection() { + if (direction < 0) + direction = processor.getDirection(environment, text, this); + return direction; + } + + private byte getCachedTypeAt(int index) { + return (byte) (types[index] - CHARTYPES_ADD); } - private boolean hasCachedDirectionAt(int i) { - return (dirProps[i] != 0); // "0" means "unknown" + private boolean hasCachedTypeAt(int i) { + return (types[i] != 0); // "0" means "unknown" } /** - * @param dirProp bidirectional class of the character. It must be - * one of the values which can be returned by - * java.lang.Character.getDirectionality. + * Returns directionality of the character in the original string at + * the specified index. + * + * @param index position of the character in the lean text + * + * @return the bidi type of the character. It is one of the + * values which can be returned by + * {@link Character#getDirectionality(char)}. */ - public void setBidiTypeAt(int i, byte dirProp) { - dirProps[i] = (byte) (dirProp + DIRPROPS_ADD); + public byte getBidiTypeAt(int index) { + if (hasCachedTypeAt(index)) + return getCachedTypeAt(index); + byte charType = Character.getDirectionality(text.charAt(index)); + if (charType == B) { + if (direction < 0) { + if (direction < -1) // called by processor.getDirection + return charType; // avoid infinite recursion + direction = -2; // signal we go within processor.getDirection + direction = processor.getDirection(environment, text, this); + } + charType = (direction == STextEnvironment.ORIENT_RTL) ? 
R : L; + } + setBidiTypeAt(index, charType); + return charType; } - public int getOrientation(STextEnvironment environment) { - int result; - int orient = environment.getOrientation(); - if ((orient & STextEnvironment.ORIENT_CONTEXTUAL_LTR) == 0) { // absolute orientation - result = orient; - } else { // contextual orientation: - result = orient & 1; // initiate to the default orientation minus contextual bit - int len = text.length(); - byte dirProp; - for (int i = 0; i < len; i++) { - if (!hasCachedDirectionAt(i)) { - dirProp = Character.getDirectionality(text.charAt(i)); - if (dirProp == B) // B char resolves to L or R depending on orientation - continue; - setBidiTypeAt(i, dirProp); - } else { - dirProp = getCachedDirectionAt(i); - } - if (dirProp == L) { - result = STextEnvironment.ORIENT_LTR; - break; - } - if (dirProp == R || dirProp == AL) { - result = STextEnvironment.ORIENT_RTL; - break; - } - } - } - orientation = result; - return result; + /** + * Force a bidi type on a character. + * + * @param index is the index of the character whose bidi type is set. + * + * @param charType bidirectional type of the character. It must be + * one of the values which can be returned by + * java.lang.Character.getDirectionality. + */ + public void setBidiTypeAt(int index, byte charType) { + types[index] = (byte) (charType + CHARTYPES_ADD); } /** - * Returns directionality of the character in the original string at - * the specified index. - * @param index position of the character in the lean text - * @return the bidirectional class of the character. It is one of the - * values which can be returned by {@link Character#getDirectionality(char)} + * Get the orientation of the component in which the text will + * be displayed. + * + * @param envir is the current environment, which may affect the behavior of + * the processor. This parameter may be specified as + * null, in which case the + * {@link STextEnvironment#DEFAULT DEFAULT} + * environment should be assumed. 
+ * + * @return the orientation as either + * {@link STextEnvironment#ORIENT_LTR} or + * {@link STextEnvironment#ORIENT_RTL}. */ - public byte getBidiTypeAt(int index) { - if (hasCachedDirectionAt(index)) - return getCachedDirectionAt(index); - byte dirProp = Character.getDirectionality(text.charAt(index)); - if (dirProp == B) { - dirProp = (orientation == STextEnvironment.ORIENT_RTL) ? R : L; + public int resolveOrientation(STextEnvironment envir) { + int orient = envir.getOrientation(); + if ((orient & STextEnvironment.ORIENT_CONTEXTUAL_LTR) == 0) { // absolute orientation + return orient; + } + // contextual orientation: + orient &= 1; // initiate to the default orientation minus contextual bit + int len = text.length(); + byte charType; + for (int i = 0; i < len; i++) { + if (!hasCachedTypeAt(i)) { + charType = Character.getDirectionality(text.charAt(i)); + if (charType == B) // B char resolves to L or R depending on orientation + continue; + setBidiTypeAt(i, charType); + } else + charType = getCachedTypeAt(i); + if (charType == L) + return STextEnvironment.ORIENT_LTR; + if (charType == R || charType == AL) + return STextEnvironment.ORIENT_RTL; } - setBidiTypeAt(index, dirProp); - return dirProp; + return orient; } - } diff --git src/org/eclipse/equinox/bidi/custom/STextOffsets.java src/org/eclipse/equinox/bidi/custom/STextOffsets.java new file mode 0 index 0000000..3a1c83e 0 --- /dev/null +++ src/org/eclipse/equinox/bidi/custom/STextOffsets.java @@ -0,0 +1,126 @@ +package org.eclipse.equinox.bidi.custom; + +public class STextOffsets { + static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT; + static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT; + static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC; + static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER; + static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER; + static final byte[] STRONGS = {L, R}; + static final int OFFSET_SIZE = 20; + int[] offsets = new 
int[OFFSET_SIZE]; + int count; // number of used entries + int direction = -1; // structured text direction; -1 until obtained from charTypes + int prefixLength; + + /** + * @return the stored prefix length + */ + public int getPrefixLength() { + return prefixLength; + } + + /** + * Store the prefix length + */ + public void setPrefixLength(int prefLen) { + prefixLength = prefLen; + } + + /** + * @return the number of used entries in the offsets array. + */ + public int getCount() { + return count; + } + + /** + * Mark that all entries in the offsets array are unused. + */ + public void resetCount() { + count = 0; + } + + /** + * Get the value of a specified entry in the offsets array. + * + * @param index is the index of the entry of interest. + * + * @return the value of the specified entry. + */ + public int getOffset(int index) { + return offsets[index]; + } + + /** + * Insert an offset value in the offset array so that the array + * stays in ascending order. + * + * @param charTypes holds the cached bidi types of the text; may be null, in which case only the offset is recorded. + * + * @param offset is the value to insert. 
+ */ + public void insertOffset(STextCharTypes charTypes, int offset) { + int index = count - 1; // index of greatest member <= offset + // look up after which member the new offset should be inserted + while (index >= 0) { + int wrkOffset = offsets[index]; + if (offset > wrkOffset) + break; + if (offset == wrkOffset) + return; // avoid duplicates + index--; + } + index++; // index now points at where to insert + int length = count - index; // number of members to move up + if (length > 0) // shift right all members greater than offset + System.arraycopy(offsets, index, offsets, index + 1, length); + offsets[index] = offset; + count++; // number of used entries + // if the offset is 0, adding a mark does not change anything + if (offset < 1) + return; + if (charTypes == null) + return; + + byte charType = charTypes.getBidiTypeAt(offset); + // if the current char is a strong one or a digit, we change the + // charType of the previous char to account for the inserted mark. + if (charType == L || charType == R || charType == AL || charType == EN || charType == AN) + index = offset - 1; + else + // if the current char is a neutral, we change its own charType + index = offset; + + if (direction < 0) + direction = charTypes.getDirection(); + charTypes.setBidiTypeAt(index, STRONGS[direction]); + return; + + } + + /** + * Make sure that there are at least 3 free entries in the offsets array. + */ + public void ensureRoom() { + // make sure there are at least 3 empty slots in offsets + if ((offsets.length - count) < 3) { + int[] newOffsets = new int[offsets.length * 2]; + System.arraycopy(offsets, 0, newOffsets, 0, count); + offsets = newOffsets; + } + } + + /** + * Get all and only the used offset entries. + * + * @return the current used entries of the offsets array. 
+ */ + public int[] getArray() { + if (count == offsets.length) + return offsets; + int[] array = new int[count]; + System.arraycopy(offsets, 0, array, 0, count); + return array; + } +} diff --git src/org/eclipse/equinox/bidi/custom/STextProcessor.java src/org/eclipse/equinox/bidi/custom/STextProcessor.java index f24d84c..7f37bb9 100644 --- src/org/eclipse/equinox/bidi/custom/STextProcessor.java +++ src/org/eclipse/equinox/bidi/custom/STextProcessor.java @@ -98,7 +98,7 @@ * @param text is the structured text string before * addition of any directional formatting characters. * - * @param dirProps is a parameter received by indexOfSpecial + * @param charTypes is a parameter received by indexOfSpecial * uniquely to be used as argument for calls to methods which * need it. * @@ -132,7 +132,7 @@ * number of special cases is zero, which means that * indexOfSpecial should never be called for them. */ - public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int caseNumber, int fromIndex) { + public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int caseNumber, int fromIndex) { // This method must be overridden by all subclasses with special cases. throw new IllegalStateException("A processor with specialsCount > 0 must have an indexOfSpecial() method."); //$NON-NLS-1$ } @@ -167,7 +167,7 @@ * @param text is the structured text string before * addition of any directional formatting characters. * - * @param dirProps is a parameter received by processSpecial + * @param charTypes is a parameter received by processSpecial * uniquely to be used as argument for calls to methods which * need it. * @@ -216,7 +216,7 @@ * number of special cases is zero, which means that * processSpecial should never be called for them. 
*/ - public int processSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int[] state, int caseNumber, int separLocation) { + public int processSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int[] state, int caseNumber, int separLocation) { // This method must be overridden by all subclasses with any special case. throw new IllegalStateException("A processor with specialsCount > 0 must have a processSpecial() method."); //$NON-NLS-1$ } @@ -236,7 +236,7 @@ * parameter to indexOfSpecial or * processSpecial. * - * @param dirProps is a parameter received by indexOfSpecial + * @param charTypes is a parameter received by indexOfSpecial * or processSpecial, uniquely to be used as argument * for calls to insertMark and other methods used * by processors. @@ -252,8 +252,8 @@ * For the benefit of efficiency, it is better to insert * multiple marks in ascending order of the offsets. */ - public static final void insertMark(String text, STextCharTypes dirProps, int[] offsets, int offset) { - STextImpl.insertMark(text, dirProps, offsets, offset); + public static final void insertMark(String text, STextCharTypes charTypes, STextOffsets offsets, int offset) { + offsets.insertOffset(charTypes, offset); } /** @@ -273,7 +273,7 @@ * parameter to indexOfSpecial or * processSpecial. * - * @param dirProps is a parameter received by indexOfSpecial + * @param charTypes is a parameter received by indexOfSpecial * or processSpecial, uniquely to be used as argument * for calls to processSeparator and other methods used * by processors. @@ -287,8 +287,8 @@ * It must be a non-negative number smaller than the length * of the lean text. 
*/ - public static final void processSeparator(String text, STextCharTypes dirProps, int[] offsets, int separLocation) { - STextImpl.processSeparator(text, dirProps, offsets, separLocation); + public static final void processSeparator(String text, STextCharTypes charTypes, STextOffsets offsets, int separLocation) { + STextImpl.processSeparator(text, charTypes, offsets, separLocation); } /** @@ -349,8 +349,8 @@ * * @param text is the structured text string to process. * - * @param dirProps is a parameter received uniquely to be used as argument - * for calls to getDirProp and other methods used + * @param charTypes is a parameter received uniquely to be used as argument + * for calls to getCharType and other methods used * by processors. * * @return the base direction of the structured text. This direction @@ -360,7 +360,7 @@ * The value returned is either * {@link STextEngine#DIR_LTR DIR_LTR} or {@link STextEngine#DIR_RTL DIR_RTL}. */ - public int getDirection(STextEnvironment environment, String text, STextCharTypes dirProps) { + public int getDirection(STextEnvironment environment, String text, STextCharTypes charTypes) { return STextEngine.DIR_LTR; } @@ -407,15 +407,15 @@ * * @param text is the structured text string to process. * - * @param dirProps is a parameter received uniquely to be used as argument - * for calls to getDirProp and other methods used + * @param charTypes is a parameter received uniquely to be used as argument + * for calls to getCharType and other methods used * by processors. * * @return a flag indicating if there is no need to process the structured * text to add directional formatting characters. 
* */ - public boolean skipProcessing(STextEnvironment environment, String text, STextCharTypes dirProps) { + public boolean skipProcessing(STextEnvironment environment, String text, STextCharTypes charTypes) { return false; } diff --git src/org/eclipse/equinox/bidi/internal/STextDelims.java src/org/eclipse/equinox/bidi/internal/STextDelims.java index 3a0cc7a..07682a3 100644 --- src/org/eclipse/equinox/bidi/internal/STextDelims.java +++ src/org/eclipse/equinox/bidi/internal/STextDelims.java @@ -11,8 +11,7 @@ package org.eclipse.equinox.bidi.internal; import org.eclipse.equinox.bidi.STextEnvironment; -import org.eclipse.equinox.bidi.custom.STextCharTypes; -import org.eclipse.equinox.bidi.custom.STextProcessor; +import org.eclipse.equinox.bidi.custom.*; /** * A base processor for structured text composed of text segments separated @@ -44,7 +43,7 @@ * * @see #getDelimiters */ - public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int caseNumber, int fromIndex) { + public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int caseNumber, int fromIndex) { char delim = getDelimiters().charAt((caseNumber - 1) * 2); return text.indexOf(delim, fromIndex); } @@ -59,8 +58,8 @@ * @return the position after the matching end delimiter, or the length * of text if no end delimiter is found. 
*/ - public int processSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int[] state, int caseNumber, int separLocation) { - STextProcessor.processSeparator(text, dirProps, offsets, separLocation); + public int processSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int[] state, int caseNumber, int separLocation) { + STextProcessor.processSeparator(text, charTypes, offsets, separLocation); int loc = separLocation + 1; char delim = getDelimiters().charAt((caseNumber * 2) - 1); loc = text.indexOf(delim, loc); diff --git src/org/eclipse/equinox/bidi/internal/STextDelimsEsc.java src/org/eclipse/equinox/bidi/internal/STextDelimsEsc.java index e583e9e..60eb1e4 100644 --- src/org/eclipse/equinox/bidi/internal/STextDelimsEsc.java +++ src/org/eclipse/equinox/bidi/internal/STextDelimsEsc.java @@ -11,8 +11,7 @@ package org.eclipse.equinox.bidi.internal; import org.eclipse.equinox.bidi.STextEnvironment; -import org.eclipse.equinox.bidi.custom.STextCharTypes; -import org.eclipse.equinox.bidi.custom.STextProcessor; +import org.eclipse.equinox.bidi.custom.*; /** * A base processor for structured text composed of text segments separated @@ -50,8 +49,8 @@ * and skips until after the matching end delimiter, * ignoring possibly escaped end delimiters. 
*/ - public int processSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int[] state, int caseNumber, int separLocation) { - STextProcessor.processSeparator(text, dirProps, offsets, separLocation); + public int processSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int[] state, int caseNumber, int separLocation) { + STextProcessor.processSeparator(text, charTypes, offsets, separLocation); int location = separLocation + 1; char delim = getDelimiters().charAt((caseNumber * 2) - 1); while (true) { diff --git src/org/eclipse/equinox/bidi/internal/STextImpl.java src/org/eclipse/equinox/bidi/internal/STextImpl.java index 5a40645..819c571 100644 --- src/org/eclipse/equinox/bidi/internal/STextImpl.java +++ src/org/eclipse/equinox/bidi/internal/STextImpl.java @@ -12,8 +12,7 @@ import org.eclipse.equinox.bidi.STextEngine; import org.eclipse.equinox.bidi.STextEnvironment; -import org.eclipse.equinox.bidi.custom.STextCharTypes; -import org.eclipse.equinox.bidi.custom.STextProcessor; +import org.eclipse.equinox.bidi.custom.*; /** * STextImpl provides the code which implements the API in @@ -50,22 +49,20 @@ static final char PDF = 0x202C; static final char[] MARKS = {LRM, RLM}; static final char[] EMBEDS = {LRE, RLE}; - static final byte[] STRONGS = {L, R}; static final int PREFIX_LENGTH = 2; static final int SUFFIX_LENGTH = 2; static final int FIXES_LENGTH = PREFIX_LENGTH + SUFFIX_LENGTH; - static final int OFFSETS_SHIFT = 3; static final int[] EMPTY_INT_ARRAY = new int[0]; static final STextEnvironment IGNORE_ENVIRONMENT = new STextEnvironment(null, false, STextEnvironment.ORIENT_IGNORE); /** - * Prevent creation of a STextEngine instance + * Prevent creation of a STextImpl instance */ private STextImpl() { // nothing to do } - static long computeNextLocation(STextProcessor processor, STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int[] locations, 
int[] state, int curPos) { + static long computeNextLocation(STextProcessor processor, STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int[] locations, int curPos) { String separators = processor.getSeparators(environment); int separCount = separators.length(); int specialsCount = processor.getSpecialsCount(environment); @@ -77,8 +74,8 @@ for (int i = 0; i < specialsCount; i++) { int location = locations[separCount + i]; if (location < curPos) { - offsets = ensureRoomInOffsets(offsets); - location = processor.indexOfSpecial(environment, text, dirProps, offsets, i + 1, curPos); + offsets.ensureRoom(); + location = processor.indexOfSpecial(environment, text, charTypes, offsets, i + 1, curPos); if (location < 0) location = len; locations[separCount + i] = location; @@ -107,22 +104,22 @@ /** * @see STextProcessor#processSeparator STextProcessor.processSeparator */ - public static void processSeparator(String text, STextCharTypes dirProps, int[] offsets, int separLocation) { + public static void processSeparator(String text, STextCharTypes charTypes, STextOffsets offsets, int separLocation) { int len = text.length(); - // offsets[2] contains the structured text direction - if (offsets[2] == STextEngine.DIR_RTL) { + int direction = charTypes.getDirection(); + if (direction == STextEngine.DIR_RTL) { // the structured text base direction is RTL for (int i = separLocation - 1; i >= 0; i--) { - byte dirProp = dirProps.getBidiTypeAt(i); - if (dirProp == R || dirProp == AL) + byte charType = charTypes.getBidiTypeAt(i); + if (charType == R || charType == AL) return; - if (dirProp == L) { + if (charType == L) { for (int j = separLocation; j < len; j++) { - dirProp = dirProps.getBidiTypeAt(j); - if (dirProp == R || dirProp == AL) + charType = charTypes.getBidiTypeAt(j); + if (charType == R || charType == AL) return; - if (dirProp == L || dirProp == EN) { - insertMark(text, dirProps, offsets, separLocation); + if (charType == L || 
charType == EN) { + offsets.insertOffset(charTypes, separLocation); return; } } @@ -135,28 +132,28 @@ // the structured text base direction is LTR boolean doneAN = false; for (int i = separLocation - 1; i >= 0; i--) { - byte dirProp = dirProps.getBidiTypeAt(i); - if (dirProp == L) + byte charType = charTypes.getBidiTypeAt(i); + if (charType == L) return; - if (dirProp == R || dirProp == AL) { + if (charType == R || charType == AL) { for (int j = separLocation; j < len; j++) { - dirProp = dirProps.getBidiTypeAt(j); - if (dirProp == L) + charType = charTypes.getBidiTypeAt(j); + if (charType == L) return; - if (dirProp == R || dirProp == EN || dirProp == AL || dirProp == AN) { - insertMark(text, dirProps, offsets, separLocation); + if (charType == R || charType == EN || charType == AL || charType == AN) { + offsets.insertOffset(charTypes, separLocation); return; } } return; } - if (dirProp == AN && !doneAN) { + if (charType == AN && !doneAN) { for (int j = separLocation; j < len; j++) { - dirProp = dirProps.getBidiTypeAt(j); - if (dirProp == L) + charType = charTypes.getBidiTypeAt(j); + if (charType == L) return; - if (dirProp == AL || dirProp == AN || dirProp == R) { - insertMark(text, dirProps, offsets, separLocation); + if (charType == AL || charType == AN || charType == R) { + offsets.insertOffset(charTypes, separLocation); return; } } @@ -212,10 +209,10 @@ int len = text.length(); if (len == 0) return text; - STextCharTypes dirProps = new STextCharTypes(text); - int[] offsets = leanToFullCommon(processor, environment, text, state, dirProps); - int prefixLength = offsets[1]; - int count = offsets[0] - OFFSETS_SHIFT; + STextCharTypes charTypes = new STextCharTypes(processor, environment, text); + STextOffsets offsets = leanToFullCommon(processor, environment, text, state, charTypes); + int prefixLength = offsets.getPrefixLength(); + int count = offsets.getCount(); if (count == 0 && prefixLength == 0) return text; int newLen = len + count; @@ -226,12 +223,11 @@ 
char[] fullChars = new char[newLen]; int added = prefixLength; // add marks at offsets - int direction = offsets[2]; + int direction = charTypes.getDirection(); char curMark = MARKS[direction]; - for (int i = 0, j = OFFSETS_SHIFT; i < len; i++) { + for (int i = 0, j = 0; i < len; i++) { char c = text.charAt(i); - // offsets[0] contains the number of used entries - if (j < offsets[0] && i == offsets[j]) { + if (j < count && i == offsets.getOffset(j)) { fullChars[i + added] = curMark; added++; j++; @@ -263,14 +259,14 @@ int len = text.length(); if (len == 0) return EMPTY_INT_ARRAY; - STextCharTypes dirProps = new STextCharTypes(text); - int[] offsets = leanToFullCommon(processor, environment, text, state, dirProps); - int prefixLength = offsets[1]; + STextCharTypes charTypes = new STextCharTypes(processor, environment, text); + STextOffsets offsets = leanToFullCommon(processor, environment, text, state, charTypes); + int prefixLength = offsets.getPrefixLength(); int[] map = new int[len]; - int count = offsets[0]; // number of used entries + int count = offsets.getCount(); // number of used entries int added = prefixLength; - for (int pos = 0, i = OFFSETS_SHIFT; pos < len; pos++) { - if (i < count && pos == offsets[i]) { + for (int pos = 0, i = 0; pos < len; pos++) { + if (i < count && pos == offsets.getOffset(i)) { added++; i++; } @@ -286,16 +282,12 @@ int len = text.length(); if (len == 0) return EMPTY_INT_ARRAY; - STextCharTypes dirProps = new STextCharTypes(text); - int[] offsets = leanToFullCommon(processor, environment, text, state, dirProps); - // offsets[0] contains the number of used entries - int count = offsets[0] - OFFSETS_SHIFT; - int[] result = new int[count]; - System.arraycopy(offsets, OFFSETS_SHIFT, result, 0, count); - return result; + STextCharTypes charTypes = new STextCharTypes(processor, environment, text); + STextOffsets offsets = leanToFullCommon(processor, environment, text, state, charTypes); + return offsets.getArray(); } - static int[] 
leanToFullCommon(STextProcessor processor, STextEnvironment environment, String text, int[] state, STextCharTypes dirProps) { + static STextOffsets leanToFullCommon(STextProcessor processor, STextEnvironment environment, String text, int[] state, STextCharTypes charTypes) { if (environment == null) environment = STextEnvironment.DEFAULT; if (state == null) { @@ -303,15 +295,9 @@ state[0] = STextEngine.STATE_INITIAL; } int len = text.length(); - int orient = dirProps.getOrientation(environment); - int direction = processor.getDirection(environment, text, dirProps); - // offsets of marks to add. Entry 0 is the number of used slots; - // entry 1 is reserved to pass prefixLength. - // entry 2 is reserved to pass direction.. - int[] offsets = new int[20]; - offsets[0] = OFFSETS_SHIFT; - offsets[2] = direction; - if (!processor.skipProcessing(environment, text, dirProps)) { + int direction = processor.getDirection(environment, text, charTypes); + STextOffsets offsets = new STextOffsets(); + if (!processor.skipProcessing(environment, text, charTypes)) { // initialize locations int separCount = processor.getSeparators(environment).length(); int[] locations = new int[separCount + processor.getSpecialsCount(environment)]; @@ -321,46 +307,47 @@ // current position int curPos = 0; if (state[0] > STextEngine.STATE_INITIAL) { - offsets = ensureRoomInOffsets(offsets); + offsets.ensureRoom(); int initState = state[0]; state[0] = STextEngine.STATE_INITIAL; - curPos = processor.processSpecial(environment, text, dirProps, offsets, state, initState, -1); + curPos = processor.processSpecial(environment, text, charTypes, offsets, state, initState, -1); } while (true) { // location of next token to handle int nextLocation; // index of next token to handle (if < separCount, this is a separator; otherwise a special case int idxLocation; - long res = computeNextLocation(processor, environment, text, dirProps, offsets, locations, state, curPos); + long res = computeNextLocation(processor, 
environment, text, charTypes, offsets, locations, curPos); nextLocation = (int) (res & 0x00000000FFFFFFFF); /* low word */ if (nextLocation >= len) break; + offsets.ensureRoom(); idxLocation = (int) (res >> 32); /* high word */ if (idxLocation < separCount) { - offsets = ensureRoomInOffsets(offsets); - processSeparator(text, dirProps, offsets, nextLocation); + processSeparator(text, charTypes, offsets, nextLocation); curPos = nextLocation + 1; } else { - offsets = ensureRoomInOffsets(offsets); idxLocation -= (separCount - 1); // because caseNumber starts from 1 - curPos = processor.processSpecial(environment, text, dirProps, offsets, state, idxLocation, nextLocation); + curPos = processor.processSpecial(environment, text, charTypes, offsets, state, idxLocation, nextLocation); } if (curPos >= len) break; } // end while } // end if (!processor.skipProcessing()) - if (orient == STextEnvironment.ORIENT_IGNORE) - offsets[1] = 0; + int prefixLength; + int orientation = environment.getOrientation(); + if (orientation == STextEnvironment.ORIENT_IGNORE) + prefixLength = 0; else { - // recompute orient since it may have changed if contextual - orient = dirProps.getOrientation(environment); - if (orient == direction && orient != STextEnvironment.ORIENT_UNKNOWN) - offsets[1] = 0; - else if ((environment.getOrientation() & STextEnvironment.ORIENT_CONTEXTUAL_LTR) != 0) - offsets[1] = 1; + int resolvedOrientation = charTypes.resolveOrientation(environment); + if (orientation != STextEnvironment.ORIENT_UNKNOWN && resolvedOrientation == direction) + prefixLength = 0; + else if ((orientation & STextEnvironment.ORIENT_CONTEXTUAL_LTR) != 0) + prefixLength = 1; else - offsets[1] = 2; + prefixLength = 2; } + offsets.setPrefixLength(prefixLength); return offsets; } @@ -501,8 +488,7 @@ if (lenFull == 0) return EMPTY_INT_ARRAY; String lean = fullToLeanText(processor, environment, full, state); - int[] offsets = new int[20]; - offsets[0] = OFFSETS_SHIFT; + STextOffsets offsets = new 
STextOffsets(); int lenLean = lean.length(); int idxLean, idxFull; // lean must be a subset of Full, so we only check on iLean < leanLen @@ -510,66 +496,14 @@ if (full.charAt(idxFull) == lean.charAt(idxLean)) idxLean++; else { - offsets = ensureRoomInOffsets(offsets); - insertMark(lean, null, offsets, idxFull); + offsets.ensureRoom(); + offsets.insertOffset(null, idxFull); } } for (; idxFull < lenFull; idxFull++) { - offsets = ensureRoomInOffsets(offsets); - insertMark(lean, null, offsets, idxFull); + offsets.ensureRoom(); + offsets.insertOffset(null, idxFull); } - int[] result = new int[offsets[0] - OFFSETS_SHIFT]; - System.arraycopy(offsets, OFFSETS_SHIFT, result, 0, result.length); - return result; - } - - static int[] ensureRoomInOffsets(int[] offsets) { - // make sure - if ((offsets.length - offsets[0]) < 3) { - int[] newOffsets = new int[offsets.length * 2]; - System.arraycopy(offsets, 0, newOffsets, 0, offsets[0]); - return newOffsets; - } - return offsets; - } - - /** - * @see STextProcessor#insertMark STextProcessor.insertMark - */ - public static void insertMark(String text, STextCharTypes dirProps, int[] offsets, int offset) { - int count = offsets[0];// number of used entries - int index = count - 1; // index of greatest member <= offset - // look up after which member the new offset should be inserted - while (index >= OFFSETS_SHIFT) { - int wrkOffset = offsets[index]; - if (offset > wrkOffset) - break; - if (offset == wrkOffset) - return; // avoid duplicates - index--; - } - index++; // index now points at where to insert - int length = count - index; // number of members to move up - if (length > 0) // shift right all members greater than offset - System.arraycopy(offsets, index, offsets, index + 1, length); - offsets[index] = offset; - offsets[0]++; // number of used entries - // if the offset is 0, adding a mark does not change anything - if (dirProps == null || offset < 1) - return; - - byte dirProp = dirProps.getBidiTypeAt(offset); - // if the 
current char is a strong one or a digit, we change the - // dirProp of the previous char to account for the inserted mark. - if (dirProp == L || dirProp == R || dirProp == AL || dirProp == EN || dirProp == AN) - index = offset - 1; - else - // if the current char is a neutral, we change its own dirProp - index = offset; - - int dir = offsets[2]; // current structured text direction - dirProps.setBidiTypeAt(index, STRONGS[dir]); - return; + return offsets.getArray(); } - } diff --git src/org/eclipse/equinox/bidi/internal/STextSingle.java src/org/eclipse/equinox/bidi/internal/STextSingle.java index 4f6e3b1..1ca2d24 100644 --- src/org/eclipse/equinox/bidi/internal/STextSingle.java +++ src/org/eclipse/equinox/bidi/internal/STextSingle.java @@ -11,8 +11,7 @@ package org.eclipse.equinox.bidi.internal; import org.eclipse.equinox.bidi.STextEnvironment; -import org.eclipse.equinox.bidi.custom.STextCharTypes; -import org.eclipse.equinox.bidi.custom.STextProcessor; +import org.eclipse.equinox.bidi.custom.*; /** * A base processor for structured text composed of two parts separated by a separator. @@ -41,7 +40,7 @@ * * @see #getSeparators getSeparators */ - public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int caseNumber, int fromIndex) { + public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int caseNumber, int fromIndex) { return text.indexOf(this.getSeparators(environment).charAt(0), fromIndex); } @@ -51,8 +50,8 @@ * * @return the length of text. 
*/ - public int processSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int[] state, int caseNumber, int separLocation) { - STextProcessor.processSeparator(text, dirProps, offsets, separLocation); + public int processSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int[] state, int caseNumber, int separLocation) { + STextProcessor.processSeparator(text, charTypes, offsets, separLocation); return text.length(); } diff --git src/org/eclipse/equinox/bidi/internal/consumable/STextEmail.java src/org/eclipse/equinox/bidi/internal/consumable/STextEmail.java index cc8a1d2..d132ef6 100644 --- src/org/eclipse/equinox/bidi/internal/consumable/STextEmail.java +++ src/org/eclipse/equinox/bidi/internal/consumable/STextEmail.java @@ -28,7 +28,7 @@ } public int getDirection(STextEnvironment environment, String text) { - return getDirection(environment, text, new STextCharTypes(text)); + return getDirection(environment, text, new STextCharTypes(this, environment, text)); } /** @@ -42,7 +42,7 @@ * * Otherwise, returns {@link STextEngine#DIR_LTR DIR_LTR}. 
*/ - public int getDirection(STextEnvironment environment, String text, STextCharTypes dirProps) { + public int getDirection(STextEnvironment environment, String text, STextCharTypes charTypes) { String language = environment.getLanguage(); if (!language.equals("ar")) //$NON-NLS-1$ return STextEngine.DIR_LTR; @@ -51,8 +51,8 @@ if (domainStart < 0) domainStart = 0; for (int i = domainStart; i < text.length(); i++) { - byte dirProp = dirProps.getBidiTypeAt(i); - if (dirProp == AL || dirProp == R) + byte charType = charTypes.getBidiTypeAt(i); + if (charType == AL || charType == R) return STextEngine.DIR_RTL; } return STextEngine.DIR_LTR; diff --git src/org/eclipse/equinox/bidi/internal/consumable/STextJava.java src/org/eclipse/equinox/bidi/internal/consumable/STextJava.java index 81b3b41..601e4fd 100644 --- src/org/eclipse/equinox/bidi/internal/consumable/STextJava.java +++ src/org/eclipse/equinox/bidi/internal/consumable/STextJava.java @@ -12,8 +12,7 @@ import org.eclipse.equinox.bidi.STextEngine; import org.eclipse.equinox.bidi.STextEnvironment; -import org.eclipse.equinox.bidi.custom.STextCharTypes; -import org.eclipse.equinox.bidi.custom.STextProcessor; +import org.eclipse.equinox.bidi.custom.*; import org.eclipse.equinox.bidi.internal.STextActivator; /** @@ -59,7 +58,7 @@ *
  • comments starting with slash-slash
  • * */ - public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int caseNumber, int fromIndex) { + public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int caseNumber, int fromIndex) { switch (caseNumber) { case 1 : /* space */ return text.indexOf(' ', fromIndex); @@ -83,15 +82,15 @@ *
  • skip until after a line separator
  • * */ - public int processSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int[] state, int caseNumber, int separLocation) { + public int processSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int[] state, int caseNumber, int separLocation) { int location, counter, i; - STextProcessor.processSeparator(text, dirProps, offsets, separLocation); + STextProcessor.processSeparator(text, charTypes, offsets, separLocation); switch (caseNumber) { case 1 : /* space */ separLocation++; while (separLocation < text.length() && text.charAt(separLocation) == ' ') { - dirProps.setBidiTypeAt(separLocation, WS); + charTypes.setBidiTypeAt(separLocation, WS); separLocation++; } return separLocation; @@ -120,7 +119,7 @@ } // we need to call processSeparator since text may follow the // end of comment immediately without even a space - STextProcessor.processSeparator(text, dirProps, offsets, location); + STextProcessor.processSeparator(text, charTypes, offsets, location); return location + 2; case 4 : /* slash-slash comment */ location = text.indexOf(lineSep, separLocation + 2); diff --git src/org/eclipse/equinox/bidi/internal/consumable/STextMath.java src/org/eclipse/equinox/bidi/internal/consumable/STextMath.java index 69d7a28..8834f5a 100644 --- src/org/eclipse/equinox/bidi/internal/consumable/STextMath.java +++ src/org/eclipse/equinox/bidi/internal/consumable/STextMath.java @@ -30,7 +30,7 @@ } public int getDirection(STextEnvironment environment, String text) { - return getDirection(environment, text, new STextCharTypes(text)); + return getDirection(environment, text, new STextCharTypes(this, environment, text)); } /** @@ -45,23 +45,22 @@ * * Otherwise, returns {@link STextEngine#DIR_LTR DIR_LTR}. 
*/ - public int getDirection(STextEnvironment environment, String text, STextCharTypes dirProps) { + public int getDirection(STextEnvironment environment, String text, STextCharTypes charTypes) { String language = environment.getLanguage(); if (!language.equals("ar")) //$NON-NLS-1$ return STextEngine.DIR_LTR; boolean flagAN = false; for (int i = 0; i < text.length(); i++) { - byte dirProp = dirProps.getBidiTypeAt(i); - if (dirProp == AL) + byte charType = charTypes.getBidiTypeAt(i); + if (charType == AL) return STextEngine.DIR_RTL; - if (dirProp == L || dirProp == R) + if (charType == L || charType == R) return STextEngine.DIR_LTR; - if (dirProp == AN) + if (charType == AN) flagAN = true; } if (flagAN) return STextEngine.DIR_RTL; return STextEngine.DIR_LTR; } - } diff --git src/org/eclipse/equinox/bidi/internal/consumable/STextRegex.java src/org/eclipse/equinox/bidi/internal/consumable/STextRegex.java index 56b1baa..d3cf7f1 100644 --- src/org/eclipse/equinox/bidi/internal/consumable/STextRegex.java +++ src/org/eclipse/equinox/bidi/internal/consumable/STextRegex.java @@ -12,8 +12,7 @@ import org.eclipse.equinox.bidi.STextEngine; import org.eclipse.equinox.bidi.STextEnvironment; -import org.eclipse.equinox.bidi.custom.STextCharTypes; -import org.eclipse.equinox.bidi.custom.STextProcessor; +import org.eclipse.equinox.bidi.custom.*; /** * STextRegex is a processor for regular expressions. @@ -77,7 +76,7 @@ * This method locates occurrences of the syntactic strings and of * R, AL, EN, AN characters. */ - public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int caseNumber, int fromIndex) { + public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int caseNumber, int fromIndex) { // In this method, L, R, AL, AN and EN represent bidi categories // as defined in the Unicode Bidirectional Algorithm // ( http://www.unicode.org/reports/tr9/ ). 
@@ -86,7 +85,7 @@ // AL represents the category Arabic Letter. // AN represents the category Arabic Number. // EN represents the category European Number. - byte dirProp; + byte charType; if (caseNumber < numberOfStrings) { /* 1 *//* comment (?#...) */ @@ -113,27 +112,27 @@ fromIndex = 1; // look for R, AL, AN, EN which are potentially needing a mark for (; fromIndex < text.length(); fromIndex++) { - dirProp = dirProps.getBidiTypeAt(fromIndex); + charType = charTypes.getBidiTypeAt(fromIndex); // R and AL will always be examined using processSeparator() - if (dirProp == R || dirProp == AL) + if (charType == R || charType == AL) return fromIndex; - if (dirProp == EN || dirProp == AN) { + if (charType == EN || charType == AN) { // no need for a mark after the first digit in a number - if (dirProps.getBidiTypeAt(fromIndex - 1) == dirProp) + if (charTypes.getBidiTypeAt(fromIndex - 1) == charType) continue; for (int i = fromIndex - 1; i >= 0; i--) { - dirProp = dirProps.getBidiTypeAt(i); + charType = charTypes.getBidiTypeAt(i); // after a L char, no need for a mark - if (dirProp == L) + if (charType == L) continue; // digit after R or AL or AN need a mark, except for EN // following AN, but this is a contrived case, so we // don't check for it (and calling processSeparator() // for it will do no harm) - if (dirProp == R || dirProp == AL || dirProp == AN) + if (charType == R || charType == AL || charType == AN) return fromIndex; } continue; @@ -145,7 +144,7 @@ /** * This method process the special cases. 
*/ - public int processSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int[] state, int caseNumber, int separLocation) { + public int processSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int[] state, int caseNumber, int separLocation) { int location; switch (caseNumber) { @@ -154,7 +153,7 @@ // initial state from previous line location = 0; } else { - STextProcessor.processSeparator(text, dirProps, offsets, separLocation); + STextProcessor.processSeparator(text, charTypes, offsets, separLocation); // skip the opening "(?#" location = separLocation + 3; } @@ -170,7 +169,7 @@ case 5 : /* conditional named back reference (?() */ case 6 : /* conditional named back reference (?('name') */ case 7 : /* named parentheses reference (?&name) */ - STextProcessor.processSeparator(text, dirProps, offsets, separLocation); + STextProcessor.processSeparator(text, charTypes, offsets, separLocation); // no need for calling processSeparator() for the following cases // since the starting string contains a L char case 8 : /* named group (?P */ @@ -194,7 +193,7 @@ // initial state from previous line location = 0; } else { - STextProcessor.processSeparator(text, dirProps, offsets, separLocation); + STextProcessor.processSeparator(text, charTypes, offsets, separLocation); // skip the opening "\Q" location = separLocation + 2; } @@ -203,11 +202,11 @@ state[0] = caseNumber; return text.length(); } - // set the dirProp for the "E" to L (Left to Right character) - dirProps.setBidiTypeAt(location + 1, L); + // set the charType for the "E" to L (Left to Right character) + charTypes.setBidiTypeAt(location + 1, L); return location + 2; case 18 : /* R, AL, AN, EN */ - STextProcessor.processSeparator(text, dirProps, offsets, separLocation); + STextProcessor.processSeparator(text, charTypes, offsets, separLocation); return separLocation + 1; } @@ -216,7 +215,7 @@ } public int 
getDirection(STextEnvironment environment, String text) { - return getDirection(environment, text, new STextCharTypes(text)); + return getDirection(environment, text, new STextCharTypes(this, environment, text)); } /** @@ -231,15 +230,15 @@ * * Otherwise, returns {@link STextEngine#DIR_LTR DIR_LTR}. */ - public int getDirection(STextEnvironment environment, String text, STextCharTypes dirProps) { + public int getDirection(STextEnvironment environment, String text, STextCharTypes charTypes) { String language = environment.getLanguage(); if (!language.equals("ar")) //$NON-NLS-1$ return STextEngine.DIR_LTR; for (int i = 0; i < text.length(); i++) { - byte dirProp = dirProps.getBidiTypeAt(i); - if (dirProp == AL || dirProp == R) + byte charType = charTypes.getBidiTypeAt(i); + if (charType == AL || charType == R) return STextEngine.DIR_RTL; - if (dirProp == L) + if (charType == L) return STextEngine.DIR_LTR; } if (environment.getMirrored()) diff --git src/org/eclipse/equinox/bidi/internal/consumable/STextSql.java src/org/eclipse/equinox/bidi/internal/consumable/STextSql.java index 426d732..ff2750a 100644 --- src/org/eclipse/equinox/bidi/internal/consumable/STextSql.java +++ src/org/eclipse/equinox/bidi/internal/consumable/STextSql.java @@ -12,8 +12,7 @@ import org.eclipse.equinox.bidi.STextEngine; import org.eclipse.equinox.bidi.STextEnvironment; -import org.eclipse.equinox.bidi.custom.STextCharTypes; -import org.eclipse.equinox.bidi.custom.STextProcessor; +import org.eclipse.equinox.bidi.custom.*; import org.eclipse.equinox.bidi.internal.STextActivator; /** @@ -60,7 +59,7 @@ *
  • comments starting with hyphen-hyphen
  • * */ - public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int caseNumber, int fromIndex) { + public int indexOfSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int caseNumber, int fromIndex) { switch (caseNumber) { case 1 : /* space */ return text.indexOf(" ", fromIndex); //$NON-NLS-1$ @@ -87,15 +86,15 @@ *
  • skip until after a line separator
  • * */ - public int processSpecial(STextEnvironment environment, String text, STextCharTypes dirProps, int[] offsets, int[] state, int caseNumber, int separLocation) { + public int processSpecial(STextEnvironment environment, String text, STextCharTypes charTypes, STextOffsets offsets, int[] state, int caseNumber, int separLocation) { int location; - STextProcessor.processSeparator(text, dirProps, offsets, separLocation); + STextProcessor.processSeparator(text, charTypes, offsets, separLocation); switch (caseNumber) { case 1 : /* space */ separLocation++; while (separLocation < text.length() && text.charAt(separLocation) == ' ') { - dirProps.setBidiTypeAt(separLocation, WS); + charTypes.setBidiTypeAt(separLocation, WS); separLocation++; } return separLocation; @@ -138,7 +137,7 @@ } // we need to call processSeparator since text may follow the // end of comment immediately without even a space - STextProcessor.processSeparator(text, dirProps, offsets, location); + STextProcessor.processSeparator(text, charTypes, offsets, location); return location + 2; case 5 : /* hyphen-hyphen comment */ location = text.indexOf(lineSep, separLocation + 2);