Download
Getting Started
Members
Projects
Community
Marketplace
Events
Planet Eclipse
Newsletter
Videos
Participate
Report a Bug
Forums
Mailing Lists
Wiki
IRC
How to Contribute
Working Groups
Automotive
Internet of Things
LocationTech
Long-Term Support
PolarSys
Science
OpenMDM
More
Community
Marketplace
Events
Planet Eclipse
Newsletter
Videos
Participate
Report a Bug
Forums
Mailing Lists
Wiki
IRC
How to Contribute
Working Groups
Automotive
Internet of Things
LocationTech
Long-Term Support
PolarSys
Science
OpenMDM
Toggle navigation
Bugzilla – Attachment 2086 Details for
Bug 13907
Scanner does not report whitespace tokens at end of input
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Terms of Use
|
Copyright Agent
patch for the scanner
Fix13907.java (text/plain), 17.28 KB, created by
Olivier Thomann
on 2002-10-01 14:58:30 EDT
(
hide
)
Description:
patch for the scanner
Filename:
MIME Type:
Creator:
Olivier Thomann
Created:
2002-10-01 14:58:30 EDT
Size:
17.28 KB
patch
obsolete
/**
 * Scans the source starting at currentPosition and returns the kind of the
 * next token.
 *
 * Each pass of the main loop first consumes a run of white space (recording
 * line separators on the way) and then dispatches on the first character that
 * is not white space. The comment cases leave the switch with 'break' when
 * tokenizeComments is false, which restarts the loop, so comments are skipped
 * in that mode.
 *
 * When tokenizeWhiteSpace is set, a white-space run is returned as
 * TokenNameWHITESPACE with startPosition set to the beginning of the run.
 * This includes a run that reaches the end of the source: reading past the
 * end of the array raises IndexOutOfBoundsException, and the outermost
 * handler reports the pending run instead of going straight to TokenNameEOF.
 *
 * @return one of the TokenName... constants; TokenNameEOF once currentPosition
 *         has passed eofPosition or the source array has been exhausted
 * @throws InvalidInputException thrown directly by this method for an invalid
 *         character constant, an invalid character in a string, an
 *         unterminated string, an invalid unicode escape in a line comment,
 *         an unterminated comment, and a Ctrl-Z character that is not at the
 *         end; the scanning helpers called here presumably raise it as well
 */
public int getNextToken() throws InvalidInputException {

    this.wasAcr = false;
    // Diet mode: skip the method body, clear the flag, and report either the
    // closing brace or end of file depending on where the skip ended.
    if (diet) {
        jumpOverMethodBody();
        diet = false;
        return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
    }
    boolean isWhiteSpace = false;
    // Declared outside the try block so that the outermost catch can still
    // tell where the current white-space run started.
    int whiteStart = 0;
    try {
        while (true) { //loop for jumping over comments
            withoutUnicodePtr = 0;
            //start with a new token (even comment written with unicode )

            // ---------Consume white space and handles startPosition---------
            whiteStart = currentPosition;
            do {
                // startPosition is reset on every character, so after the loop
                // it is the position of the character that ended the run.
                startPosition = currentPosition;
                if (((currentCharacter = source[currentPosition++]) == '\\')
                    && (source[currentPosition] == 'u')) {
                    isWhiteSpace = jumpOverUnicodeWhiteSpace();
                } else {
                    if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
                        checkNonExternalizeString();
                        if (recordLineSeparator) {
                            pushLineSeparator();
                        } else {
                            currentLine = null;
                        }
                    }
                    isWhiteSpace =
                        (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
                }
            } while (isWhiteSpace);
            // The loop has consumed one character beyond the run. If that
            // character is not the one at whiteStart, something was skipped
            // and is reported as a white-space token.
            if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
                // reposition scanner in case we are interested by spaces as tokens
                currentPosition--;
                startPosition = whiteStart;
                return TokenNameWHITESPACE;
            }
            //little trick to get out in the middle of a source computation
            if (currentPosition > eofPosition)
                return TokenNameEOF;

            // ---------Identify the next token-------------

            // For the two-argument getNextChar calls below, the results are
            // used as follows: 0 selects the token built with the first
            // argument, a positive value the token built with the second, and
            // any other value the plain one-character operator.
            switch (currentCharacter) {
                case '(' :
                    return TokenNameLPAREN;
                case ')' :
                    return TokenNameRPAREN;
                case '{' :
                    return TokenNameLBRACE;
                case '}' :
                    return TokenNameRBRACE;
                case '[' :
                    return TokenNameLBRACKET;
                case ']' :
                    return TokenNameRBRACKET;
                case ';' :
                    return TokenNameSEMICOLON;
                case ',' :
                    return TokenNameCOMMA;
                case '.' :
                    // A dot followed by a digit starts a number.
                    if (getNextCharAsDigit())
                        return scanNumber(true);
                    return TokenNameDOT;
                case '+' :
                    {
                        int test;
                        if ((test = getNextChar('+', '=')) == 0)
                            return TokenNamePLUS_PLUS;
                        if (test > 0)
                            return TokenNamePLUS_EQUAL;
                        return TokenNamePLUS;
                    }
                case '-' :
                    {
                        int test;
                        if ((test = getNextChar('-', '=')) == 0)
                            return TokenNameMINUS_MINUS;
                        if (test > 0)
                            return TokenNameMINUS_EQUAL;
                        return TokenNameMINUS;
                    }
                case '~' :
                    return TokenNameTWIDDLE;
                case '!' :
                    if (getNextChar('='))
                        return TokenNameNOT_EQUAL;
                    return TokenNameNOT;
                case '*' :
                    if (getNextChar('='))
                        return TokenNameMULTIPLY_EQUAL;
                    return TokenNameMULTIPLY;
                case '%' :
                    if (getNextChar('='))
                        return TokenNameREMAINDER_EQUAL;
                    return TokenNameREMAINDER;
                case '<' :
                    {
                        int test;
                        if ((test = getNextChar('=', '<')) == 0)
                            return TokenNameLESS_EQUAL;
                        if (test > 0) {
                            if (getNextChar('='))
                                return TokenNameLEFT_SHIFT_EQUAL;
                            return TokenNameLEFT_SHIFT;
                        }
                        return TokenNameLESS;
                    }
                case '>' :
                    {
                        int test;
                        if ((test = getNextChar('=', '>')) == 0)
                            return TokenNameGREATER_EQUAL;
                        if (test > 0) {
                            // Second '>' seen: look for a following '=' or a
                            // third '>'.
                            if ((test = getNextChar('=', '>')) == 0)
                                return TokenNameRIGHT_SHIFT_EQUAL;
                            if (test > 0) {
                                if (getNextChar('='))
                                    return TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL;
                                return TokenNameUNSIGNED_RIGHT_SHIFT;
                            }
                            return TokenNameRIGHT_SHIFT;
                        }
                        return TokenNameGREATER;
                    }
                case '=' :
                    if (getNextChar('='))
                        return TokenNameEQUAL_EQUAL;
                    return TokenNameEQUAL;
                case '&' :
                    {
                        int test;
                        if ((test = getNextChar('&', '=')) == 0)
                            return TokenNameAND_AND;
                        if (test > 0)
                            return TokenNameAND_EQUAL;
                        return TokenNameAND;
                    }
                case '|' :
                    {
                        int test;
                        if ((test = getNextChar('|', '=')) == 0)
                            return TokenNameOR_OR;
                        if (test > 0)
                            return TokenNameOR_EQUAL;
                        return TokenNameOR;
                    }
                case '^' :
                    if (getNextChar('='))
                        return TokenNameXOR_EQUAL;
                    return TokenNameXOR;
                case '?' :
                    return TokenNameQUESTION;
                case ':' :
                    return TokenNameCOLON;
                case '\'' :
                    {
                        // A line break directly after the opening quote makes
                        // the character constant invalid.
                        int test;
                        if ((test = getNextChar('\n', '\r')) == 0) {
                            throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
                        }
                        if (test > 0) {
                            // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
                            for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
                                if (currentPosition + lookAhead == source.length)
                                    break;
                                if (source[currentPosition + lookAhead] == '\n')
                                    break;
                                if (source[currentPosition + lookAhead] == '\'') {
                                    currentPosition += lookAhead + 1;
                                    break;
                                }
                            }
                            throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
                        }
                    }
                    // A closing quote right after the opening one: empty
                    // character constant, also invalid.
                    if (getNextChar('\'')) {
                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
                        for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
                            if (currentPosition + lookAhead == source.length)
                                break;
                            if (source[currentPosition + lookAhead] == '\n')
                                break;
                            if (source[currentPosition + lookAhead] == '\'') {
                                currentPosition += lookAhead + 1;
                                break;
                            }
                        }
                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
                    }
                    if (getNextChar('\\'))
                        scanEscapeCharacter();
                    else { // consume next character
                        // NOTE(review): this read has no local handler for
                        // IndexOutOfBoundsException; at the end of the source
                        // the exception reaches the outermost catch.
                        unicodeAsBackSlash = false;
                        if (((currentCharacter = source[currentPosition++]) == '\\')
                            && (source[currentPosition] == 'u')) {
                            getNextUnicodeChar();
                        } else {
                            if (withoutUnicodePtr != 0) {
                                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
                            }
                        }
                    }
                    if (getNextChar('\''))
                        return TokenNameCharacterLiteral;
                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
                    for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
                        if (currentPosition + lookAhead == source.length)
                            break;
                        if (source[currentPosition + lookAhead] == '\n')
                            break;
                        if (source[currentPosition + lookAhead] == '\'') {
                            currentPosition += lookAhead + 1;
                            break;
                        }
                    }
                    throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
                case '"' :
                    try {
                        // consume next character
                        unicodeAsBackSlash = false;
                        if (((currentCharacter = source[currentPosition++]) == '\\')
                            && (source[currentPosition] == 'u')) {
                            getNextUnicodeChar();
                        } else {
                            if (withoutUnicodePtr != 0) {
                                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
                            }
                        }

                        while (currentCharacter != '"') {
                            /**** \r and \n are not valid in string literals ****/
                            if ((currentCharacter == '\n') || (currentCharacter == '\r')) {
                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
                                for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
                                    if (currentPosition + lookAhead == source.length)
                                        break;
                                    if (source[currentPosition + lookAhead] == '\n')
                                        break;
                                    if (source[currentPosition + lookAhead] == '\"') {
                                        currentPosition += lookAhead + 1;
                                        break;
                                    }
                                }
                                throw new InvalidInputException(INVALID_CHAR_IN_STRING);
                            }
                            if (currentCharacter == '\\') {
                                // escapeSize first holds the position before
                                // the escape is scanned, then the number of
                                // characters the escape consumed.
                                int escapeSize = currentPosition;
                                boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
                                //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
                                scanEscapeCharacter();
                                escapeSize = currentPosition - escapeSize;
                                if (withoutUnicodePtr == 0) {
                                    //buffer all the entries that have been left aside....
                                    withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
                                    System.arraycopy(
                                        source,
                                        startPosition,
                                        withoutUnicodeBuffer,
                                        1,
                                        withoutUnicodePtr);
                                    withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
                                } else { //overwrite the / in the buffer
                                    withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
                                    if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
                                        withoutUnicodePtr--;
                                    }
                                }
                            }
                            // consume next character
                            unicodeAsBackSlash = false;
                            if (((currentCharacter = source[currentPosition++]) == '\\')
                                && (source[currentPosition] == 'u')) {
                                getNextUnicodeChar();
                            } else {
                                if (withoutUnicodePtr != 0) {
                                    withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
                                }
                            }

                        }
                    } catch (IndexOutOfBoundsException e) {
                        // Ran off the end of the source before a closing quote.
                        throw new InvalidInputException(UNTERMINATED_STRING);
                    } catch (InvalidInputException e) {
                        if (e.getMessage().equals(INVALID_ESCAPE)) {
                            // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
                            for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
                                if (currentPosition + lookAhead == source.length)
                                    break;
                                if (source[currentPosition + lookAhead] == '\n')
                                    break;
                                if (source[currentPosition + lookAhead] == '\"') {
                                    currentPosition += lookAhead + 1;
                                    break;
                                }
                            }

                        }
                        throw e; // rethrow
                    }
                    if (checkNonExternalizedStringLiterals){ // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
                        // Register the literal on the current line record,
                        // creating that record on first use.
                        if (currentLine == null) {
                            currentLine= new NLSLine();
                            lines.add(currentLine);
                        }
                        currentLine.add(
                            new StringLiteral(
                                getCurrentTokenSourceString(),
                                startPosition,
                                currentPosition - 1));
                    }
                    return TokenNameStringLiteral;
                case '/' :
                    {
                        int test;
                        if ((test = getNextChar('/', '*')) == 0) { //line comment
                            int endPositionForLineComment = 0;
                            try { //get the next char
                                if (((currentCharacter = source[currentPosition++]) == '\\')
                                    && (source[currentPosition] == 'u')) {
                                    //-------------unicode handling ------------
                                    // Decoded inline: skip any additional 'u'
                                    // characters, then read four digits, each
                                    // of which must map to a value in 0..15.
                                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
                                    currentPosition++;
                                    while (source[currentPosition] == 'u') {
                                        currentPosition++;
                                    }
                                    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
                                        || c1 < 0
                                        || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
                                        || c2 < 0
                                        || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
                                        || c3 < 0
                                        || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
                                        || c4 < 0) {
                                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
                                    } else {
                                        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
                                    }
                                }

                                //handle the \\u case manually into comment
                                if (currentCharacter == '\\') {
                                    if (source[currentPosition] == '\\')
                                        currentPosition++;
                                } //jump over the \\
                                // isUnicode is reset for every character, so
                                // after the loop it tells whether the line
                                // terminator itself came from a unicode escape.
                                boolean isUnicode = false;
                                while (currentCharacter != '\r' && currentCharacter != '\n') {
                                    //get the next char
                                    isUnicode = false;
                                    if (((currentCharacter = source[currentPosition++]) == '\\')
                                        && (source[currentPosition] == 'u')) {
                                        isUnicode = true;
                                        //-------------unicode handling ------------
                                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
                                        currentPosition++;
                                        while (source[currentPosition] == 'u') {
                                            currentPosition++;
                                        }
                                        if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
                                            || c1 < 0
                                            || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
                                            || c2 < 0
                                            || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
                                            || c3 < 0
                                            || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
                                            || c4 < 0) {
                                            throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
                                        } else {
                                            currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
                                        }
                                    }
                                    //handle the \\u case manually into comment
                                    if (currentCharacter == '\\') {
                                        if (source[currentPosition] == '\\')
                                            currentPosition++;
                                    } //jump over the \\
                                }
                                // NOTE(review): 6 is presumably the length of a
                                // unicode escape with a single 'u'; the loop
                                // above accepts several 'u' characters, so
                                // confirm this offset for that case.
                                if (isUnicode) {
                                    endPositionForLineComment = currentPosition - 6;
                                } else {
                                    endPositionForLineComment = currentPosition - 1;
                                }
                                recordComment(false);
                                if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
                                    checkNonExternalizeString();
                                    if (recordLineSeparator) {
                                        if (isUnicode) {
                                            pushUnicodeLineSeparator();
                                        } else {
                                            pushLineSeparator();
                                        }
                                    } else {
                                        currentLine = null;
                                    }
                                }
                                if (tokenizeComments) {
                                    if (!isUnicode) {
                                        currentPosition = endPositionForLineComment; // reset one character behind
                                    }
                                    return TokenNameCOMMENT_LINE;
                                }
                            } catch (IndexOutOfBoundsException e) { //an eof will then be generated
                                // The comment ran to the end of the source.
                                // Note that recordComment is not called on
                                // this path.
                                if (tokenizeComments) {
                                    currentPosition--; // reset one character behind
                                    return TokenNameCOMMENT_LINE;
                                }
                            }
                            // Comment not tokenized: leave the switch and scan
                            // the next token.
                            break;
                        }
                        if (test > 0) { //traditional and annotation comment
                            // star remembers whether the previous character
                            // was '*', so that a following '/' closes the
                            // comment.
                            boolean isJavadoc = false, star = false;
                            // consume next character
                            unicodeAsBackSlash = false;
                            if (((currentCharacter = source[currentPosition++]) == '\\')
                                && (source[currentPosition] == 'u')) {
                                getNextUnicodeChar();
                            } else {
                                if (withoutUnicodePtr != 0) {
                                    withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
                                }
                            }

                            if (currentCharacter == '*') {
                                isJavadoc = true;
                                star = true;
                            }
                            if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
                                checkNonExternalizeString();
                                if (recordLineSeparator) {
                                    pushLineSeparator();
                                } else {
                                    currentLine = null;
                                }
                            }
                            try { //get the next char
                                if (((currentCharacter = source[currentPosition++]) == '\\')
                                    && (source[currentPosition] == 'u')) {
                                    //-------------unicode handling ------------
                                    getNextUnicodeChar();
                                }
                                //handle the \\u case manually into comment
                                if (currentCharacter == '\\') {
                                    if (source[currentPosition] == '\\')
                                        currentPosition++; //jump over the \\
                                }
                                // empty comment is not a javadoc /**/
                                if (currentCharacter == '/') {
                                    isJavadoc = false;
                                }
                                //loop until end of comment */
                                while ((currentCharacter != '/') || (!star)) {
                                    if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
                                        checkNonExternalizeString();
                                        if (recordLineSeparator) {
                                            pushLineSeparator();
                                        } else {
                                            currentLine = null;
                                        }
                                    }
                                    star = currentCharacter == '*';
                                    //get next char
                                    if (((currentCharacter = source[currentPosition++]) == '\\')
                                        && (source[currentPosition] == 'u')) {
                                        //-------------unicode handling ------------
                                        getNextUnicodeChar();
                                    }
                                    //handle the \\u case manually into comment
                                    if (currentCharacter == '\\') {
                                        if (source[currentPosition] == '\\')
                                            currentPosition++;
                                    } //jump over the \\
                                }
                                recordComment(isJavadoc);
                                if (tokenizeComments) {
                                    if (isJavadoc)
                                        return TokenNameCOMMENT_JAVADOC;
                                    return TokenNameCOMMENT_BLOCK;
                                }
                            } catch (IndexOutOfBoundsException e) {
                                // End of source reached before the comment
                                // was closed.
                                throw new InvalidInputException(UNTERMINATED_COMMENT);
                            }
                            // Comment not tokenized: leave the switch and scan
                            // the next token.
                            break;
                        }
                        if (getNextChar('='))
                            return TokenNameDIVIDE_EQUAL;
                        return TokenNameDIVIDE;
                    }
                case '\u001a' :
                    if (atEnd())
                        return TokenNameEOF;
                    //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
                    throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$

                default :
                    if (Character.isJavaIdentifierStart(currentCharacter))
                        return scanIdentifierOrKeyword();
                    if (Character.isDigit(currentCharacter))
                        return scanNumber(false);
                    return TokenNameERROR;
            }
        }
    } //-----------------end switch while try--------------------
    catch (IndexOutOfBoundsException e) {
        // Reached when a read runs past the end of the source array. If a
        // white-space run was in progress and white space is tokenized, report
        // it now rather than losing it to the TokenNameEOF below.
        // NOTE(review): this handler also receives IndexOutOfBoundsException
        // escaping from the token switch (for example the unguarded read in
        // the character-literal case). There the test below can be true even
        // though no white space was consumed - confirm that returning
        // TokenNameWHITESPACE is intended in that situation.
        if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
            // reposition scanner in case we are interested by spaces as tokens
            currentPosition--;
            startPosition = whiteStart;
            return TokenNameWHITESPACE;
        }
    }
    return TokenNameEOF;
}
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 13907
: 2086