aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/org/yaml/snakeyaml/scanner/ScannerImpl.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/yaml/snakeyaml/scanner/ScannerImpl.java')
-rw-r--r--src/main/java/org/yaml/snakeyaml/scanner/ScannerImpl.java4438
1 files changed, 2271 insertions, 2167 deletions
diff --git a/src/main/java/org/yaml/snakeyaml/scanner/ScannerImpl.java b/src/main/java/org/yaml/snakeyaml/scanner/ScannerImpl.java
index 4272aab7..ec8711f9 100644
--- a/src/main/java/org/yaml/snakeyaml/scanner/ScannerImpl.java
+++ b/src/main/java/org/yaml/snakeyaml/scanner/ScannerImpl.java
@@ -1,17 +1,15 @@
/**
- * Copyright (c) 2008, http://www.snakeyaml.org
+ * Copyright (c) 2008, SnakeYAML
*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
*/
package org.yaml.snakeyaml.scanner;
@@ -24,7 +22,9 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
-
+import org.yaml.snakeyaml.DumperOptions;
+import org.yaml.snakeyaml.LoaderOptions;
+import org.yaml.snakeyaml.comments.CommentType;
import org.yaml.snakeyaml.error.Mark;
import org.yaml.snakeyaml.error.YAMLException;
import org.yaml.snakeyaml.reader.StreamReader;
@@ -34,6 +34,7 @@ import org.yaml.snakeyaml.tokens.BlockEndToken;
import org.yaml.snakeyaml.tokens.BlockEntryToken;
import org.yaml.snakeyaml.tokens.BlockMappingStartToken;
import org.yaml.snakeyaml.tokens.BlockSequenceStartToken;
+import org.yaml.snakeyaml.tokens.CommentToken;
import org.yaml.snakeyaml.tokens.DirectiveToken;
import org.yaml.snakeyaml.tokens.DocumentEndToken;
import org.yaml.snakeyaml.tokens.DocumentStartToken;
@@ -58,6 +59,7 @@ import org.yaml.snakeyaml.util.UriEncoder;
* Scanner produces tokens of the following types:
* STREAM-START
* STREAM-END
+ * COMMENT
* DIRECTIVE(name, value)
* DOCUMENT-START
* DOCUMENT-END
@@ -80,2209 +82,2311 @@ import org.yaml.snakeyaml.util.UriEncoder;
* </pre>
*/
public final class ScannerImpl implements Scanner {
- /**
- * A regular expression matching characters which are not in the hexadecimal
- * set (0-9, A-F, a-f).
- */
- private final static Pattern NOT_HEXA = Pattern.compile("[^0-9A-Fa-f]");
-
- /**
- * A mapping from an escaped character in the input stream to the character
- * that they should be replaced with.
- *
- * YAML defines several common and a few uncommon escape sequences.
- *
- * @see <a href="http://www.yaml.org/spec/current.html#id2517668">4.1.6.
- * Escape Sequences</a>
- */
- public final static Map<Character, String> ESCAPE_REPLACEMENTS = new HashMap<Character, String>();
-
- /**
- * A mapping from a character to a number of bytes to read-ahead for that
- * escape sequence. These escape sequences are used to handle unicode
- * escaping in the following formats, where H is a hexadecimal character:
- *
- * <pre>
- * &#92;xHH : escaped 8-bit Unicode character
- * &#92;uHHHH : escaped 16-bit Unicode character
- * &#92;UHHHHHHHH : escaped 32-bit Unicode character
- * </pre>
- *
- * @see <a href="http://yaml.org/spec/1.1/current.html#id872840">5.6. Escape
- * Sequences</a>
- */
- public final static Map<Character, Integer> ESCAPE_CODES = new HashMap<Character, Integer>();
-
- static {
- // ASCII null
- ESCAPE_REPLACEMENTS.put(Character.valueOf('0'), "\0");
- // ASCII bell
- ESCAPE_REPLACEMENTS.put(Character.valueOf('a'), "\u0007");
- // ASCII backspace
- ESCAPE_REPLACEMENTS.put(Character.valueOf('b'), "\u0008");
- // ASCII horizontal tab
- ESCAPE_REPLACEMENTS.put(Character.valueOf('t'), "\u0009");
- // ASCII newline (line feed; &#92;n maps to 0x0A)
- ESCAPE_REPLACEMENTS.put(Character.valueOf('n'), "\n");
- // ASCII vertical tab
- ESCAPE_REPLACEMENTS.put(Character.valueOf('v'), "\u000B");
- // ASCII form-feed
- ESCAPE_REPLACEMENTS.put(Character.valueOf('f'), "\u000C");
- // carriage-return (&#92;r maps to 0x0D)
- ESCAPE_REPLACEMENTS.put(Character.valueOf('r'), "\r");
- // ASCII escape character (Esc)
- ESCAPE_REPLACEMENTS.put(Character.valueOf('e'), "\u001B");
- // ASCII space
- ESCAPE_REPLACEMENTS.put(Character.valueOf(' '), "\u0020");
- // ASCII double-quote
- ESCAPE_REPLACEMENTS.put(Character.valueOf('"'), "\"");
- // ASCII backslash
- ESCAPE_REPLACEMENTS.put(Character.valueOf('\\'), "\\");
- // Unicode next line
- ESCAPE_REPLACEMENTS.put(Character.valueOf('N'), "\u0085");
- // Unicode non-breaking-space
- ESCAPE_REPLACEMENTS.put(Character.valueOf('_'), "\u00A0");
- // Unicode line-separator
- ESCAPE_REPLACEMENTS.put(Character.valueOf('L'), "\u2028");
- // Unicode paragraph separator
- ESCAPE_REPLACEMENTS.put(Character.valueOf('P'), "\u2029");
-
- // 8-bit Unicode
- ESCAPE_CODES.put(Character.valueOf('x'), 2);
- // 16-bit Unicode
- ESCAPE_CODES.put(Character.valueOf('u'), 4);
- // 32-bit Unicode (Supplementary characters are supported)
- ESCAPE_CODES.put(Character.valueOf('U'), 8);
- }
- private final StreamReader reader;
- // Had we reached the end of the stream?
- private boolean done = false;
-
- // The number of unclosed '{' and '['. `flow_level == 0` means block
- // context.
- private int flowLevel = 0;
-
- // List of processed tokens that are not yet emitted.
- private List<Token> tokens;
-
- // Number of tokens that were emitted through the `get_token` method.
- private int tokensTaken = 0;
-
- // The current indentation level.
- private int indent = -1;
-
- // Past indentation levels.
- private ArrayStack<Integer> indents;
-
- // Variables related to simple keys treatment. See PyYAML.
-
- /**
- * <pre>
- * A simple key is a key that is not denoted by the '?' indicator.
- * Example of simple keys:
- * ---
- * block simple key: value
- * ? not a simple key:
- * : { flow simple key: value }
- * We emit the KEY token before all keys, so when we find a potential
- * simple key, we try to locate the corresponding ':' indicator.
- * Simple keys should be limited to a single line and 1024 characters.
- *
- * Can a simple key start at the current position? A simple key may
- * start:
- * - at the beginning of the line, not counting indentation spaces
- * (in block context),
- * - after '{', '[', ',' (in the flow context),
- * - after '?', ':', '-' (in the block context).
- * In the block context, this flag also signifies if a block collection
- * may start at the current position.
- * </pre>
- */
- private boolean allowSimpleKey = true;
- /*
- * Keep track of possible simple keys. This is a dictionary. The key is
- * `flow_level`; there can be no more that one possible simple key for each
- * level. The value is a SimpleKey record: (token_number, required, index,
- * line, column, mark) A simple key may start with ALIAS, ANCHOR, TAG,
- * SCALAR(flow), '[', or '{' tokens.
- */
- private Map<Integer, SimpleKey> possibleSimpleKeys;
-
- public ScannerImpl(StreamReader reader) {
- this.reader = reader;
- this.tokens = new ArrayList<Token>(100);
- this.indents = new ArrayStack<Integer>(10);
- // The order in possibleSimpleKeys is kept for nextPossibleSimpleKey()
- this.possibleSimpleKeys = new LinkedHashMap<Integer, SimpleKey>();
- fetchStreamStart();// Add the STREAM-START token.
- }
-
- /**
- * Check whether the next token is one of the given types.
- */
- public boolean checkToken(Token.ID... choices) {
- while (needMoreTokens()) {
- fetchMoreTokens();
- }
- if (!this.tokens.isEmpty()) {
- if (choices.length == 0) {
- return true;
- }
- // since profiler puts this method on top (it is used a lot), we
- // should not use 'foreach' here because of the performance reasons
- Token.ID first = this.tokens.get(0).getTokenId();
- for (int i = 0; i < choices.length; i++) {
- if (first == choices[i]) {
- return true;
- }
- }
- }
- return false;
- }
-
- /**
- * Return the next token, but do not delete it from the queue.
- */
- public Token peekToken() {
- while (needMoreTokens()) {
- fetchMoreTokens();
- }
- return this.tokens.get(0);
- }
-
- /**
- * Return the next token, removing it from the queue.
- */
- public Token getToken() {
- if (!this.tokens.isEmpty()) {
- this.tokensTaken++;
- return this.tokens.remove(0);
- }
- return null;
- }
-
- // Private methods.
- /**
- * Returns true if more tokens should be scanned.
- */
- private boolean needMoreTokens() {
- // If we are done, we do not require more tokens.
- if (this.done) {
- return false;
- }
- // If we aren't done, but we have no tokens, we need to scan more.
- if (this.tokens.isEmpty()) {
- return true;
- }
- // The current token may be a potential simple key, so we
- // need to look further.
- stalePossibleSimpleKeys();
- return nextPossibleSimpleKey() == this.tokensTaken;
- }
-
- /**
- * Fetch one or more tokens from the StreamReader.
- */
- private void fetchMoreTokens() {
- // Eat whitespaces and comments until we reach the next token.
- scanToNextToken();
- // Remove obsolete possible simple keys.
- stalePossibleSimpleKeys();
- // Compare the current indentation and column. It may add some tokens
- // and decrease the current indentation level.
- unwindIndent(reader.getColumn());
- // Peek the next character, to decide what the next group of tokens
- // will look like.
- char ch = reader.peek();
- switch (ch) {
- case '\0':
- // Is it the end of stream?
- fetchStreamEnd();
- return;
- case '%':
- // Is it a directive?
- if (checkDirective()) {
- fetchDirective();
- return;
- }
- break;
- case '-':
- // Is it the document start?
- if (checkDocumentStart()) {
- fetchDocumentStart();
- return;
- // Is it the block entry indicator?
- } else if (checkBlockEntry()) {
- fetchBlockEntry();
- return;
- }
- break;
- case '.':
- // Is it the document end?
- if (checkDocumentEnd()) {
- fetchDocumentEnd();
- return;
- }
- break;
- // TODO support for BOM within a stream. (not implemented in PyYAML)
- case '[':
- // Is it the flow sequence start indicator?
- fetchFlowSequenceStart();
- return;
- case '{':
- // Is it the flow mapping start indicator?
- fetchFlowMappingStart();
- return;
- case ']':
- // Is it the flow sequence end indicator?
- fetchFlowSequenceEnd();
- return;
- case '}':
- // Is it the flow mapping end indicator?
- fetchFlowMappingEnd();
- return;
- case ',':
- // Is it the flow entry indicator?
- fetchFlowEntry();
- return;
- // see block entry indicator above
- case '?':
- // Is it the key indicator?
- if (checkKey()) {
- fetchKey();
- return;
- }
- break;
- case ':':
- // Is it the value indicator?
- if (checkValue()) {
- fetchValue();
- return;
- }
- break;
- case '*':
- // Is it an alias?
- fetchAlias();
- return;
- case '&':
- // Is it an anchor?
- fetchAnchor();
- return;
- case '!':
- // Is it a tag?
- fetchTag();
- return;
- case '|':
- // Is it a literal scalar?
- if (this.flowLevel == 0) {
- fetchLiteral();
- return;
- }
- break;
- case '>':
- // Is it a folded scalar?
- if (this.flowLevel == 0) {
- fetchFolded();
- return;
- }
- break;
- case '\'':
- // Is it a single quoted scalar?
- fetchSingle();
- return;
- case '"':
- // Is it a double quoted scalar?
- fetchDouble();
- return;
- }
- // It must be a plain scalar then.
- if (checkPlain()) {
- fetchPlain();
- return;
- }
- // No? It's an error. Let's produce a nice error message.We do this by
- // converting escaped characters into their escape sequences. This is a
- // backwards use of the ESCAPE_REPLACEMENTS map.
- String chRepresentation = String.valueOf(ch);
- for (Character s : ESCAPE_REPLACEMENTS.keySet()) {
- String v = ESCAPE_REPLACEMENTS.get(s);
- if (v.equals(chRepresentation)) {
- chRepresentation = "\\" + s;// ' ' -> '\t'
- break;
- }
- }
- if (ch == '\t')
- chRepresentation += "(TAB)";
- String text = String
- .format("found character '%s' that cannot start any token. (Do not use %s for indentation)",
- chRepresentation, chRepresentation);
- throw new ScannerException("while scanning for the next token", null, text,
- reader.getMark());
- }
-
- // Simple keys treatment.
-
- /**
- * Return the number of the nearest possible simple key. Actually we don't
- * need to loop through the whole dictionary.
- */
- private int nextPossibleSimpleKey() {
- /*
- * the implementation is not as in PyYAML. Because
- * this.possibleSimpleKeys is ordered we can simply take the first key
- */
- if (!this.possibleSimpleKeys.isEmpty()) {
- return this.possibleSimpleKeys.values().iterator().next().getTokenNumber();
- }
- return -1;
- }
-
- /**
- * <pre>
- * Remove entries that are no longer possible simple keys. According to
- * the YAML specification, simple keys
- * - should be limited to a single line,
- * - should be no longer than 1024 characters.
- * Disabling this procedure will allow simple keys of any length and
- * height (may cause problems if indentation is broken though).
- * </pre>
- */
- private void stalePossibleSimpleKeys() {
- if (!this.possibleSimpleKeys.isEmpty()) {
- for (Iterator<SimpleKey> iterator = this.possibleSimpleKeys.values().iterator(); iterator
- .hasNext();) {
- SimpleKey key = iterator.next();
- if ((key.getLine() != reader.getLine())
- || (reader.getIndex() - key.getIndex() > 1024)) {
- // If the key is not on the same line as the current
- // position OR the difference in column between the token
- // start and the current position is more than the maximum
- // simple key length, then this cannot be a simple key.
- if (key.isRequired()) {
- // If the key was required, this implies an error
- // condition.
- throw new ScannerException("while scanning a simple key", key.getMark(),
- "could not find expected ':'", reader.getMark());
- }
- iterator.remove();
- }
- }
- }
- }
-
- /**
- * The next token may start a simple key. We check if it's possible and save
- * its position. This function is called for ALIAS, ANCHOR, TAG,
- * SCALAR(flow), '[', and '{'.
- */
- private void savePossibleSimpleKey() {
- // The next token may start a simple key. We check if it's possible
- // and save its position. This function is called for
- // ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
-
- // Check if a simple key is required at the current position.
- // A simple key is required if this position is the root flowLevel, AND
- // the current indentation level is the same as the last indent-level.
- boolean required = (this.flowLevel == 0) && (this.indent == this.reader.getColumn());
-
- if (allowSimpleKey || !required) {
- // A simple key is required only if it is the first token in the
- // current line. Therefore it is always allowed.
- } else {
- throw new YAMLException(
- "A simple key is required only if it is the first token in the current line");
- }
-
- // The next token might be a simple key. Let's save it's number and
- // position.
- if (this.allowSimpleKey) {
- removePossibleSimpleKey();
- int tokenNumber = this.tokensTaken + this.tokens.size();
- SimpleKey key = new SimpleKey(tokenNumber, required, reader.getIndex(),
- reader.getLine(), this.reader.getColumn(), this.reader.getMark());
- this.possibleSimpleKeys.put(this.flowLevel, key);
+ /**
+ * A regular expression matching characters which are not in the hexadecimal set (0-9, A-F, a-f).
+ */
+ private static final Pattern NOT_HEXA = Pattern.compile("[^0-9A-Fa-f]");
+
+ /**
+ * A mapping from an escaped character in the input stream to the string representation that they
+ * should be replaced with.
+ *
+ * YAML defines several common and a few uncommon escape sequences.
+ *
+ * @see <a href="http://www.yaml.org/spec/current.html#id2517668">4.1.6. Escape Sequences</a>
+ */
+ public static final Map<Character, String> ESCAPE_REPLACEMENTS = new HashMap<Character, String>();
+
+ /**
+ * A mapping from a character to a number of bytes to read-ahead for that escape sequence. These
+ * escape sequences are used to handle unicode escaping in the following formats, where H is a
+ * hexadecimal character:
+ *
+ * <pre>
+ * &#92;xHH : escaped 8-bit Unicode character
+ * &#92;uHHHH : escaped 16-bit Unicode character
+ * &#92;UHHHHHHHH : escaped 32-bit Unicode character
+ * </pre>
+ *
+ * @see <a href="http://yaml.org/spec/1.1/current.html#id872840">5.6. Escape Sequences</a>
+ */
+ public static final Map<Character, Integer> ESCAPE_CODES = new HashMap<Character, Integer>();
+
+ static {
+ // ASCII null
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('0'), "\0");
+ // ASCII bell
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('a'), "\u0007");
+ // ASCII backspace
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('b'), "\u0008");
+ // ASCII horizontal tab
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('t'), "\u0009");
+ // ASCII newline (line feed; &#92;n maps to 0x0A)
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('n'), "\n");
+ // ASCII vertical tab
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('v'), "\u000B");
+ // ASCII form-feed
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('f'), "\u000C");
+ // carriage-return (&#92;r maps to 0x0D)
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('r'), "\r");
+ // ASCII escape character (Esc)
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('e'), "\u001B");
+ // ASCII space
+ ESCAPE_REPLACEMENTS.put(Character.valueOf(' '), "\u0020");
+ // ASCII double-quote
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('"'), "\"");
+ // ASCII backslash
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('\\'), "\\");
+ // Unicode next line
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('N'), "\u0085");
+ // Unicode non-breaking-space
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('_'), "\u00A0");
+ // Unicode line-separator
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('L'), "\u2028");
+ // Unicode paragraph separator
+ ESCAPE_REPLACEMENTS.put(Character.valueOf('P'), "\u2029");
+
+ // 8-bit Unicode
+ ESCAPE_CODES.put(Character.valueOf('x'), 2);
+ // 16-bit Unicode
+ ESCAPE_CODES.put(Character.valueOf('u'), 4);
+ // 32-bit Unicode (Supplementary characters are supported)
+ ESCAPE_CODES.put(Character.valueOf('U'), 8);
+ }
+
+ private final StreamReader reader;
+ // Had we reached the end of the stream?
+ private boolean done = false;
+
+ // The number of unclosed '{' and '['. `flow_level == 0` means block context.
+ private int flowLevel = 0;
+
+ // List of processed tokens that are not yet emitted.
+ private final List<Token> tokens;
+
+ // The last added token
+ private Token lastToken;
+
+ // Number of tokens that were emitted through the `getToken()` method.
+ private int tokensTaken = 0;
+
+ // The current indentation level.
+ private int indent = -1;
+
+ // Past indentation levels.
+ private final ArrayStack<Integer> indents;
+
+ // A flag that indicates if comments should be parsed
+ private boolean parseComments;
+
+ private final LoaderOptions loaderOptions;
+
+ // Variables related to simple keys treatment. See PyYAML.
+
+ /**
+ * <pre>
+ * A simple key is a key that is not denoted by the '?' indicator.
+ * Example of simple keys:
+ * ---
+ * block simple key: value
+ * ? not a simple key:
+ * : { flow simple key: value }
+ * We emit the KEY token before all keys, so when we find a potential
+ * simple key, we try to locate the corresponding ':' indicator.
+ * Simple keys should be limited to a single line and 1024 characters.
+ *
+ * Can a simple key start at the current position? A simple key may
+ * start:
+ * - at the beginning of the line, not counting indentation spaces
+ * (in block context),
+ * - after '{', '[', ',' (in the flow context),
+ * - after '?', ':', '-' (in the block context).
+ * In the block context, this flag also signifies if a block collection
+ * may start at the current position.
+ * </pre>
+ */
+ private boolean allowSimpleKey = true;
+
+ /*
+ * Keep track of possible simple keys. This is a dictionary. The key is `flow_level`; there can be
+ * no more than one possible simple key for each level. The value is a SimpleKey record:
+ * (token_number, required, index, line, column, mark) A simple key may start with ALIAS, ANCHOR,
+ * TAG, SCALAR(flow), '[', or '{' tokens.
+ */
+ private final Map<Integer, SimpleKey> possibleSimpleKeys;
+
+ public ScannerImpl(StreamReader reader) {
+ this(reader, new LoaderOptions());
+ }
+
+ public ScannerImpl(StreamReader reader, LoaderOptions options) {
+ this.parseComments = options.isProcessComments();
+ this.reader = reader;
+ this.tokens = new ArrayList<Token>(100);
+ this.indents = new ArrayStack<Integer>(10);
+ // The order in possibleSimpleKeys is kept for nextPossibleSimpleKey()
+ this.possibleSimpleKeys = new LinkedHashMap<Integer, SimpleKey>();
+ this.loaderOptions = options;
+ fetchStreamStart();// Add the STREAM-START token.
+ }
+
+ /**
+ * Please use LoaderOptions instead Set the scanner to ignore comments or parse them as a
+ * <code>CommentToken</code>.
+ *
+ * @param parseComments <code>true</code> to parse; <code>false</code> to ignore
+ */
+ @Deprecated
+ public ScannerImpl setParseComments(boolean parseComments) {
+ this.parseComments = parseComments;
+ return this;
+ }
+
+ @Deprecated
+ public boolean isParseComments() {
+ return parseComments;
+ }
+
+ /**
+ * Check whether the next token is one of the given types.
+ */
+ public boolean checkToken(Token.ID... choices) {
+ while (needMoreTokens()) {
+ fetchMoreTokens();
+ }
+ if (!this.tokens.isEmpty()) {
+ if (choices.length == 0) {
+ return true;
+ }
+ // since profiler puts this method on top (it is used a lot), we
+ // should not use 'foreach' here because of the performance reasons
+ Token.ID first = this.tokens.get(0).getTokenId();
+ for (int i = 0; i < choices.length; i++) {
+ if (first == choices[i]) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Return the next token, but do not delete it from the queue.
+ */
+ public Token peekToken() {
+ while (needMoreTokens()) {
+ fetchMoreTokens();
+ }
+ return this.tokens.get(0);
+ }
+
+ /**
+ * Return the next token, removing it from the queue.
+ */
+ public Token getToken() {
+ this.tokensTaken++;
+ return this.tokens.remove(0);
+ }
+
+ // Private methods.
+
+ private void addToken(Token token) {
+ lastToken = token;
+ this.tokens.add(token);
+ }
+
+ private void addToken(int index, Token token) {
+ if (index == this.tokens.size()) {
+ lastToken = token;
+ }
+ this.tokens.add(index, token);
+ }
+
+ private void addAllTokens(List<Token> tokens) {
+ lastToken = tokens.get(tokens.size() - 1);
+ this.tokens.addAll(tokens);
+ }
+
+ /**
+ * Returns true if more tokens should be scanned.
+ */
+ private boolean needMoreTokens() {
+ // If we are done, we do not require more tokens.
+ if (this.done) {
+ return false;
+ }
+ // If we aren't done, but we have no tokens, we need to scan more.
+ if (this.tokens.isEmpty()) {
+ return true;
+ }
+ // The current token may be a potential simple key, so we
+ // need to look further.
+ stalePossibleSimpleKeys();
+ return nextPossibleSimpleKey() == this.tokensTaken;
+ }
+
+ /**
+ * Fetch one or more tokens from the StreamReader.
+ */
+ private void fetchMoreTokens() {
+ if (reader.getIndex() > loaderOptions.getCodePointLimit()) {
+ throw new YAMLException("The incoming YAML document exceeds the limit: "
+ + loaderOptions.getCodePointLimit() + " code points.");
+ }
+ // Eat whitespaces and process comments until we reach the next token.
+ scanToNextToken();
+ // Remove obsolete possible simple keys.
+ stalePossibleSimpleKeys();
+ // Compare the current indentation and column. It may add some tokens
+ // and decrease the current indentation level.
+ unwindIndent(reader.getColumn());
+ // Peek the next code point, to decide what the next group of tokens
+ // will look like.
+ int c = reader.peek();
+ switch (c) {
+ case '\0':
+ // Is it the end of stream?
+ fetchStreamEnd();
+ return;
+ case '%':
+ // Is it a directive?
+ if (checkDirective()) {
+ fetchDirective();
+ return;
+ }
+ break;
+ case '-':
+ // Is it the document start?
+ if (checkDocumentStart()) {
+ fetchDocumentStart();
+ return;
+ // Is it the block entry indicator?
+ } else if (checkBlockEntry()) {
+ fetchBlockEntry();
+ return;
+ }
+ break;
+ case '.':
+ // Is it the document end?
+ if (checkDocumentEnd()) {
+ fetchDocumentEnd();
+ return;
+ }
+ break;
+ // TODO support for BOM within a stream. (also not implemented in PyYAML)
+ case '[':
+ // Is it the flow sequence start indicator?
+ fetchFlowSequenceStart();
+ return;
+ case '{':
+ // Is it the flow mapping start indicator?
+ fetchFlowMappingStart();
+ return;
+ case ']':
+ // Is it the flow sequence end indicator?
+ fetchFlowSequenceEnd();
+ return;
+ case '}':
+ // Is it the flow mapping end indicator?
+ fetchFlowMappingEnd();
+ return;
+ case ',':
+ // Is it the flow entry indicator?
+ fetchFlowEntry();
+ return;
+ // see block entry indicator above
+ case '?':
+ // Is it the key indicator?
+ if (checkKey()) {
+ fetchKey();
+ return;
+ }
+ break;
+ case ':':
+ // Is it the value indicator?
+ if (checkValue()) {
+ fetchValue();
+ return;
+ }
+ break;
+ case '*':
+ // Is it an alias?
+ fetchAlias();
+ return;
+ case '&':
+ // Is it an anchor?
+ fetchAnchor();
+ return;
+ case '!':
+ // Is it a tag?
+ fetchTag();
+ return;
+ case '|':
+ // Is it a literal scalar?
+ if (this.flowLevel == 0) {
+ fetchLiteral();
+ return;
}
- }
-
- /**
- * Remove the saved possible key position at the current flow level.
- */
- private void removePossibleSimpleKey() {
- SimpleKey key = possibleSimpleKeys.remove(flowLevel);
- if (key != null && key.isRequired()) {
+ break;
+ case '>':
+ // Is it a folded scalar?
+ if (this.flowLevel == 0) {
+ fetchFolded();
+ return;
+ }
+ break;
+ case '\'':
+ // Is it a single quoted scalar?
+ fetchSingle();
+ return;
+ case '"':
+ // Is it a double quoted scalar?
+ fetchDouble();
+ return;
+ }
+ // It must be a plain scalar then.
+ if (checkPlain()) {
+ fetchPlain();
+ return;
+ }
+ // No? It's an error. Let's produce a nice error message.We do this by
+ // converting escaped characters into their escape sequences. This is a
+ // backwards use of the ESCAPE_REPLACEMENTS map.
+ String chRepresentation = escapeChar(String.valueOf(Character.toChars(c)));
+ if (c == '\t') {
+ chRepresentation += "(TAB)";
+ }
+ String text = String.format(
+ "found character '%s' that cannot start any token. (Do not use %s for indentation)",
+ chRepresentation, chRepresentation);
+ throw new ScannerException("while scanning for the next token", null, text, reader.getMark());
+ }
+
+ /**
+ * This is implemented in CharConstants in SnakeYAML Engine
+ */
+ private String escapeChar(String chRepresentation) {
+ for (Character s : ESCAPE_REPLACEMENTS.keySet()) {
+ String v = ESCAPE_REPLACEMENTS.get(s);
+ if (v.equals(chRepresentation)) {
+ return "\\" + s;// ' ' -> '\t'
+ }
+ }
+ return chRepresentation;
+ }
+
+ // Simple keys treatment.
+
+ /**
+ * Return the number of the nearest possible simple key. Actually we don't need to loop through
+ * the whole dictionary.
+ */
+ private int nextPossibleSimpleKey() {
+ /*
+ * the implementation is not as in PyYAML. Because this.possibleSimpleKeys is ordered we can
+ * simply take the first key
+ */
+ if (!this.possibleSimpleKeys.isEmpty()) {
+ return this.possibleSimpleKeys.values().iterator().next().getTokenNumber();
+ }
+ return -1;
+ }
+
+ /**
+ * <pre>
+ * Remove entries that are no longer possible simple keys. According to
+ * the YAML specification, simple keys
+ * - should be limited to a single line,
+ * - should be no longer than 1024 characters.
+ * Disabling this procedure will allow simple keys of any length and
+ * height (may cause problems if indentation is broken though).
+ * </pre>
+ */
+ private void stalePossibleSimpleKeys() {
+ if (!this.possibleSimpleKeys.isEmpty()) {
+ for (Iterator<SimpleKey> iterator = this.possibleSimpleKeys.values().iterator(); iterator
+ .hasNext();) {
+ SimpleKey key = iterator.next();
+ if ((key.getLine() != reader.getLine()) || (reader.getIndex() - key.getIndex() > 1024)) {
+ // If the key is not on the same line as the current
+ // position OR the difference in column between the token
+ // start and the current position is more than the maximum
+ // simple key length, then this cannot be a simple key.
+ if (key.isRequired()) {
+ // If the key was required, this implies an error
+ // condition.
throw new ScannerException("while scanning a simple key", key.getMark(),
- "could not find expected ':'", reader.getMark());
- }
- }
-
- // Indentation functions.
-
- /**
- * * Handle implicitly ending multiple levels of block nodes by decreased
- * indentation. This function becomes important on lines 4 and 7 of this
- * example:
- *
- * <pre>
- * 1) book one:
- * 2) part one:
- * 3) chapter one
- * 4) part two:
- * 5) chapter one
- * 6) chapter two
- * 7) book two:
- * </pre>
- *
- * In flow context, tokens should respect indentation. Actually the
- * condition should be `self.indent &gt;= column` according to the spec. But
- * this condition will prohibit intuitively correct constructions such as
- * key : { } </pre>
- */
- private void unwindIndent(int col) {
- // In the flow context, indentation is ignored. We make the scanner less
- // restrictive then specification requires.
- if (this.flowLevel != 0) {
- return;
- }
-
- // In block context, we may need to issue the BLOCK-END tokens.
- while (this.indent > col) {
- Mark mark = reader.getMark();
- this.indent = this.indents.pop();
- this.tokens.add(new BlockEndToken(mark, mark));
- }
- }
-
- /**
- * Check if we need to increase indentation.
- */
- private boolean addIndent(int column) {
- if (this.indent < column) {
- this.indents.push(this.indent);
- this.indent = column;
- return true;
- }
- return false;
- }
-
- // Fetchers.
-
- /**
- * We always add STREAM-START as the first token and STREAM-END as the last
- * token.
- */
- private void fetchStreamStart() {
- // Read the token.
+ "could not find expected ':'", reader.getMark());
+ }
+ iterator.remove();
+ }
+ }
+ }
+ }
+
+ /**
+ * The next token may start a simple key. We check if it's possible and save its position. This
+ * function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
+ */
+ private void savePossibleSimpleKey() {
+ // The next token may start a simple key. We check if it's possible
+ // and save its position. This function is called for
+ // ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
+
+ // Check if a simple key is required at the current position.
+ // A simple key is required if this position is the root flowLevel, AND
+ // the current indentation level is the same as the last indent-level.
+ boolean required = (this.flowLevel == 0) && (this.indent == this.reader.getColumn());
+
+ if (allowSimpleKey || !required) {
+ // A simple key is required only if it is the first token in the
+ // current line. Therefore it is always allowed.
+ } else {
+ throw new YAMLException(
+ "A simple key is required only if it is the first token in the current line");
+ }
+
+ // The next token might be a simple key. Let's save it's number and
+ // position.
+ if (this.allowSimpleKey) {
+ removePossibleSimpleKey();
+ int tokenNumber = this.tokensTaken + this.tokens.size();
+ SimpleKey key = new SimpleKey(tokenNumber, required, reader.getIndex(), reader.getLine(),
+ this.reader.getColumn(), this.reader.getMark());
+ this.possibleSimpleKeys.put(this.flowLevel, key);
+ }
+ }
+
+ /**
+ * Remove the saved possible key position at the current flow level.
+ */
+ private void removePossibleSimpleKey() {
+ SimpleKey key = possibleSimpleKeys.remove(flowLevel);
+ if (key != null && key.isRequired()) {
+ throw new ScannerException("while scanning a simple key", key.getMark(),
+ "could not find expected ':'", reader.getMark());
+ }
+ }
+
+ // Indentation functions.
+
+ /**
+ * * Handle implicitly ending multiple levels of block nodes by decreased indentation. This
+ * function becomes important on lines 4 and 7 of this example:
+ *
+ * <pre>
+ * 1) book one:
+ * 2) part one:
+ * 3) chapter one
+ * 4) part two:
+ * 5) chapter one
+ * 6) chapter two
+ * 7) book two:
+ * </pre>
+ *
+ * In flow context, tokens should respect indentation. Actually the condition should be
+ * `self.indent &gt;= column` according to the spec. But this condition will prohibit intuitively
+ * correct constructions such as key : { }
+ * </pre>
+ */
+ private void unwindIndent(int col) {
+ // In the flow context, indentation is ignored. We make the scanner less
+ // restrictive than specification requires.
+ if (this.flowLevel != 0) {
+ return;
+ }
+
+ // In block context, we may need to issue the BLOCK-END tokens.
+ while (this.indent > col) {
+ Mark mark = reader.getMark();
+ this.indent = this.indents.pop();
+ addToken(new BlockEndToken(mark, mark));
+ }
+ }
+
+ /**
+ * Check if we need to increase indentation.
+ */
+ private boolean addIndent(int column) {
+ if (this.indent < column) {
+ this.indents.push(this.indent);
+ this.indent = column;
+ return true;
+ }
+ return false;
+ }
+
+ // Fetchers.
+
+ /**
+ * We always add STREAM-START as the first token and STREAM-END as the last token.
+ */
+ private void fetchStreamStart() {
+ // Read the token.
+ Mark mark = reader.getMark();
+
+ // Add STREAM-START.
+ Token token = new StreamStartToken(mark, mark);
+ addToken(token);
+ }
+
+ private void fetchStreamEnd() {
+ // Set the current indentation to -1.
+ unwindIndent(-1);
+
+ // Reset simple keys.
+ removePossibleSimpleKey();
+ this.allowSimpleKey = false;
+ this.possibleSimpleKeys.clear();
+
+ // Read the token.
+ Mark mark = reader.getMark();
+
+ // Add STREAM-END.
+ Token token = new StreamEndToken(mark, mark);
+ addToken(token);
+
+ // The stream is finished.
+ this.done = true;
+ }
+
+ /**
+ * Fetch a YAML directive. Directives are presentation details that are interpreted as
+ * instructions to the processor. YAML defines two kinds of directives, YAML and TAG; all other
+ * types are reserved for future use.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id864824">3.2.3.4. Directives</a>
+ */
+ private void fetchDirective() {
+ // Set the current indentation to -1.
+ unwindIndent(-1);
+
+ // Reset simple keys.
+ removePossibleSimpleKey();
+ this.allowSimpleKey = false;
+
+ // Scan and add DIRECTIVE.
+ List<Token> tok = scanDirective();
+ addAllTokens(tok);
+ }
+
+ /**
+ * Fetch a document-start token ("---").
+ */
+ private void fetchDocumentStart() {
+ fetchDocumentIndicator(true);
+ }
+
+ /**
+ * Fetch a document-end token ("...").
+ */
+ private void fetchDocumentEnd() {
+ fetchDocumentIndicator(false);
+ }
+
+ /**
+ * Fetch a document indicator, either "---" for "document-start", or else "..." for "document-end.
+ * The type is chosen by the given boolean.
+ */
+ private void fetchDocumentIndicator(boolean isDocumentStart) {
+ // Set the current indentation to -1.
+ unwindIndent(-1);
+
+ // Reset simple keys. Note that there could not be a block collection
+ // after '---'.
+ removePossibleSimpleKey();
+ this.allowSimpleKey = false;
+
+ // Add DOCUMENT-START or DOCUMENT-END.
+ Mark startMark = reader.getMark();
+ reader.forward(3);
+ Mark endMark = reader.getMark();
+ Token token;
+ if (isDocumentStart) {
+ token = new DocumentStartToken(startMark, endMark);
+ } else {
+ token = new DocumentEndToken(startMark, endMark);
+ }
+ addToken(token);
+ }
+
+ private void fetchFlowSequenceStart() {
+ fetchFlowCollectionStart(false);
+ }
+
+ private void fetchFlowMappingStart() {
+ fetchFlowCollectionStart(true);
+ }
+
+ /**
+ * Fetch a flow-style collection start, which is either a sequence or a mapping. The type is
+ * determined by the given boolean.
+ *
+ * A flow-style collection is in a format similar to JSON. Sequences are started by '[' and ended
+ * by ']'; mappings are started by '{' and ended by '}'.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ *
+ * @param isMappingStart
+ */
+ private void fetchFlowCollectionStart(boolean isMappingStart) {
+ // '[' and '{' may start a simple key.
+ savePossibleSimpleKey();
+
+ // Increase the flow level.
+ this.flowLevel++;
+
+ // Simple keys are allowed after '[' and '{'.
+ this.allowSimpleKey = true;
+
+ // Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
+ Mark startMark = reader.getMark();
+ reader.forward(1);
+ Mark endMark = reader.getMark();
+ Token token;
+ if (isMappingStart) {
+ token = new FlowMappingStartToken(startMark, endMark);
+ } else {
+ token = new FlowSequenceStartToken(startMark, endMark);
+ }
+ addToken(token);
+ }
+
+ private void fetchFlowSequenceEnd() {
+ fetchFlowCollectionEnd(false);
+ }
+
+ private void fetchFlowMappingEnd() {
+ fetchFlowCollectionEnd(true);
+ }
+
+ /**
+ * Fetch a flow-style collection end, which is either a sequence or a mapping. The type is
+ * determined by the given boolean.
+ *
+ * A flow-style collection is in a format similar to JSON. Sequences are started by '[' and ended
+ * by ']'; mappings are started by '{' and ended by '}'.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ */
+ private void fetchFlowCollectionEnd(boolean isMappingEnd) {
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+
+ // Decrease the flow level.
+ this.flowLevel--;
+
+ // No simple keys after ']' or '}'.
+ this.allowSimpleKey = false;
+
+ // Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
+ Mark startMark = reader.getMark();
+ reader.forward();
+ Mark endMark = reader.getMark();
+ Token token;
+ if (isMappingEnd) {
+ token = new FlowMappingEndToken(startMark, endMark);
+ } else {
+ token = new FlowSequenceEndToken(startMark, endMark);
+ }
+ addToken(token);
+ }
+
+ /**
+ * Fetch an entry in the flow style. Flow-style entries occur either immediately after the start
+ * of a collection, or else after a comma.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ */
+ private void fetchFlowEntry() {
+ // Simple keys are allowed after ','.
+ this.allowSimpleKey = true;
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+
+ // Add FLOW-ENTRY.
+ Mark startMark = reader.getMark();
+ reader.forward();
+ Mark endMark = reader.getMark();
+ Token token = new FlowEntryToken(startMark, endMark);
+ addToken(token);
+ }
+
+ /**
+ * Fetch an entry in the block style.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ */
+ private void fetchBlockEntry() {
+ // Block context needs additional checks.
+ if (this.flowLevel == 0) {
+ // Are we allowed to start a new entry?
+ if (!this.allowSimpleKey) {
+ throw new ScannerException(null, null, "sequence entries are not allowed here",
+ reader.getMark());
+ }
+
+ // We may need to add BLOCK-SEQUENCE-START.
+ if (addIndent(this.reader.getColumn())) {
Mark mark = reader.getMark();
-
- // Add STREAM-START.
- Token token = new StreamStartToken(mark, mark);
- this.tokens.add(token);
- }
-
- private void fetchStreamEnd() {
- // Set the current intendation to -1.
- unwindIndent(-1);
-
- // Reset simple keys.
- removePossibleSimpleKey();
- this.allowSimpleKey = false;
- this.possibleSimpleKeys.clear();
-
- // Read the token.
+ addToken(new BlockSequenceStartToken(mark, mark));
+ }
+ } else {
+ // It's an error for the block entry to occur in the flow
+ // context,but we let the parser detect this.
+ }
+ // Simple keys are allowed after '-'.
+ this.allowSimpleKey = true;
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+
+ // Add BLOCK-ENTRY.
+ Mark startMark = reader.getMark();
+ reader.forward();
+ Mark endMark = reader.getMark();
+ Token token = new BlockEntryToken(startMark, endMark);
+ addToken(token);
+ }
+
+ /**
+ * Fetch a key in a block-style mapping.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ */
+ private void fetchKey() {
+ // Block context needs additional checks.
+ if (this.flowLevel == 0) {
+ // Are we allowed to start a key (not necessary a simple)?
+ if (!this.allowSimpleKey) {
+ throw new ScannerException(null, null, "mapping keys are not allowed here",
+ reader.getMark());
+ }
+ // We may need to add BLOCK-MAPPING-START.
+ if (addIndent(this.reader.getColumn())) {
Mark mark = reader.getMark();
-
- // Add STREAM-END.
- Token token = new StreamEndToken(mark, mark);
- this.tokens.add(token);
-
- // The stream is finished.
- this.done = true;
- }
-
- /**
- * Fetch a YAML directive. Directives are presentation details that are
- * interpreted as instructions to the processor. YAML defines two kinds of
- * directives, YAML and TAG; all other types are reserved for future use.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id864824"></a>
- */
- private void fetchDirective() {
- // Set the current intendation to -1.
- unwindIndent(-1);
-
- // Reset simple keys.
- removePossibleSimpleKey();
- this.allowSimpleKey = false;
-
- // Scan and add DIRECTIVE.
- Token tok = scanDirective();
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a document-start token ("---").
- */
- private void fetchDocumentStart() {
- fetchDocumentIndicator(true);
- }
-
- /**
- * Fetch a document-end token ("...").
- */
- private void fetchDocumentEnd() {
- fetchDocumentIndicator(false);
- }
-
- /**
- * Fetch a document indicator, either "---" for "document-start", or else
- * "..." for "document-end. The type is chosen by the given boolean.
- */
- private void fetchDocumentIndicator(boolean isDocumentStart) {
- // Set the current intendation to -1.
- unwindIndent(-1);
-
- // Reset simple keys. Note that there could not be a block collection
- // after '---'.
- removePossibleSimpleKey();
- this.allowSimpleKey = false;
-
- // Add DOCUMENT-START or DOCUMENT-END.
- Mark startMark = reader.getMark();
- reader.forward(3);
- Mark endMark = reader.getMark();
- Token token;
- if (isDocumentStart) {
- token = new DocumentStartToken(startMark, endMark);
- } else {
- token = new DocumentEndToken(startMark, endMark);
- }
- this.tokens.add(token);
- }
-
- private void fetchFlowSequenceStart() {
- fetchFlowCollectionStart(false);
- }
-
- private void fetchFlowMappingStart() {
- fetchFlowCollectionStart(true);
- }
-
+ addToken(new BlockMappingStartToken(mark, mark));
+ }
+ }
+ // Simple keys are allowed after '?' in the block context.
+ this.allowSimpleKey = this.flowLevel == 0;
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+
+ // Add KEY.
+ Mark startMark = reader.getMark();
+ reader.forward();
+ Mark endMark = reader.getMark();
+ Token token = new KeyToken(startMark, endMark);
+ addToken(token);
+ }
+
+ /**
+ * Fetch a value in a block-style mapping.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ */
+ private void fetchValue() {
+ // Do we determine a simple key?
+ SimpleKey key = this.possibleSimpleKeys.remove(this.flowLevel);
+ if (key != null) {
+ // Add KEY.
+ addToken(key.getTokenNumber() - this.tokensTaken, new KeyToken(key.getMark(), key.getMark()));
+
+ // If this key starts a new block mapping, we need to add
+ // BLOCK-MAPPING-START.
+ if (this.flowLevel == 0) {
+ if (addIndent(key.getColumn())) {
+ addToken(key.getTokenNumber() - this.tokensTaken,
+ new BlockMappingStartToken(key.getMark(), key.getMark()));
+ }
+ }
+ // There cannot be two simple keys one after another.
+ this.allowSimpleKey = false;
+
+ } else {
+ // It must be a part of a complex key.
+ // Block context needs additional checks. Do we really need them?
+ // They will be caught by the parser anyway.
+ if (this.flowLevel == 0) {
+
+ // We are allowed to start a complex value if and only if we can
+ // start a simple key.
+ if (!this.allowSimpleKey) {
+ throw new ScannerException(null, null, "mapping values are not allowed here",
+ reader.getMark());
+ }
+ }
+
+ // If this value starts a new block mapping, we need to add
+ // BLOCK-MAPPING-START. It will be detected as an error later by
+ // the parser.
+ if (flowLevel == 0) {
+ if (addIndent(reader.getColumn())) {
+ Mark mark = reader.getMark();
+ addToken(new BlockMappingStartToken(mark, mark));
+ }
+ }
+
+ // Simple keys are allowed after ':' in the block context.
+ allowSimpleKey = flowLevel == 0;
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+ }
+ // Add VALUE.
+ Mark startMark = reader.getMark();
+ reader.forward();
+ Mark endMark = reader.getMark();
+ Token token = new ValueToken(startMark, endMark);
+ addToken(token);
+ }
+
+ /**
+ * Fetch an alias, which is a reference to an anchor. Aliases take the format:
+ *
+ * <pre>
+ * *(anchor name)
+ * </pre>
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863390">3.2.2.2. Anchors and Aliases</a>
+ */
+ private void fetchAlias() {
+ // ALIAS could be a simple key.
+ savePossibleSimpleKey();
+
+ // No simple keys after ALIAS.
+ this.allowSimpleKey = false;
+
+ // Scan and add ALIAS.
+ Token tok = scanAnchor(false);
+ addToken(tok);
+ }
+
+ /**
+ * Fetch an anchor. Anchors take the form:
+ *
+ * <pre>
+ * &(anchor name)
+ * </pre>
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863390">3.2.2.2. Anchors and Aliases</a>
+ */
+ private void fetchAnchor() {
+ // ANCHOR could start a simple key.
+ savePossibleSimpleKey();
+
+ // No simple keys after ANCHOR.
+ this.allowSimpleKey = false;
+
+ // Scan and add ANCHOR.
+ Token tok = scanAnchor(true);
+ addToken(tok);
+ }
+
+ /**
+ * Fetch a tag. Tags take a complex form.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id861700">3.2.1.2. Tags</a>
+ */
+ private void fetchTag() {
+ // TAG could start a simple key.
+ savePossibleSimpleKey();
+
+ // No simple keys after TAG.
+ this.allowSimpleKey = false;
+
+ // Scan and add TAG.
+ Token tok = scanTag();
+ addToken(tok);
+ }
+
+ /**
+ * Fetch a literal scalar, denoted with a vertical-bar. This is the type best used for source code
+ * and other content, such as binary data, which must be included verbatim.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ */
+ private void fetchLiteral() {
+ fetchBlockScalar('|');
+ }
+
+ /**
+ * Fetch a folded scalar, denoted with a greater-than sign. This is the type best used for long
+ * content, such as the text of a chapter or description.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ */
+ private void fetchFolded() {
+ fetchBlockScalar('>');
+ }
+
+ /**
+ * Fetch a block scalar (literal or folded).
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ *
+ * @param style
+ */
+ private void fetchBlockScalar(char style) {
+ // A simple key may follow a block scalar.
+ this.allowSimpleKey = true;
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey();
+
+ // Scan and add SCALAR.
+ List<Token> tok = scanBlockScalar(style);
+ addAllTokens(tok);
+ }
+
+ /**
+ * Fetch a single-quoted (') scalar.
+ */
+ private void fetchSingle() {
+ fetchFlowScalar('\'');
+ }
+
+ /**
+ * Fetch a double-quoted (") scalar.
+ */
+ private void fetchDouble() {
+ fetchFlowScalar('"');
+ }
+
+ /**
+ * Fetch a flow scalar (single- or double-quoted).
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id863975">3.2.3.1. Node Styles</a>
+ *
+ * @param style
+ */
+ private void fetchFlowScalar(char style) {
+ // A flow scalar could be a simple key.
+ savePossibleSimpleKey();
+
+ // No simple keys after flow scalars.
+ this.allowSimpleKey = false;
+
+ // Scan and add SCALAR.
+ Token tok = scanFlowScalar(style);
+ addToken(tok);
+ }
+
+ /**
+ * Fetch a plain scalar.
+ */
+ private void fetchPlain() {
+ // A plain scalar could be a simple key.
+ savePossibleSimpleKey();
+
+ // No simple keys after plain scalars. But note that `scan_plain` will
+ // change this flag if the scan is finished at the beginning of the
+ // line.
+ this.allowSimpleKey = false;
+
+ // Scan and add SCALAR. May change `allow_simple_key`.
+ Token tok = scanPlain();
+ addToken(tok);
+ }
+
+ // Checkers.
+
+ /**
+ * Returns true if the next thing on the reader is a directive, given that the leading '%' has
+ * already been checked.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id864824">3.2.3.4. Directives</a>
+ */
+ private boolean checkDirective() {
+ // DIRECTIVE: ^ '%' ...
+ // The '%' indicator is already checked.
+ return reader.getColumn() == 0;
+ }
+
+ /**
+ * Returns true if the next thing on the reader is a document-start ("---"). A document-start is
+ * always followed immediately by a new line.
+ */
+ private boolean checkDocumentStart() {
+ // DOCUMENT-START: ^ '---' (' '|'\n')
+ if (reader.getColumn() == 0) {
+ return "---".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3));
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if the next thing on the reader is a document-end ("..."). A document-end is
+ * always followed immediately by a new line.
+ */
+ private boolean checkDocumentEnd() {
+ // DOCUMENT-END: ^ '...' (' '|'\n')
+ if (reader.getColumn() == 0) {
+ return "...".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3));
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if the next thing on the reader is a block token.
+ */
+ private boolean checkBlockEntry() {
+ // BLOCK-ENTRY: '-' (' '|'\n')
+ return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
+ }
+
+ /**
+ * Returns true if the next thing on the reader is a key token.
+ */
+ private boolean checkKey() {
+ // KEY(flow context): '?'
+ if (this.flowLevel != 0) {
+ return true;
+ } else {
+ // KEY(block context): '?' (' '|'\n')
+ return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
+ }
+ }
+
+ /**
+ * Returns true if the next thing on the reader is a value token.
+ */
+ private boolean checkValue() {
+ // VALUE(flow context): ':'
+ if (flowLevel != 0) {
+ return true;
+ } else {
+ // VALUE(block context): ':' (' '|'\n')
+ return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
+ }
+ }
+
+ /**
+ * Returns true if the next thing on the reader is a plain token.
+ */
+ private boolean checkPlain() {
/**
- * Fetch a flow-style collection start, which is either a sequence or a
- * mapping. The type is determined by the given boolean.
- *
- * A flow-style collection is in a format similar to JSON. Sequences are
- * started by '[' and ended by ']'; mappings are started by '{' and ended by
- * '}'.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- *
- * @param isMappingStart
+ * <pre>
+ * A plain scalar may start with any non-space character except:
+ * '-', '?', ':', ',', '[', ']', '{', '}',
+ * '#', '&amp;', '*', '!', '|', '&gt;', '\'', '\&quot;',
+ * '%', '@', '`'.
+ *
+ * It may also start with
+ * '-', '?', ':'
+ * if it is followed by a non-space character.
+ *
+ * Note that we limit the last rule to the block context (except the
+ * '-' character) because we want the flow context to be space
+ * independent.
+ * </pre>
*/
- private void fetchFlowCollectionStart(boolean isMappingStart) {
- // '[' and '{' may start a simple key.
- savePossibleSimpleKey();
-
- // Increase the flow level.
- this.flowLevel++;
-
- // Simple keys are allowed after '[' and '{'.
- this.allowSimpleKey = true;
-
- // Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
- Mark startMark = reader.getMark();
- reader.forward(1);
- Mark endMark = reader.getMark();
- Token token;
- if (isMappingStart) {
- token = new FlowMappingStartToken(startMark, endMark);
+ int c = reader.peek();
+ // If the next char is NOT one of the forbidden chars above or
+ // whitespace, then this is the start of a plain scalar.
+ return Constant.NULL_BL_T_LINEBR.hasNo(c, "-?:,[]{}#&*!|>'\"%@`")
+ || (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(1))
+ && (c == '-' || (this.flowLevel == 0 && "?:".indexOf(c) != -1)));
+ }
+
+ // Scanners.
+
+ /**
+ * <pre>
+ * We ignore spaces, line breaks and comments.
+ * If we find a line break in the block context, we set the flag
+ * `allow_simple_key` on.
+ * The byte order mark is stripped if it's the first character in the
+ * stream. We do not yet support BOM inside the stream as the
+ * specification requires. Any such mark will be considered as a part
+ * of the document.
+ * TODO: We need to make tab handling rules more sane. A good rule is
+ * Tabs cannot precede tokens
+ * BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
+ * KEY(block), VALUE(block), BLOCK-ENTRY
+ * So the checking code is
+ * if &lt;TAB&gt;:
+ * self.allow_simple_keys = False
+ * We also need to add the check for `allow_simple_keys == True` to
+ * `unwind_indent` before issuing BLOCK-END.
+ * Scanners for block, flow, and plain scalars need to be modified.
+ * </pre>
+ */
+ private void scanToNextToken() {
+ // If there is a byte order mark (BOM) at the beginning of the stream,
+ // forward past it.
+ if (reader.getIndex() == 0 && reader.peek() == 0xFEFF) {
+ reader.forward();
+ }
+ boolean found = false;
+ int inlineStartColumn = -1;
+ while (!found) {
+ Mark startMark = reader.getMark();
+ int columnBeforeComment = reader.getColumn();
+ boolean commentSeen = false;
+ int ff = 0;
+ // Peek ahead until we find the first non-space character, then
+ // move forward directly to that character.
+ while (reader.peek(ff) == ' ') {
+ ff++;
+ }
+ if (ff > 0) {
+ reader.forward(ff);
+ }
+ // If the character we have skipped forward to is a comment (#),
+ // then peek ahead until we find the next end of line. YAML
+ // comments are from a # to the next new-line. We then forward
+ // past the comment.
+ if (reader.peek() == '#') {
+ commentSeen = true;
+ CommentType type;
+ if (columnBeforeComment != 0
+ && !(lastToken != null && lastToken.getTokenId() == Token.ID.BlockEntry)) {
+ type = CommentType.IN_LINE;
+ inlineStartColumn = reader.getColumn();
+ } else if (inlineStartColumn == reader.getColumn()) {
+ type = CommentType.IN_LINE;
} else {
- token = new FlowSequenceStartToken(startMark, endMark);
+ inlineStartColumn = -1;
+ type = CommentType.BLOCK;
+ }
+ CommentToken token = scanComment(type);
+ if (parseComments) {
+ addToken(token);
+ }
+ }
+ // If we scanned a line break, then (depending on flow level),
+ // simple keys may be allowed.
+ String breaks = scanLineBreak();
+ if (breaks.length() != 0) {// found a line-break
+ if (parseComments && !commentSeen) {
+ if (columnBeforeComment == 0) {
+ Mark endMark = reader.getMark();
+ addToken(new CommentToken(CommentType.BLANK_LINE, breaks, startMark, endMark));
+ }
}
- this.tokens.add(token);
- }
-
- private void fetchFlowSequenceEnd() {
- fetchFlowCollectionEnd(false);
- }
-
- private void fetchFlowMappingEnd() {
- fetchFlowCollectionEnd(true);
- }
-
- /**
- * Fetch a flow-style collection end, which is either a sequence or a
- * mapping. The type is determined by the given boolean.
- *
- * A flow-style collection is in a format similar to JSON. Sequences are
- * started by '[' and ended by ']'; mappings are started by '{' and ended by
- * '}'.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- */
- private void fetchFlowCollectionEnd(boolean isMappingEnd) {
- // Reset possible simple key on the current level.
- removePossibleSimpleKey();
-
- // Decrease the flow level.
- this.flowLevel--;
-
- // No simple keys after ']' or '}'.
- this.allowSimpleKey = false;
-
- // Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
- Mark startMark = reader.getMark();
+ if (this.flowLevel == 0) {
+ // Simple keys are allowed at flow-level 0 after a line
+ // break
+ this.allowSimpleKey = true;
+ }
+ } else {
+ found = true;
+ }
+ }
+ }
+
+ private CommentToken scanComment(CommentType type) {
+ // See the specification for details.
+ Mark startMark = reader.getMark();
+ reader.forward();
+ int length = 0;
+ while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(length))) {
+ length++;
+ }
+ String value = reader.prefixForward(length);
+ Mark endMark = reader.getMark();
+ return new CommentToken(type, value, startMark, endMark);
+ }
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ private List<Token> scanDirective() {
+ // See the specification for details.
+ Mark startMark = reader.getMark();
+ Mark endMark;
+ reader.forward();
+ String name = scanDirectiveName(startMark);
+ List<?> value = null;
+ if ("YAML".equals(name)) {
+ value = scanYamlDirectiveValue(startMark);
+ endMark = reader.getMark();
+ } else if ("TAG".equals(name)) {
+ value = scanTagDirectiveValue(startMark);
+ endMark = reader.getMark();
+ } else {
+ endMark = reader.getMark();
+ int ff = 0;
+ while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
+ ff++;
+ }
+ if (ff > 0) {
+ reader.forward(ff);
+ }
+ }
+ CommentToken commentToken = scanDirectiveIgnoredLine(startMark);
+ DirectiveToken token = new DirectiveToken(name, value, startMark, endMark);
+ return makeTokenList(token, commentToken);
+ }
+
+ /**
+ * Scan a directive name. Directive names are a series of non-space characters.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id895217">7.1. Directives</a>
+ */
+ private String scanDirectiveName(Mark startMark) {
+ // See the specification for details.
+ int length = 0;
+ // A Directive-name is a sequence of alphanumeric characters
+ // (a-z,A-Z,0-9). We scan until we find something that isn't.
+ // FIXME this disagrees with the specification.
+ int c = reader.peek(length);
+ while (Constant.ALPHA.has(c)) {
+ length++;
+ c = reader.peek(length);
+ }
+ // If the name would be empty, an error occurs.
+ if (length == 0) {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a directive", startMark,
+ "expected alphabetic or numeric character, but found " + s + "(" + c + ")",
+ reader.getMark());
+ }
+ String value = reader.prefixForward(length);
+ c = reader.peek();
+ if (Constant.NULL_BL_LINEBR.hasNo(c)) {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a directive", startMark,
+ "expected alphabetic or numeric character, but found " + s + "(" + c + ")",
+ reader.getMark());
+ }
+ return value;
+ }
+
+ private List<Integer> scanYamlDirectiveValue(Mark startMark) {
+ // See the specification for details.
+ while (reader.peek() == ' ') {
+ reader.forward();
+ }
+ Integer major = scanYamlDirectiveNumber(startMark);
+ int c = reader.peek();
+ if (c != '.') {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a directive", startMark,
+ "expected a digit or '.', but found " + s + "(" + c + ")", reader.getMark());
+ }
+ reader.forward();
+ Integer minor = scanYamlDirectiveNumber(startMark);
+ c = reader.peek();
+ if (Constant.NULL_BL_LINEBR.hasNo(c)) {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a directive", startMark,
+ "expected a digit or ' ', but found " + s + "(" + c + ")", reader.getMark());
+ }
+ List<Integer> result = new ArrayList<Integer>(2);
+ result.add(major);
+ result.add(minor);
+ return result;
+ }
+
+ /**
+ * Read a %YAML directive number: this is either the major or the minor part. Stop reading at a
+ * non-digit character (usually either '.' or '\n').
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id895631">7.1.1. “YAML” Directive</a>
+ * @see <a href="http://www.yaml.org/spec/1.1/#ns-dec-digit"></a>
+ */
+ private Integer scanYamlDirectiveNumber(Mark startMark) {
+ // See the specification for details.
+ int c = reader.peek();
+ if (!Character.isDigit(c)) {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a directive", startMark,
+ "expected a digit, but found " + s + "(" + (c) + ")", reader.getMark());
+ }
+ int length = 0;
+ while (Character.isDigit(reader.peek(length))) {
+ length++;
+ }
+ Integer value = Integer.parseInt(reader.prefixForward(length));
+ return value;
+ }
+
+ /**
+ * <p>
+ * Read a %TAG directive value:
+ *
+ * <pre>
+ * s-ignored-space+ c-tag-handle s-ignored-space+ ns-tag-prefix s-l-comments
+ * </pre>
+ *
+ * </p>
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id896044">7.1.2. “TAG” Directive</a>
+ */
+ private List<String> scanTagDirectiveValue(Mark startMark) {
+ // See the specification for details.
+ while (reader.peek() == ' ') {
+ reader.forward();
+ }
+ String handle = scanTagDirectiveHandle(startMark);
+ while (reader.peek() == ' ') {
+ reader.forward();
+ }
+ String prefix = scanTagDirectivePrefix(startMark);
+ List<String> result = new ArrayList<String>(2);
+ result.add(handle);
+ result.add(prefix);
+ return result;
+ }
+
+ /**
+ * Scan a %TAG directive's handle. This is YAML's c-tag-handle.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id896876">7.1.2.2. Tag Handles</a>
+ * @param startMark - beginning of the handle
+ * @return scanned handle
+ */
+ private String scanTagDirectiveHandle(Mark startMark) {
+ // See the specification for details.
+ String value = scanTagHandle("directive", startMark);
+ int c = reader.peek();
+ if (c != ' ') {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a directive", startMark,
+ "expected ' ', but found " + s + "(" + c + ")", reader.getMark());
+ }
+ return value;
+ }
+
+ /**
+ * Scan a %TAG directive's prefix. This is YAML's ns-tag-prefix.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#ns-tag-prefix"></a>
+ */
+ private String scanTagDirectivePrefix(Mark startMark) {
+ // See the specification for details.
+ String value = scanTagUri("directive", startMark);
+ int c = reader.peek();
+ if (Constant.NULL_BL_LINEBR.hasNo(c)) {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a directive", startMark,
+ "expected ' ', but found " + s + "(" + c + ")", reader.getMark());
+ }
+ return value;
+ }
+
+ private CommentToken scanDirectiveIgnoredLine(Mark startMark) {
+ // See the specification for details.
+ while (reader.peek() == ' ') {
+ reader.forward();
+ }
+ CommentToken commentToken = null;
+ if (reader.peek() == '#') {
+ CommentToken comment = scanComment(CommentType.IN_LINE);
+ if (parseComments) {
+ commentToken = comment;
+ }
+ }
+ int c = reader.peek();
+ String lineBreak = scanLineBreak();
+ if (lineBreak.length() == 0 && c != '\0') {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a directive", startMark,
+ "expected a comment or a line break, but found " + s + "(" + c + ")", reader.getMark());
+ }
+ return commentToken;
+ }
+
+ /**
+ * <pre>
+ * The YAML 1.1 specification does not restrict characters for anchors and
+ * aliases. This may lead to problems.
+ * see https://bitbucket.org/snakeyaml/snakeyaml/issues/485/alias-names-are-too-permissive-compared-to
+ * This implementation tries to follow https://github.com/yaml/yaml-spec/blob/master/rfc/RFC-0003.md
+ * </pre>
+ */
+ private Token scanAnchor(boolean isAnchor) {
+ Mark startMark = reader.getMark();
+ int indicator = reader.peek();
+ String name = indicator == '*' ? "alias" : "anchor";
+ reader.forward();
+ int length = 0;
+ int c = reader.peek(length);
+ while (Constant.NULL_BL_T_LINEBR.hasNo(c, ":,[]{}/.*&")) {
+ length++;
+ c = reader.peek(length);
+ }
+ if (length == 0) {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning an " + name, startMark,
+ "unexpected character found " + s + "(" + c + ")", reader.getMark());
+ }
+ String value = reader.prefixForward(length);
+ c = reader.peek();
+ if (Constant.NULL_BL_T_LINEBR.hasNo(c, "?:,]}%@`")) {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning an " + name, startMark,
+ "unexpected character found " + s + "(" + c + ")", reader.getMark());
+ }
+ Mark endMark = reader.getMark();
+ Token tok;
+ if (isAnchor) {
+ tok = new AnchorToken(value, startMark, endMark);
+ } else {
+ tok = new AliasToken(value, startMark, endMark);
+ }
+ return tok;
+ }
+
+ /**
+ * <p>
+ * Scan a Tag property. A Tag property may be specified in one of three ways: c-verbatim-tag,
+ * c-ns-shorthand-tag, or c-ns-non-specific-tag
+ * </p>
+ *
+ * <p>
+ * c-verbatim-tag takes the form !&lt;ns-uri-char+&gt; and must be delivered verbatim (as-is) to
+ * the application. In particular, verbatim tags are not subject to tag resolution.
+ * </p>
+ *
+ * <p>
+ * c-ns-shorthand-tag is a valid tag handle followed by a non-empty suffix. If the tag handle is a
+ * c-primary-tag-handle ('!') then the suffix must have all exclamation marks properly URI-escaped
+ * (%21); otherwise, the string will look like a named tag handle: !foo!bar would be interpreted
+ * as (handle="!foo!", suffix="bar").
+ * </p>
+ *
+ * <p>
+ * c-ns-non-specific-tag is always a lone '!'; this is only useful for plain scalars, where its
+ * specification means that the scalar MUST be resolved to have type tag:yaml.org,2002:str.
+ * </p>
+ *
+ * TODO SnakeYaml incorrectly ignores c-ns-non-specific-tag right now.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id900262">8.2. Node Tags</a>
+ *
+ * TODO Note that this method does not enforce rules about local versus global tags!
+ */
+ private Token scanTag() {
+ // See the specification for details.
+ Mark startMark = reader.getMark();
+ // Determine the type of tag property based on the first character
+ // encountered
+ int c = reader.peek(1);
+ String handle = null;
+ String suffix = null;
+ // Verbatim tag! (c-verbatim-tag)
+ if (c == '<') {
+ // Skip the exclamation mark and &gt;, then read the tag suffix (as
+ // a URI).
+ reader.forward(2);
+ suffix = scanTagUri("tag", startMark);
+ c = reader.peek();
+ if (c != '>') {
+ // If there are any characters between the end of the tag-suffix
+ // URI and the closing &gt;, then an error has occurred.
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a tag", startMark,
+ "expected '>', but found '" + s + "' (" + c + ")", reader.getMark());
+ }
+ reader.forward();
+ } else if (Constant.NULL_BL_T_LINEBR.has(c)) {
+ // A NUL, blank, tab, or line-break means that this was a
+ // c-ns-non-specific tag.
+ suffix = "!";
+ reader.forward();
+ } else {
+ // Any other character implies c-ns-shorthand-tag type.
+
+ // Look ahead in the stream to determine whether this tag property
+ // is of the form !foo or !foo!bar.
+ int length = 1;
+ boolean useHandle = false;
+ while (Constant.NULL_BL_LINEBR.hasNo(c)) {
+ if (c == '!') {
+ useHandle = true;
+ break;
+ }
+ length++;
+ c = reader.peek(length);
+ }
+ // If we need to use a handle, scan it in; otherwise, the handle is
+ // presumed to be '!'.
+ if (useHandle) {
+ handle = scanTagHandle("tag", startMark);
+ } else {
+ handle = "!";
reader.forward();
- Mark endMark = reader.getMark();
- Token token;
- if (isMappingEnd) {
- token = new FlowMappingEndToken(startMark, endMark);
+ }
+ suffix = scanTagUri("tag", startMark);
+ }
+ c = reader.peek();
+ // Check that the next character is allowed to follow a tag-property;
+ // if it is not, raise the error.
+ if (Constant.NULL_BL_LINEBR.hasNo(c)) {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a tag", startMark,
+ "expected ' ', but found '" + s + "' (" + (c) + ")", reader.getMark());
+ }
+ TagTuple value = new TagTuple(handle, suffix);
+ Mark endMark = reader.getMark();
+ return new TagToken(value, startMark, endMark);
+ }
+
+ private List<Token> scanBlockScalar(char style) {
+ // See the specification for details.
+ boolean folded;
+ // Depending on the given style, we determine whether the scalar is
+ // folded ('>') or literal ('|')
+ folded = style == '>';
+ StringBuilder chunks = new StringBuilder();
+ Mark startMark = reader.getMark();
+ // Scan the header.
+ reader.forward();
+ Chomping chompi = scanBlockScalarIndicators(startMark);
+ int increment = chompi.getIncrement();
+ CommentToken commentToken = scanBlockScalarIgnoredLine(startMark);
+
+ // Determine the indentation level and go to the first non-empty line.
+ int minIndent = this.indent + 1;
+ if (minIndent < 1) {
+ minIndent = 1;
+ }
+ String breaks;
+ int maxIndent;
+ int indent;
+ Mark endMark;
+ if (increment == -1) {
+ Object[] brme = scanBlockScalarIndentation();
+ breaks = (String) brme[0];
+ maxIndent = ((Integer) brme[1]).intValue();
+ endMark = (Mark) brme[2];
+ indent = Math.max(minIndent, maxIndent);
+ } else {
+ indent = minIndent + increment - 1;
+ Object[] brme = scanBlockScalarBreaks(indent);
+ breaks = (String) brme[0];
+ endMark = (Mark) brme[1];
+ }
+
+ String lineBreak = "";
+
+ // Scan the inner part of the block scalar.
+ while (this.reader.getColumn() == indent && reader.peek() != '\0') {
+ chunks.append(breaks);
+ boolean leadingNonSpace = " \t".indexOf(reader.peek()) == -1;
+ int length = 0;
+ while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(length))) {
+ length++;
+ }
+ chunks.append(reader.prefixForward(length));
+ lineBreak = scanLineBreak();
+ Object[] brme = scanBlockScalarBreaks(indent);
+ breaks = (String) brme[0];
+ endMark = (Mark) brme[1];
+ if (this.reader.getColumn() == indent && reader.peek() != '\0') {
+
+ // Unfortunately, folding rules are ambiguous.
+ //
+ // This is the folding according to the specification:
+ if (folded && "\n".equals(lineBreak) && leadingNonSpace
+ && " \t".indexOf(reader.peek()) == -1) {
+ if (breaks.length() == 0) {
+ chunks.append(" ");
+ }
} else {
- token = new FlowSequenceEndToken(startMark, endMark);
+ chunks.append(lineBreak);
+ }
+ // Clark Evans's interpretation (also in the spec examples) not
+ // imported from PyYAML
+ } else {
+ break;
+ }
+ }
+ // Chomp the tail.
+ if (chompi.chompTailIsNotFalse()) {
+ chunks.append(lineBreak);
+ }
+ if (chompi.chompTailIsTrue()) {
+ chunks.append(breaks);
+ }
+ // We are done.
+ ScalarToken scalarToken = new ScalarToken(chunks.toString(), false, startMark, endMark,
+ DumperOptions.ScalarStyle.createStyle(style));
+ return makeTokenList(commentToken, scalarToken);
+ }
+
+ /**
+ * Scan a block scalar indicator. The block scalar indicator includes two optional components,
+ * which may appear in either order.
+ *
+ * A block indentation indicator is a non-zero digit describing the indentation level of the block
+ * scalar to follow. This indentation is an additional number of spaces relative to the current
+ * indentation level.
+ *
+ * A block chomping indicator is a + or -, selecting the chomping mode away from the default
+ * (clip) to either -(strip) or +(keep).
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id868988">5.3. Indicator Characters</a>
+ * @see <a href="http://www.yaml.org/spec/1.1/#id927035">9.2.2. Block Indentation Indicator</a>
+ * @see <a href="http://www.yaml.org/spec/1.1/#id927557">9.2.3. Block Chomping Indicator</a>
+ */
+ private Chomping scanBlockScalarIndicators(Mark startMark) {
+ // See the specification for details.
+ Boolean chomping = null;
+ int increment = -1;
+ int c = reader.peek();
+ if (c == '-' || c == '+') {
+ if (c == '+') {
+ chomping = Boolean.TRUE;
+ } else {
+ chomping = Boolean.FALSE;
+ }
+ reader.forward();
+ c = reader.peek();
+ if (Character.isDigit(c)) {
+ final String s = String.valueOf(Character.toChars(c));
+ increment = Integer.parseInt(s);
+ if (increment == 0) {
+ throw new ScannerException("while scanning a block scalar", startMark,
+ "expected indentation indicator in the range 1-9, but found 0", reader.getMark());
}
- this.tokens.add(token);
- }
-
- /**
- * Fetch an entry in the flow style. Flow-style entries occur either
- * immediately after the start of a collection, or else after a comma.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- */
- private void fetchFlowEntry() {
- // Simple keys are allowed after ','.
- this.allowSimpleKey = true;
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey();
-
- // Add FLOW-ENTRY.
- Mark startMark = reader.getMark();
reader.forward();
- Mark endMark = reader.getMark();
- Token token = new FlowEntryToken(startMark, endMark);
- this.tokens.add(token);
- }
-
- /**
- * Fetch an entry in the block style.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- */
- private void fetchBlockEntry() {
- // Block context needs additional checks.
- if (this.flowLevel == 0) {
- // Are we allowed to start a new entry?
- if (!this.allowSimpleKey) {
- throw new ScannerException(null, null, "sequence entries are not allowed here",
- reader.getMark());
- }
-
- // We may need to add BLOCK-SEQUENCE-START.
- if (addIndent(this.reader.getColumn())) {
- Mark mark = reader.getMark();
- this.tokens.add(new BlockSequenceStartToken(mark, mark));
- }
+ }
+ } else if (Character.isDigit(c)) {
+ final String s = String.valueOf(Character.toChars(c));
+ increment = Integer.parseInt(s);
+ if (increment == 0) {
+ throw new ScannerException("while scanning a block scalar", startMark,
+ "expected indentation indicator in the range 1-9, but found 0", reader.getMark());
+ }
+ reader.forward();
+ c = reader.peek();
+ if (c == '-' || c == '+') {
+ if (c == '+') {
+ chomping = Boolean.TRUE;
} else {
- // It's an error for the block entry to occur in the flow
- // context,but we let the parser detect this.
+ chomping = Boolean.FALSE;
}
- // Simple keys are allowed after '-'.
- this.allowSimpleKey = true;
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey();
-
- // Add BLOCK-ENTRY.
- Mark startMark = reader.getMark();
reader.forward();
- Mark endMark = reader.getMark();
- Token token = new BlockEntryToken(startMark, endMark);
- this.tokens.add(token);
- }
-
- /**
- * Fetch a key in a block-style mapping.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- */
- private void fetchKey() {
- // Block context needs additional checks.
- if (this.flowLevel == 0) {
- // Are we allowed to start a key (not necessary a simple)?
- if (!this.allowSimpleKey) {
- throw new ScannerException(null, null, "mapping keys are not allowed here",
- reader.getMark());
- }
- // We may need to add BLOCK-MAPPING-START.
- if (addIndent(this.reader.getColumn())) {
- Mark mark = reader.getMark();
- this.tokens.add(new BlockMappingStartToken(mark, mark));
- }
- }
- // Simple keys are allowed after '?' in the block context.
- this.allowSimpleKey = this.flowLevel == 0;
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey();
-
- // Add KEY.
- Mark startMark = reader.getMark();
+ }
+ }
+ c = reader.peek();
+ if (Constant.NULL_BL_LINEBR.hasNo(c)) {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a block scalar", startMark,
+ "expected chomping or indentation indicators, but found " + s + "(" + c + ")",
+ reader.getMark());
+ }
+ return new Chomping(chomping, increment);
+ }
+
+ /**
+ * Scan to the end of the line after a block scalar has been scanned; the only things that are
+ * permitted at this time are comments and spaces.
+ */
+ private CommentToken scanBlockScalarIgnoredLine(Mark startMark) {
+ // See the specification for details.
+
+ // Forward past any number of trailing spaces
+ while (reader.peek() == ' ') {
+ reader.forward();
+ }
+
+ // If a comment occurs, scan to just before the end of line.
+ CommentToken commentToken = null;
+ if (reader.peek() == '#') {
+ commentToken = scanComment(CommentType.IN_LINE);
+ }
+ // If the next character is not a null or line break, an error has
+ // occurred.
+ int c = reader.peek();
+ String lineBreak = scanLineBreak();
+ if (lineBreak.length() == 0 && c != '\0') {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a block scalar", startMark,
+ "expected a comment or a line break, but found " + s + "(" + c + ")", reader.getMark());
+ }
+ return commentToken;
+ }
+
+ /**
+ * Scans for the indentation of a block scalar implicitly. This mechanism is used only if the
+ * block did not explicitly state an indentation to be used.
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#id927035">9.2.2. Block Indentation Indicator</a>
+ */
+ private Object[] scanBlockScalarIndentation() {
+ // See the specification for details.
+ StringBuilder chunks = new StringBuilder();
+ int maxIndent = 0;
+ Mark endMark = reader.getMark();
+ // Look ahead some number of lines until the first non-blank character
+ // occurs; the determined indentation will be the maximum number of
+ // leading spaces on any of these lines.
+ while (Constant.LINEBR.has(reader.peek(), " \r")) {
+ if (reader.peek() != ' ') {
+ // If the character isn't a space, it must be some kind of
+ // line-break; scan the line break and track it.
+ chunks.append(scanLineBreak());
+ endMark = reader.getMark();
+ } else {
+ // If the character is a space, move forward to the next
+ // character; if we surpass our previous maximum for indent
+ // level, update that too.
reader.forward();
- Mark endMark = reader.getMark();
- Token token = new KeyToken(startMark, endMark);
- this.tokens.add(token);
- }
-
- /**
- * Fetch a value in a block-style mapping.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- */
- private void fetchValue() {
- // Do we determine a simple key?
- SimpleKey key = this.possibleSimpleKeys.remove(this.flowLevel);
- if (key != null) {
- // Add KEY.
- this.tokens.add(key.getTokenNumber() - this.tokensTaken, new KeyToken(key.getMark(),
- key.getMark()));
-
- // If this key starts a new block mapping, we need to add
- // BLOCK-MAPPING-START.
- if (this.flowLevel == 0) {
- if (addIndent(key.getColumn())) {
- this.tokens.add(key.getTokenNumber() - this.tokensTaken,
- new BlockMappingStartToken(key.getMark(), key.getMark()));
- }
- }
- // There cannot be two simple keys one after another.
- this.allowSimpleKey = false;
-
- } else {
- // It must be a part of a complex key.
- // Block context needs additional checks. Do we really need them?
- // They will be caught by the parser anyway.
- if (this.flowLevel == 0) {
-
- // We are allowed to start a complex value if and only if we can
- // start a simple key.
- if (!this.allowSimpleKey) {
- throw new ScannerException(null, null, "mapping values are not allowed here",
- reader.getMark());
- }
- }
-
- // If this value starts a new block mapping, we need to add
- // BLOCK-MAPPING-START. It will be detected as an error later by
- // the parser.
- if (flowLevel == 0) {
- if (addIndent(reader.getColumn())) {
- Mark mark = reader.getMark();
- this.tokens.add(new BlockMappingStartToken(mark, mark));
- }
- }
-
- // Simple keys are allowed after ':' in the block context.
- allowSimpleKey = flowLevel == 0;
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey();
- }
- // Add VALUE.
- Mark startMark = reader.getMark();
- reader.forward();
- Mark endMark = reader.getMark();
- Token token = new ValueToken(startMark, endMark);
- this.tokens.add(token);
- }
-
- /**
- * Fetch an alias, which is a reference to an anchor. Aliases take the
- * format:
- *
- * <pre>
- * *(anchor name)
- * </pre>
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863390"></a>
- */
- private void fetchAlias() {
- // ALIAS could be a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after ALIAS.
- this.allowSimpleKey = false;
-
- // Scan and add ALIAS.
- Token tok = scanAnchor(false);
- this.tokens.add(tok);
- }
-
- /**
- * Fetch an anchor. Anchors take the form:
- *
- * <pre>
- * &(anchor name)
- * </pre>
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863390"></a>
- */
- private void fetchAnchor() {
- // ANCHOR could start a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after ANCHOR.
- this.allowSimpleKey = false;
-
- // Scan and add ANCHOR.
- Token tok = scanAnchor(true);
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a tag. Tags take a complex form.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id861700"></a>
- */
- private void fetchTag() {
- // TAG could start a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after TAG.
- this.allowSimpleKey = false;
-
- // Scan and add TAG.
- Token tok = scanTag();
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a literal scalar, denoted with a vertical-bar. This is the type
- * best used for source code and other content, such as binary data, which
- * must be included verbatim.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- */
- private void fetchLiteral() {
- fetchBlockScalar('|');
- }
-
- /**
- * Fetch a folded scalar, denoted with a greater-than sign. This is the type
- * best used for long content, such as the text of a chapter or description.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- */
- private void fetchFolded() {
- fetchBlockScalar('>');
- }
-
- /**
- * Fetch a block scalar (literal or folded).
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- *
- * @param style
- */
- private void fetchBlockScalar(char style) {
- // A simple key may follow a block scalar.
- this.allowSimpleKey = true;
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey();
-
- // Scan and add SCALAR.
- Token tok = scanBlockScalar(style);
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a single-quoted (') scalar.
- */
- private void fetchSingle() {
- fetchFlowScalar('\'');
- }
-
- /**
- * Fetch a double-quoted (") scalar.
- */
- private void fetchDouble() {
- fetchFlowScalar('"');
- }
-
- /**
- * Fetch a flow scalar (single- or double-quoted).
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id863975"></a>
- *
- * @param style
- */
- private void fetchFlowScalar(char style) {
- // A flow scalar could be a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after flow scalars.
- this.allowSimpleKey = false;
-
- // Scan and add SCALAR.
- Token tok = scanFlowScalar(style);
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a plain scalar.
- */
- private void fetchPlain() {
- // A plain scalar could be a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after plain scalars. But note that `scan_plain` will
- // change this flag if the scan is finished at the beginning of the
- // line.
- this.allowSimpleKey = false;
-
- // Scan and add SCALAR. May change `allow_simple_key`.
- Token tok = scanPlain();
- this.tokens.add(tok);
- }
-
- // Checkers.
- /**
- * Returns true if the next thing on the reader is a directive, given that
- * the leading '%' has already been checked.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id864824"></a>
- */
- private boolean checkDirective() {
- // DIRECTIVE: ^ '%' ...
- // The '%' indicator is already checked.
- return reader.getColumn() == 0;
- }
-
- /**
- * Returns true if the next thing on the reader is a document-start ("---").
- * A document-start is always followed immediately by a new line.
- */
- private boolean checkDocumentStart() {
- // DOCUMENT-START: ^ '---' (' '|'\n')
- if (reader.getColumn() == 0) {
- if ("---".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Returns true if the next thing on the reader is a document-end ("..."). A
- * document-end is always followed immediately by a new line.
- */
- private boolean checkDocumentEnd() {
- // DOCUMENT-END: ^ '...' (' '|'\n')
- if (reader.getColumn() == 0) {
- if ("...".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Returns true if the next thing on the reader is a block token.
- */
- private boolean checkBlockEntry() {
- // BLOCK-ENTRY: '-' (' '|'\n')
- return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
- }
-
- /**
- * Returns true if the next thing on the reader is a key token.
- */
- private boolean checkKey() {
- // KEY(flow context): '?'
- if (this.flowLevel != 0) {
- return true;
- } else {
- // KEY(block context): '?' (' '|'\n')
- return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
- }
- }
-
- /**
- * Returns true if the next thing on the reader is a value token.
- */
- private boolean checkValue() {
- // VALUE(flow context): ':'
- if (flowLevel != 0) {
- return true;
- } else {
- // VALUE(block context): ':' (' '|'\n')
- return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
- }
- }
-
- /**
- * Returns true if the next thing on the reader is a plain token.
- */
- private boolean checkPlain() {
- /**
- * <pre>
- * A plain scalar may start with any non-space character except:
- * '-', '?', ':', ',', '[', ']', '{', '}',
- * '#', '&amp;', '*', '!', '|', '&gt;', '\'', '\&quot;',
- * '%', '@', '`'.
- *
- * It may also start with
- * '-', '?', ':'
- * if it is followed by a non-space character.
- *
- * Note that we limit the last rule to the block context (except the
- * '-' character) because we want the flow context to be space
- * independent.
- * </pre>
- */
- char ch = reader.peek();
- // If the next char is NOT one of the forbidden chars above or
- // whitespace, then this is the start of a plain scalar.
- return Constant.NULL_BL_T_LINEBR.hasNo(ch, "-?:,[]{}#&*!|>\'\"%@`")
- || (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(1)) && (ch == '-' || (this.flowLevel == 0 && "?:"
- .indexOf(ch) != -1)));
- }
-
- // Scanners.
-
- /**
- * <pre>
- * We ignore spaces, line breaks and comments.
- * If we find a line break in the block context, we set the flag
- * `allow_simple_key` on.
- * The byte order mark is stripped if it's the first character in the
- * stream. We do not yet support BOM inside the stream as the
- * specification requires. Any such mark will be considered as a part
- * of the document.
- * TODO: We need to make tab handling rules more sane. A good rule is
- * Tabs cannot precede tokens
- * BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
- * KEY(block), VALUE(block), BLOCK-ENTRY
- * So the checking code is
- * if &lt;TAB&gt;:
- * self.allow_simple_keys = False
- * We also need to add the check for `allow_simple_keys == True` to
- * `unwind_indent` before issuing BLOCK-END.
- * Scanners for block, flow, and plain scalars need to be modified.
- * </pre>
- */
- private void scanToNextToken() {
- // If there is a byte order mark (BOM) at the beginning of the stream,
- // forward past it.
- if (reader.getIndex() == 0 && reader.peek() == '\uFEFF') {
- reader.forward();
- }
- boolean found = false;
- while (!found) {
- int ff = 0;
- // Peek ahead until we find the first non-space character, then
- // move forward directly to that character.
- while (reader.peek(ff) == ' ') {
- ff++;
- }
- if (ff > 0) {
- reader.forward(ff);
- }
- // If the character we have skipped forward to is a comment (#),
- // then peek ahead until we find the next end of line. YAML
- // comments are from a # to the next new-line. We then forward
- // past the comment.
- if (reader.peek() == '#') {
- ff = 0;
- while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
- ff++;
- }
- if (ff > 0) {
- reader.forward(ff);
- }
- }
- // If we scanned a line break, then (depending on flow level),
- // simple keys may be allowed.
- if (scanLineBreak().length() != 0) {// found a line-break
- if (this.flowLevel == 0) {
- // Simple keys are allowed at flow-level 0 after a line
- // break
- this.allowSimpleKey = true;
- }
- } else {
- found = true;
- }
- }
- }
-
- @SuppressWarnings({ "unchecked", "rawtypes" })
- private Token scanDirective() {
- // See the specification for details.
- Mark startMark = reader.getMark();
- Mark endMark;
+ if (this.reader.getColumn() > maxIndent) {
+ maxIndent = reader.getColumn();
+ }
+ }
+ }
+ // Pass several results back together.
+ return new Object[] {chunks.toString(), maxIndent, endMark};
+ }
+
+ private Object[] scanBlockScalarBreaks(int indent) {
+ // See the specification for details.
+ StringBuilder chunks = new StringBuilder();
+ Mark endMark = reader.getMark();
+ int col = this.reader.getColumn();
+ // Scan for up to the expected indentation-level of spaces, then move
+ // forward past that amount.
+ while (col < indent && reader.peek() == ' ') {
+ reader.forward();
+ col++;
+ }
+
+ // Consume one or more line breaks followed by any amount of spaces,
+ // until we find something that isn't a line-break.
+ String lineBreak = null;
+ while ((lineBreak = scanLineBreak()).length() != 0) {
+ chunks.append(lineBreak);
+ endMark = reader.getMark();
+ // Scan past up to (indent) spaces on the next line, then forward
+ // past them.
+ col = this.reader.getColumn();
+ while (col < indent && reader.peek() == ' ') {
reader.forward();
- String name = scanDirectiveName(startMark);
- List<?> value = null;
- if ("YAML".equals(name)) {
- value = scanYamlDirectiveValue(startMark);
- endMark = reader.getMark();
- } else if ("TAG".equals(name)) {
- value = scanTagDirectiveValue(startMark);
- endMark = reader.getMark();
- } else {
- endMark = reader.getMark();
- int ff = 0;
- while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
- ff++;
- }
- if (ff > 0) {
- reader.forward(ff);
- }
- }
- scanDirectiveIgnoredLine(startMark);
- return new DirectiveToken(name, value, startMark, endMark);
- }
-
- /**
- * Scan a directive name. Directive names are a series of non-space
- * characters.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id895217"></a>
- */
- private String scanDirectiveName(Mark startMark) {
- // See the specification for details.
- int length = 0;
- // A Directive-name is a sequence of alphanumeric characters
- // (a-z,A-Z,0-9). We scan until we find something that isn't.
- // FIXME this disagrees with the specification.
- char ch = reader.peek(length);
- while (Constant.ALPHA.has(ch)) {
- length++;
- ch = reader.peek(length);
- }
- // If the name would be empty, an error occurs.
- if (length == 0) {
- throw new ScannerException("while scanning a directive", startMark,
- "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
- + ")", reader.getMark());
- }
- String value = reader.prefixForward(length);
- ch = reader.peek();
- if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
- throw new ScannerException("while scanning a directive", startMark,
- "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
- + ")", reader.getMark());
- }
- return value;
- }
-
- private List<Integer> scanYamlDirectiveValue(Mark startMark) {
- // See the specification for details.
- while (reader.peek() == ' ') {
- reader.forward();
- }
- Integer major = scanYamlDirectiveNumber(startMark);
- if (reader.peek() != '.') {
- throw new ScannerException("while scanning a directive", startMark,
- "expected a digit or '.', but found " + reader.peek() + "("
- + ((int) reader.peek()) + ")", reader.getMark());
- }
+ col++;
+ }
+ }
+ // Return both the assembled intervening string and the end-mark.
+ return new Object[] {chunks.toString(), endMark};
+ }
+
+ /**
+ * Scan a flow-style scalar. Flow scalars are presented in one of two forms; first, a flow scalar
+ * may be a double-quoted string; second, a flow scalar may be a single-quoted string.
+ *
+ * @see <a href="https://yaml.org/spec/1.1/#id904158">9.1. Flow Scalar Styles</a> style/syntax
+ *
+ * <pre>
+ * See the specification for details.
+ * Note that we loose indentation rules for quoted scalars. Quoted
+ * scalars don't need to adhere indentation because &quot; and ' clearly
+ * mark the beginning and the end of them. Therefore we are less
+ * restrictive then the specification requires. We only need to check
+ * that document separators are not included in scalars.
+ * </pre>
+ */
+ private Token scanFlowScalar(char style) {
+ boolean _double;
+ // The style will be either single- or double-quoted; we determine this
+ // by the first character in the entry (supplied)
+ _double = style == '"';
+ StringBuilder chunks = new StringBuilder();
+ Mark startMark = reader.getMark();
+ int quote = reader.peek();
+ reader.forward();
+ chunks.append(scanFlowScalarNonSpaces(_double, startMark));
+ while (reader.peek() != quote) {
+ chunks.append(scanFlowScalarSpaces(startMark));
+ chunks.append(scanFlowScalarNonSpaces(_double, startMark));
+ }
+ reader.forward();
+ Mark endMark = reader.getMark();
+ return new ScalarToken(chunks.toString(), false, startMark, endMark,
+ DumperOptions.ScalarStyle.createStyle(style));
+ }
+
+ /**
+ * Scan some number of flow-scalar non-space characters.
+ */
+ private String scanFlowScalarNonSpaces(boolean doubleQuoted, Mark startMark) {
+ // See the specification for details.
+ StringBuilder chunks = new StringBuilder();
+ while (true) {
+ // Scan through any number of characters which are not: NUL, blank,
+ // tabs, line breaks, single-quotes, double-quotes, or backslashes.
+ int length = 0;
+ while (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length), "'\"\\")) {
+ length++;
+ }
+ if (length != 0) {
+ chunks.append(reader.prefixForward(length));
+ }
+ // Depending on our quoting-type, the characters ', " and \ have
+ // differing meanings.
+ int c = reader.peek();
+ if (!doubleQuoted && c == '\'' && reader.peek(1) == '\'') {
+ chunks.append("'");
+ reader.forward(2);
+ } else if ((doubleQuoted && c == '\'') || (!doubleQuoted && "\"\\".indexOf(c) != -1)) {
+ chunks.appendCodePoint(c);
reader.forward();
- Integer minor = scanYamlDirectiveNumber(startMark);
- if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
- throw new ScannerException("while scanning a directive", startMark,
- "expected a digit or ' ', but found " + reader.peek() + "("
- + ((int) reader.peek()) + ")", reader.getMark());
- }
- List<Integer> result = new ArrayList<Integer>(2);
- result.add(major);
- result.add(minor);
- return result;
- }
-
- /**
- * Read a %YAML directive number: this is either the major or the minor
- * part. Stop reading at a non-digit character (usually either '.' or '\n').
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id895631"></a>
- * @see <a href="http://www.yaml.org/spec/1.1/#ns-dec-digit"></a>
- */
- private Integer scanYamlDirectiveNumber(Mark startMark) {
- // See the specification for details.
- char ch = reader.peek();
- if (!Character.isDigit(ch)) {
- throw new ScannerException("while scanning a directive", startMark,
- "expected a digit, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
- }
- int length = 0;
- while (Character.isDigit(reader.peek(length))) {
- length++;
- }
- Integer value = Integer.parseInt(reader.prefixForward(length));
- return value;
- }
-
- /**
- * <p>
- * Read a %TAG directive value:
- *
- * <pre>
- * s-ignored-space+ c-tag-handle s-ignored-space+ ns-tag-prefix s-l-comments
- * </pre>
- *
- * </p>
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id896044"></a>
- */
- private List<String> scanTagDirectiveValue(Mark startMark) {
- // See the specification for details.
- while (reader.peek() == ' ') {
- reader.forward();
- }
- String handle = scanTagDirectiveHandle(startMark);
- while (reader.peek() == ' ') {
- reader.forward();
- }
- String prefix = scanTagDirectivePrefix(startMark);
- List<String> result = new ArrayList<String>(2);
- result.add(handle);
- result.add(prefix);
- return result;
- }
-
- /**
- * Scan a %TAG directive's handle. This is YAML's c-tag-handle.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id896876"></a>
- * @param startMark
- * @return
- */
- private String scanTagDirectiveHandle(Mark startMark) {
- // See the specification for details.
- String value = scanTagHandle("directive", startMark);
- char ch = reader.peek();
- if (ch != ' ') {
- throw new ScannerException("while scanning a directive", startMark,
- "expected ' ', but found " + reader.peek() + "(" + ch + ")", reader.getMark());
- }
- return value;
- }
-
- /**
- * Scan a %TAG directive's prefix. This is YAML's ns-tag-prefix.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#ns-tag-prefix"></a>
- */
- private String scanTagDirectivePrefix(Mark startMark) {
- // See the specification for details.
- String value = scanTagUri("directive", startMark);
- if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
- throw new ScannerException("while scanning a directive", startMark,
- "expected ' ', but found " + reader.peek() + "(" + ((int) reader.peek()) + ")",
- reader.getMark());
- }
- return value;
- }
-
- private String scanDirectiveIgnoredLine(Mark startMark) {
- // See the specification for details.
- int ff = 0;
- while (reader.peek(ff) == ' ') {
- ff++;
- }
- if (ff > 0) {
- reader.forward(ff);
- }
- if (reader.peek() == '#') {
- ff = 0;
- while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
- ff++;
- }
- reader.forward(ff);
- }
- char ch = reader.peek();
- String lineBreak = scanLineBreak();
- if (lineBreak.length() == 0 && ch != '\0') {
- throw new ScannerException("while scanning a directive", startMark,
- "expected a comment or a line break, but found " + ch + "(" + ((int) ch) + ")",
- reader.getMark());
- }
- return lineBreak;
- }
-
- /**
- * <pre>
- * The specification does not restrict characters for anchors and
- * aliases. This may lead to problems, for instance, the document:
- * [ *alias, value ]
- * can be interpreted in two ways, as
- * [ &quot;value&quot; ]
- * and
- * [ *alias , &quot;value&quot; ]
- * Therefore we restrict aliases to numbers and ASCII letters.
- * </pre>
- */
- private Token scanAnchor(boolean isAnchor) {
- Mark startMark = reader.getMark();
- char indicator = reader.peek();
- String name = indicator == '*' ? "alias" : "anchor";
+ } else if (doubleQuoted && c == '\\') {
reader.forward();
- int length = 0;
- char ch = reader.peek(length);
- while (Constant.ALPHA.has(ch)) {
- length++;
- ch = reader.peek(length);
- }
- if (length == 0) {
- throw new ScannerException("while scanning an " + name, startMark,
- "expected alphabetic or numeric character, but found " + ch,
- reader.getMark());
- }
- String value = reader.prefixForward(length);
- ch = reader.peek();
- if (Constant.NULL_BL_T_LINEBR.hasNo(ch, "?:,]}%@`")) {
- throw new ScannerException("while scanning an " + name, startMark,
- "expected alphabetic or numeric character, but found " + ch + "("
- + ((int) reader.peek()) + ")", reader.getMark());
- }
- Mark endMark = reader.getMark();
- Token tok;
- if (isAnchor) {
- tok = new AnchorToken(value, startMark, endMark);
- } else {
- tok = new AliasToken(value, startMark, endMark);
- }
- return tok;
- }
-
- /**
- * <p>
- * Scan a Tag property. A Tag property may be specified in one of three
- * ways: c-verbatim-tag, c-ns-shorthand-tag, or c-ns-non-specific-tag
- * </p>
- *
- * <p>
- * c-verbatim-tag takes the form !&lt;ns-uri-char+&gt; and must be delivered
- * verbatim (as-is) to the application. In particular, verbatim tags are not
- * subject to tag resolution.
- * </p>
- *
- * <p>
- * c-ns-shorthand-tag is a valid tag handle followed by a non-empty suffix.
- * If the tag handle is a c-primary-tag-handle ('!') then the suffix must
- * have all exclamation marks properly URI-escaped (%21); otherwise, the
- * string will look like a named tag handle: !foo!bar would be interpreted
- * as (handle="!foo!", suffix="bar").
- * </p>
- *
- * <p>
- * c-ns-non-specific-tag is always a lone '!'; this is only useful for plain
- * scalars, where its specification means that the scalar MUST be resolved
- * to have type tag:yaml.org,2002:str.
- * </p>
- *
- * TODO SnakeYaml incorrectly ignores c-ns-non-specific-tag right now.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id900262"></a>
- *
- * TODO Note that this method does not enforce rules about local versus
- * global tags!
- */
- private Token scanTag() {
- // See the specification for details.
- Mark startMark = reader.getMark();
- // Determine the type of tag property based on the first character
- // encountered
- char ch = reader.peek(1);
- String handle = null;
- String suffix = null;
- // Verbatim tag! (c-verbatim-tag)
- if (ch == '<') {
- // Skip the exclamation mark and &gt;, then read the tag suffix (as
- // a URI).
- reader.forward(2);
- suffix = scanTagUri("tag", startMark);
- if (reader.peek() != '>') {
- // If there are any characters between the end of the tag-suffix
- // URI and the closing &gt;, then an error has occurred.
- throw new ScannerException("while scanning a tag", startMark,
- "expected '>', but found '" + reader.peek() + "' (" + ((int) reader.peek())
- + ")", reader.getMark());
- }
- reader.forward();
- } else if (Constant.NULL_BL_T_LINEBR.has(ch)) {
- // A NUL, blank, tab, or line-break means that this was a
- // c-ns-non-specific tag.
- suffix = "!";
- reader.forward();
- } else {
- // Any other character implies c-ns-shorthand-tag type.
-
- // Look ahead in the stream to determine whether this tag property
- // is of the form !foo or !foo!bar.
- int length = 1;
- boolean useHandle = false;
- while (Constant.NULL_BL_LINEBR.hasNo(ch)) {
- if (ch == '!') {
- useHandle = true;
- break;
- }
- length++;
- ch = reader.peek(length);
- }
- handle = "!";
- // If we need to use a handle, scan it in; otherwise, the handle is
- // presumed to be '!'.
- if (useHandle) {
- handle = scanTagHandle("tag", startMark);
- } else {
- handle = "!";
- reader.forward();
- }
- suffix = scanTagUri("tag", startMark);
- }
- ch = reader.peek();
- // Check that the next character is allowed to follow a tag-property;
- // if it is not, raise the error.
- if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
- throw new ScannerException("while scanning a tag", startMark,
- "expected ' ', but found '" + ch + "' (" + ((int) ch) + ")", reader.getMark());
- }
- TagTuple value = new TagTuple(handle, suffix);
- Mark endMark = reader.getMark();
- return new TagToken(value, startMark, endMark);
- }
-
- private Token scanBlockScalar(char style) {
- // See the specification for details.
- boolean folded;
- // Depending on the given style, we determine whether the scalar is
- // folded ('>') or literal ('|')
- if (style == '>') {
- folded = true;
+ c = reader.peek();
+ if (!Character.isSupplementaryCodePoint(c)
+ && ESCAPE_REPLACEMENTS.containsKey(Character.valueOf((char) c))) {
+ // The character is one of the single-replacement
+ // types; these are replaced with a literal character
+ // from the mapping.
+ chunks.append(ESCAPE_REPLACEMENTS.get(Character.valueOf((char) c)));
+ reader.forward();
+ } else if (!Character.isSupplementaryCodePoint(c)
+ && ESCAPE_CODES.containsKey(Character.valueOf((char) c))) {
+ // The character is a multi-digit escape sequence, with
+ // length defined by the value in the ESCAPE_CODES map.
+ length = ESCAPE_CODES.get(Character.valueOf((char) c)).intValue();
+ reader.forward();
+ String hex = reader.prefix(length);
+ if (NOT_HEXA.matcher(hex).find()) {
+ throw new ScannerException("while scanning a double-quoted scalar", startMark,
+ "expected escape sequence of " + length + " hexadecimal numbers, but found: " + hex,
+ reader.getMark());
+ }
+ int decimal = Integer.parseInt(hex, 16);
+ String unicode = new String(Character.toChars(decimal));
+ chunks.append(unicode);
+ reader.forward(length);
+ } else if (scanLineBreak().length() != 0) {
+ chunks.append(scanFlowScalarBreaks(startMark));
} else {
- folded = false;
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a double-quoted scalar", startMark,
+ "found unknown escape character " + s + "(" + c + ")", reader.getMark());
}
- StringBuilder chunks = new StringBuilder();
- Mark startMark = reader.getMark();
- // Scan the header.
+ } else {
+ return chunks.toString();
+ }
+ }
+ }
+
+ private String scanFlowScalarSpaces(Mark startMark) {
+ // See the specification for details.
+ StringBuilder chunks = new StringBuilder();
+ int length = 0;
+ // Scan through any number of whitespace (space, tab) characters,
+ // consuming them.
+ while (" \t".indexOf(reader.peek(length)) != -1) {
+ length++;
+ }
+ String whitespaces = reader.prefixForward(length);
+ int c = reader.peek();
+ if (c == '\0') {
+ // A flow scalar cannot end with an end-of-stream
+ throw new ScannerException("while scanning a quoted scalar", startMark,
+ "found unexpected end of stream", reader.getMark());
+ }
+ // If we encounter a line break, scan it into our assembled string...
+ String lineBreak = scanLineBreak();
+ if (lineBreak.length() != 0) {
+ String breaks = scanFlowScalarBreaks(startMark);
+ if (!"\n".equals(lineBreak)) {
+ chunks.append(lineBreak);
+ } else if (breaks.length() == 0) {
+ chunks.append(" ");
+ }
+ chunks.append(breaks);
+ } else {
+ chunks.append(whitespaces);
+ }
+ return chunks.toString();
+ }
+
+ private String scanFlowScalarBreaks(Mark startMark) {
+ // See the specification for details.
+ StringBuilder chunks = new StringBuilder();
+ while (true) {
+ // Instead of checking indentation, we check for document
+ // separators.
+ String prefix = reader.prefix(3);
+ if (("---".equals(prefix) || "...".equals(prefix))
+ && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
+ throw new ScannerException("while scanning a quoted scalar", startMark,
+ "found unexpected document separator", reader.getMark());
+ }
+ // Scan past any number of spaces and tabs, ignoring them
+ while (" \t".indexOf(reader.peek()) != -1) {
reader.forward();
- Chomping chompi = scanBlockScalarIndicators(startMark);
- int increment = chompi.getIncrement();
- scanBlockScalarIgnoredLine(startMark);
-
- // Determine the indentation level and go to the first non-empty line.
- int minIndent = this.indent + 1;
- if (minIndent < 1) {
- minIndent = 1;
- }
- String breaks = null;
- int maxIndent = 0;
- int indent = 0;
- Mark endMark;
- if (increment == -1) {
- Object[] brme = scanBlockScalarIndentation();
- breaks = (String) brme[0];
- maxIndent = ((Integer) brme[1]).intValue();
- endMark = (Mark) brme[2];
- indent = Math.max(minIndent, maxIndent);
+ }
+ // If we stopped at a line break, add that; otherwise, return the
+ // assembled set of scalar breaks.
+ String lineBreak = scanLineBreak();
+ if (lineBreak.length() != 0) {
+ chunks.append(lineBreak);
+ } else {
+ return chunks.toString();
+ }
+ }
+ }
+
+ /**
+ * Scan a plain scalar.
+ *
+ * <pre>
+ * See the specification for details.
+ * We add an additional restriction for the flow context:
+ * plain scalars in the flow context cannot contain ',', ':' and '?'.
+ * We also keep track of the `allow_simple_key` flag here.
+ * Indentation rules are loosed for the flow context.
+ * </pre>
+ */
+ private Token scanPlain() {
+ StringBuilder chunks = new StringBuilder();
+ Mark startMark = reader.getMark();
+ Mark endMark = startMark;
+ int indent = this.indent + 1;
+ String spaces = "";
+ while (true) {
+ int c;
+ int length = 0;
+ // A comment indicates the end of the scalar.
+ if (reader.peek() == '#') {
+ break;
+ }
+ while (true) {
+ c = reader.peek(length);
+ if (Constant.NULL_BL_T_LINEBR.has(c)
+ || (c == ':' && Constant.NULL_BL_T_LINEBR.has(reader.peek(length + 1),
+ flowLevel != 0 ? ",[]{}" : ""))
+ || (this.flowLevel != 0 && ",?[]{}".indexOf(c) != -1)) {
+ break;
+ }
+ length++;
+ }
+ if (length == 0) {
+ break;
+ }
+ this.allowSimpleKey = false;
+ chunks.append(spaces);
+ chunks.append(reader.prefixForward(length));
+ endMark = reader.getMark();
+ spaces = scanPlainSpaces();
+ // System.out.printf("spaces[%s]\n", spaces);
+ if (spaces.length() == 0 || reader.peek() == '#'
+ || (this.flowLevel == 0 && this.reader.getColumn() < indent)) {
+ break;
+ }
+ }
+ return new ScalarToken(chunks.toString(), startMark, endMark, true);
+ }
+
+ // Helper for scanPlainSpaces method when comments are enabled.
+ // The ensures that blank lines and comments following a multi-line plain token are not swallowed
+ // up
+ private boolean atEndOfPlain() {
+ // peak ahead to find end of whitespaces and the column at which it occurs
+ int wsLength = 0;
+ int wsColumn = this.reader.getColumn();
+ {
+ int c;
+ while ((c = reader.peek(wsLength)) != '\0' && Constant.NULL_BL_T_LINEBR.has(c)) {
+ wsLength++;
+ if (!Constant.LINEBR.has(c) && (c != '\r' || reader.peek(wsLength + 1) != '\n')
+ && c != 0xFEFF) {
+ wsColumn++;
} else {
- indent = minIndent + increment - 1;
- Object[] brme = scanBlockScalarBreaks(indent);
- breaks = (String) brme[0];
- endMark = (Mark) brme[1];
- }
-
- String lineBreak = "";
-
- // Scan the inner part of the block scalar.
- while (this.reader.getColumn() == indent && reader.peek() != '\0') {
- chunks.append(breaks);
- boolean leadingNonSpace = " \t".indexOf(reader.peek()) == -1;
- int length = 0;
- while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(length))) {
- length++;
- }
- chunks.append(reader.prefixForward(length));
- lineBreak = scanLineBreak();
- Object[] brme = scanBlockScalarBreaks(indent);
- breaks = (String) brme[0];
- endMark = (Mark) brme[1];
- if (this.reader.getColumn() == indent && reader.peek() != '\0') {
-
- // Unfortunately, folding rules are ambiguous.
- //
- // This is the folding according to the specification:
- if (folded && "\n".equals(lineBreak) && leadingNonSpace
- && " \t".indexOf(reader.peek()) == -1) {
- if (breaks.length() == 0) {
- chunks.append(" ");
- }
- } else {
- chunks.append(lineBreak);
- }
- // Clark Evans's interpretation (also in the spec examples) not
- // imported from PyYAML
- } else {
- break;
- }
- }
- // Chomp the tail.
- if (chompi.chompTailIsNotFalse()) {
- chunks.append(lineBreak);
+ wsColumn = 0;
}
- if (chompi.chompTailIsTrue()) {
- chunks.append(breaks);
- }
- // We are done.
- return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
+ }
}
- /**
- * Scan a block scalar indicator. The block scalar indicator includes two
- * optional components, which may appear in either order.
- *
- * A block indentation indicator is a non-zero digit describing the
- * indentation level of the block scalar to follow. This indentation is an
- * additional number of spaces relative to the current indentation level.
- *
- * A block chomping indicator is a + or -, selecting the chomping mode away
- * from the default (clip) to either -(strip) or +(keep).
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id868988"></a>
- * @see <a href="http://www.yaml.org/spec/1.1/#id927035"></a>
- * @see <a href="http://www.yaml.org/spec/1.1/#id927557"></a>
- */
- private Chomping scanBlockScalarIndicators(Mark startMark) {
- // See the specification for details.
- Boolean chomping = null;
- int increment = -1;
- char ch = reader.peek();
- if (ch == '-' || ch == '+') {
- if (ch == '+') {
- chomping = Boolean.TRUE;
- } else {
- chomping = Boolean.FALSE;
- }
- reader.forward();
- ch = reader.peek();
- if (Character.isDigit(ch)) {
- increment = Integer.parseInt(String.valueOf(ch));
- if (increment == 0) {
- throw new ScannerException("while scanning a block scalar", startMark,
- "expected indentation indicator in the range 1-9, but found 0",
- reader.getMark());
- }
- reader.forward();
- }
- } else if (Character.isDigit(ch)) {
- increment = Integer.parseInt(String.valueOf(ch));
- if (increment == 0) {
- throw new ScannerException("while scanning a block scalar", startMark,
- "expected indentation indicator in the range 1-9, but found 0",
- reader.getMark());
- }
- reader.forward();
- ch = reader.peek();
- if (ch == '-' || ch == '+') {
- if (ch == '+') {
- chomping = Boolean.TRUE;
- } else {
- chomping = Boolean.FALSE;
- }
- reader.forward();
- }
- }
- ch = reader.peek();
- if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
- throw new ScannerException("while scanning a block scalar", startMark,
- "expected chomping or indentation indicators, but found " + ch,
- reader.getMark());
- }
- return new Chomping(chomping, increment);
+ // if we see, a comment or end of string or change decrease in indent, we are done
+ // Do not chomp end of lines and blanks, they will be handled by the main loop.
+ if (reader.peek(wsLength) == '#' || reader.peek(wsLength + 1) == '\0'
+ || this.flowLevel == 0 && wsColumn < this.indent) {
+ return true;
}
- /**
- * Scan to the end of the line after a block scalar has been scanned; the
- * only things that are permitted at this time are comments and spaces.
- */
- private String scanBlockScalarIgnoredLine(Mark startMark) {
- // See the specification for details.
- int ff = 0;
- // Forward past any number of trailing spaces
- while (reader.peek(ff) == ' ') {
- ff++;
- }
- if (ff > 0) {
- reader.forward(ff);
- }
- // If a comment occurs, scan to just before the end of line.
- if (reader.peek() == '#') {
- ff = 0;
- while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
- ff++;
- }
- if (ff > 0) {
- reader.forward(ff);
- }
- }
- // If the next character is not a null or line break, an error has
- // occurred.
- char ch = reader.peek();
- String lineBreak = scanLineBreak();
- if (lineBreak.length() == 0 && ch != '\0') {
- throw new ScannerException("while scanning a block scalar", startMark,
- "expected a comment or a line break, but found " + ch, reader.getMark());
+ // if we see, after the space, a key-value followed by a ':', we are done
+ // Do not chomp end of lines and blanks, they will be handled by the main loop.
+ if (this.flowLevel == 0) {
+ int c;
+ for (int extra = 1; (c = reader.peek(wsLength + extra)) != 0
+ && !Constant.NULL_BL_T_LINEBR.has(c); extra++) {
+ if (c == ':' && Constant.NULL_BL_T_LINEBR.has(reader.peek(wsLength + extra + 1))) {
+ return true;
}
- return lineBreak;
+ }
}
- /**
- * Scans for the indentation of a block scalar implicitly. This mechanism is
- * used only if the block did not explicitly state an indentation to be
- * used.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#id927035"></a>
- */
- private Object[] scanBlockScalarIndentation() {
- // See the specification for details.
- StringBuilder chunks = new StringBuilder();
- int maxIndent = 0;
- Mark endMark = reader.getMark();
- // Look ahead some number of lines until the first non-blank character
- // occurs; the determined indentation will be the maximum number of
- // leading spaces on any of these lines.
- while (Constant.LINEBR.has(reader.peek(), " \r")) {
- if (reader.peek() != ' ') {
- // If the character isn't a space, it must be some kind of
- // line-break; scan the line break and track it.
- chunks.append(scanLineBreak());
- endMark = reader.getMark();
- } else {
- // If the character is a space, move forward to the next
- // character; if we surpass our previous maximum for indent
- // level, update that too.
- reader.forward();
- if (this.reader.getColumn() > maxIndent) {
- maxIndent = reader.getColumn();
- }
- }
- }
- // Pass several results back together.
- return new Object[] { chunks.toString(), maxIndent, endMark };
- }
-
- private Object[] scanBlockScalarBreaks(int indent) {
- // See the specification for details.
- StringBuilder chunks = new StringBuilder();
- Mark endMark = reader.getMark();
- int ff = 0;
- int col = this.reader.getColumn();
- // Scan for up to the expected indentation-level of spaces, then move
- // forward past that amount.
- while (col < indent && reader.peek(ff) == ' ') {
- ff++;
- col++;
- }
- if (ff > 0) {
- reader.forward(ff);
- }
- // Consume one or more line breaks followed by any amount of spaces,
- // until we find something that isn't a line-break.
- String lineBreak = null;
- while ((lineBreak = scanLineBreak()).length() != 0) {
- chunks.append(lineBreak);
- endMark = reader.getMark();
- // Scan past up to (indent) spaces on the next line, then forward
- // past them.
- ff = 0;
- col = this.reader.getColumn();
- while (col < indent && reader.peek(ff) == ' ') {
- ff++;
- col++;
- }
- if (ff > 0) {
- reader.forward(ff);
- }
- }
- // Return both the assembled intervening string and the end-mark.
- return new Object[] { chunks.toString(), endMark };
- }
+ // None of the above so safe to chomp the spaces.
+ return false;
+ }
- /**
- * Scan a flow-style scalar. Flow scalars are presented in one of two forms;
- * first, a flow scalar may be a double-quoted string; second, a flow scalar
- * may be a single-quoted string.
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#flow"></a> style/syntax
- *
- * <pre>
- * See the specification for details.
- * Note that we loose indentation rules for quoted scalars. Quoted
- * scalars don't need to adhere indentation because &quot; and ' clearly
- * mark the beginning and the end of them. Therefore we are less
- * restrictive then the specification requires. We only need to check
- * that document separators are not included in scalars.
- * </pre>
- */
- private Token scanFlowScalar(char style) {
- boolean _double;
- // The style will be either single- or double-quoted; we determine this
- // by the first character in the entry (supplied)
- if (style == '"') {
- _double = true;
- } else {
- _double = false;
- }
- StringBuilder chunks = new StringBuilder();
- Mark startMark = reader.getMark();
- char quote = reader.peek();
- reader.forward();
- chunks.append(scanFlowScalarNonSpaces(_double, startMark));
- while (reader.peek() != quote) {
- chunks.append(scanFlowScalarSpaces(startMark));
- chunks.append(scanFlowScalarNonSpaces(_double, startMark));
- }
- reader.forward();
- Mark endMark = reader.getMark();
- return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
+ /**
+ * See the specification for details. SnakeYAML and libyaml allow tabs inside plain scalar
+ */
+ private String scanPlainSpaces() {
+ int length = 0;
+ while (reader.peek(length) == ' ' || reader.peek(length) == '\t') {
+ length++;
}
-
- /**
- * Scan some number of flow-scalar non-space characters.
- */
- private String scanFlowScalarNonSpaces(boolean doubleQuoted, Mark startMark) {
- // See the specification for details.
- StringBuilder chunks = new StringBuilder();
- while (true) {
- // Scan through any number of characters which are not: NUL, blank,
- // tabs, line breaks, single-quotes, double-quotes, or backslashes.
- int length = 0;
- while (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length), "\'\"\\")) {
- length++;
- }
- if (length != 0) {
- chunks.append(reader.prefixForward(length));
- }
- // Depending on our quoting-type, the characters ', " and \ have
- // differing meanings.
- char ch = reader.peek();
- if (!doubleQuoted && ch == '\'' && reader.peek(1) == '\'') {
- chunks.append("'");
- reader.forward(2);
- } else if ((doubleQuoted && ch == '\'') || (!doubleQuoted && "\"\\".indexOf(ch) != -1)) {
- chunks.append(ch);
- reader.forward();
- } else if (doubleQuoted && ch == '\\') {
- reader.forward();
- ch = reader.peek();
- if (ESCAPE_REPLACEMENTS.containsKey(Character.valueOf(ch))) {
- // The character is one of the single-replacement
- // types; these are replaced with a literal character
- // from the mapping.
- chunks.append(ESCAPE_REPLACEMENTS.get(Character.valueOf(ch)));
- reader.forward();
- } else if (ESCAPE_CODES.containsKey(Character.valueOf(ch))) {
- // The character is a multi-digit escape sequence, with
- // length defined by the value in the ESCAPE_CODES map.
- length = ESCAPE_CODES.get(Character.valueOf(ch)).intValue();
- reader.forward();
- String hex = reader.prefix(length);
- if (NOT_HEXA.matcher(hex).find()) {
- throw new ScannerException("while scanning a double-quoted scalar",
- startMark, "expected escape sequence of " + length
- + " hexadecimal numbers, but found: " + hex,
- reader.getMark());
- }
- int decimal = Integer.parseInt(hex, 16);
- String unicode = new String(Character.toChars(decimal));
- chunks.append(unicode);
- reader.forward(length);
- } else if (scanLineBreak().length() != 0) {
- chunks.append(scanFlowScalarBreaks(startMark));
- } else {
- throw new ScannerException("while scanning a double-quoted scalar", startMark,
- "found unknown escape character " + ch + "(" + ((int) ch) + ")",
- reader.getMark());
- }
- } else {
- return chunks.toString();
- }
- }
- }
-
- private String scanFlowScalarSpaces(Mark startMark) {
- // See the specification for details.
- StringBuilder chunks = new StringBuilder();
- int length = 0;
- // Scan through any number of whitespace (space, tab) characters,
- // consuming them.
- while (" \t".indexOf(reader.peek(length)) != -1) {
- length++;
- }
- String whitespaces = reader.prefixForward(length);
- char ch = reader.peek();
- if (ch == '\0') {
- // A flow scalar cannot end with an end-of-stream
- throw new ScannerException("while scanning a quoted scalar", startMark,
- "found unexpected end of stream", reader.getMark());
- }
- // If we encounter a line break, scan it into our assembled string...
- String lineBreak = scanLineBreak();
- if (lineBreak.length() != 0) {
- String breaks = scanFlowScalarBreaks(startMark);
- if (!"\n".equals(lineBreak)) {
- chunks.append(lineBreak);
- } else if (breaks.length() == 0) {
- chunks.append(" ");
- }
- chunks.append(breaks);
+ String whitespaces = reader.prefixForward(length);
+ String lineBreak = scanLineBreak();
+ if (lineBreak.length() != 0) {
+ this.allowSimpleKey = true;
+ String prefix = reader.prefix(3);
+ if ("---".equals(prefix)
+ || "...".equals(prefix) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
+ return "";
+ }
+ if (parseComments && atEndOfPlain()) {
+ return "";
+ }
+ StringBuilder breaks = new StringBuilder();
+ while (true) {
+ if (reader.peek() == ' ') {
+ reader.forward();
} else {
- chunks.append(whitespaces);
- }
- return chunks.toString();
- }
-
- private String scanFlowScalarBreaks(Mark startMark) {
- // See the specification for details.
- StringBuilder chunks = new StringBuilder();
- while (true) {
- // Instead of checking indentation, we check for document
- // separators.
- String prefix = reader.prefix(3);
- if (("---".equals(prefix) || "...".equals(prefix))
- && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
- throw new ScannerException("while scanning a quoted scalar", startMark,
- "found unexpected document separator", reader.getMark());
- }
- // Scan past any number of spaces and tabs, ignoring them
- while (" \t".indexOf(reader.peek()) != -1) {
- reader.forward();
+ String lb = scanLineBreak();
+ if (lb.length() != 0) {
+ breaks.append(lb);
+ prefix = reader.prefix(3);
+ if ("---".equals(prefix)
+ || "...".equals(prefix) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
+ return "";
}
- // If we stopped at a line break, add that; otherwise, return the
- // assembled set of scalar breaks.
- String lineBreak = scanLineBreak();
- if (lineBreak.length() != 0) {
- chunks.append(lineBreak);
- } else {
- return chunks.toString();
- }
- }
+ } else {
+ break;
+ }
+ }
+ }
+ if (!"\n".equals(lineBreak)) {
+ return lineBreak + breaks;
+ } else if (breaks.length() == 0) {
+ return " ";
+ }
+ return breaks.toString();
+ }
+ return whitespaces;
+ }
+
+ /**
+ * <p>
+ * Scan a Tag handle. A Tag handle takes one of three forms:
+ *
+ * <pre>
+ * "!" (c-primary-tag-handle)
+ * "!!" (ns-secondary-tag-handle)
+ * "!(name)!" (c-named-tag-handle)
+ * </pre>
+ *
+ * Where (name) must be formatted as an ns-word-char.
+ * </p>
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#c-tag-handle"></a>
+ * @see <a href="http://www.yaml.org/spec/1.1/#ns-word-char"></a>
+ *
+ * <pre>
+ * See the specification for details.
+ * For some strange reasons, the specification does not allow '_' in
+ * tag handles. I have allowed it anyway.
+ * </pre>
+ */
+ private String scanTagHandle(String name, Mark startMark) {
+ int c = reader.peek();
+ if (c != '!') {
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a " + name, startMark,
+ "expected '!', but found " + s + "(" + (c) + ")", reader.getMark());
+ }
+ // Look for the next '!' in the stream, stopping if we hit a
+ // non-word-character. If the first character is a space, then the
+ // tag-handle is a c-primary-tag-handle ('!').
+ int length = 1;
+ c = reader.peek(length);
+ if (c != ' ') {
+ // Scan through 0+ alphabetic characters.
+ // FIXME According to the specification, these should be
+ // ns-word-char only, which prohibits '_'. This might be a
+ // candidate for a configuration option.
+ while (Constant.ALPHA.has(c)) {
+ length++;
+ c = reader.peek(length);
+ }
+ // Found the next non-word-char. If this is not a space and not an
+ // '!', then this is an error, as the tag-handle was specified as:
+ // !(name) or similar; the trailing '!' is missing.
+ if (c != '!') {
+ reader.forward(length);
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a " + name, startMark,
+ "expected '!', but found " + s + "(" + (c) + ")", reader.getMark());
+ }
+ length++;
+ }
+ String value = reader.prefixForward(length);
+ return value;
+ }
+
+ /**
+ * <p>
+ * Scan a Tag URI. This scanning is valid for both local and global tag directives, because both
+ * appear to be valid URIs as far as scanning is concerned. The difference may be distinguished
+ * later, in parsing. This method will scan for ns-uri-char*, which covers both cases.
+ * </p>
+ *
+ * <p>
+ * This method performs no verification that the scanned URI conforms to any particular kind of
+ * URI specification.
+ * </p>
+ *
+ * @see <a href="http://www.yaml.org/spec/1.1/#ns-uri-char"></a>
+ */
+ private String scanTagUri(String name, Mark startMark) {
+ // See the specification for details.
+ // Note: we do not check if URI is well-formed.
+ StringBuilder chunks = new StringBuilder();
+ // Scan through accepted URI characters, which includes the standard
+ // URI characters, plus the start-escape character ('%'). When we get
+ // to a start-escape, scan the escaped sequence, then return.
+ int length = 0;
+ int c = reader.peek(length);
+ while (Constant.URI_CHARS.has(c)) {
+ if (c == '%') {
+ chunks.append(reader.prefixForward(length));
+ length = 0;
+ chunks.append(scanUriEscapes(name, startMark));
+ } else {
+ length++;
+ }
+ c = reader.peek(length);
+ }
+ // Consume the last "chunk", which would not otherwise be consumed by
+ // the loop above.
+ if (length != 0) {
+ chunks.append(reader.prefixForward(length));
+ }
+ if (chunks.length() == 0) {
+ // If no URI was found, an error has occurred.
+ final String s = String.valueOf(Character.toChars(c));
+ throw new ScannerException("while scanning a " + name, startMark,
+ "expected URI, but found " + s + "(" + (c) + ")", reader.getMark());
+ }
+ return chunks.toString();
+ }
+
+ /**
+ * <p>
+ * Scan a sequence of %-escaped URI escape codes and convert them into a String representing the
+ * unescaped values.
+ * </p>
+ *
+ * FIXME This method fails for more than 256 bytes' worth of URI-encoded characters in a row. Is
+ * this possible? Is this a use-case?
+ *
+ * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">section 2.4, Escaped Encoding</a>
+ */
+ private String scanUriEscapes(String name, Mark startMark) {
+ // First, look ahead to see how many URI-escaped characters we should
+ // expect, so we can use the correct buffer size.
+ int length = 1;
+ while (reader.peek(length * 3) == '%') {
+ length++;
+ }
+ // See the specification for details.
+ // URIs containing 16 and 32 bit Unicode characters are
+ // encoded in UTF-8, and then each octet is written as a
+ // separate character.
+ Mark beginningMark = reader.getMark();
+ ByteBuffer buff = ByteBuffer.allocate(length);
+ while (reader.peek() == '%') {
+ reader.forward();
+ try {
+ byte code = (byte) Integer.parseInt(reader.prefix(2), 16);
+ buff.put(code);
+ } catch (NumberFormatException nfe) {
+ int c1 = reader.peek();
+ final String s1 = String.valueOf(Character.toChars(c1));
+ int c2 = reader.peek(1);
+ final String s2 = String.valueOf(Character.toChars(c2));
+ throw new ScannerException("while scanning a " + name, startMark,
+ "expected URI escape sequence of 2 hexadecimal numbers, but found " + s1 + "(" + c1
+ + ") and " + s2 + "(" + c2 + ")",
+ reader.getMark());
+ }
+ reader.forward(2);
+ }
+ buff.flip();
+ try {
+ return UriEncoder.decode(buff);
+ } catch (CharacterCodingException e) {
+ throw new ScannerException("while scanning a " + name, startMark,
+ "expected URI in UTF-8: " + e.getMessage(), beginningMark);
+ }
+ }
+
+ /**
+ * Scan a line break, transforming:
+ *
+ * <pre>
+ * '\r\n' : '\n'
+ * '\r' : '\n'
+ * '\n' : '\n'
+ * '\x85' : '\n'
+ * default : ''
+ * </pre>
+ */
+ private String scanLineBreak() {
+ int c = reader.peek();
+ if (c == '\r' || c == '\n' || c == '\u0085') {
+ if (c == '\r' && '\n' == reader.peek(1)) {
+ reader.forward(2);
+ } else {
+ reader.forward();
+ }
+ return "\n";
+ } else if (c == '\u2028' || c == '\u2029') {
+ reader.forward();
+ return String.valueOf(Character.toChars(c));
}
+ return "";
+ }
- /**
- * Scan a plain scalar.
- *
- * <pre>
- * See the specification for details.
- * We add an additional restriction for the flow context:
- * plain scalars in the flow context cannot contain ',', ':' and '?'.
- * We also keep track of the `allow_simple_key` flag here.
- * Indentation rules are loosed for the flow context.
- * </pre>
- */
- private Token scanPlain() {
- StringBuilder chunks = new StringBuilder();
- Mark startMark = reader.getMark();
- Mark endMark = startMark;
- int indent = this.indent + 1;
- String spaces = "";
- while (true) {
- char ch;
- int length = 0;
- // A comment indicates the end of the scalar.
- if (reader.peek() == '#') {
- break;
- }
- while (true) {
- ch = reader.peek(length);
- if (Constant.NULL_BL_T_LINEBR.has(ch)
- || (this.flowLevel == 0 && ch == ':' && Constant.NULL_BL_T_LINEBR
- .has(reader.peek(length + 1)))
- || (this.flowLevel != 0 && ",:?[]{}".indexOf(ch) != -1)) {
- break;
- }
- length++;
- }
- // It's not clear what we should do with ':' in the flow context.
- if (this.flowLevel != 0 && ch == ':'
- && Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length + 1), ",[]{}")) {
- reader.forward(length);
- throw new ScannerException("while scanning a plain scalar", startMark,
- "found unexpected ':'", reader.getMark(),
- "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.");
- }
- if (length == 0) {
- break;
- }
- this.allowSimpleKey = false;
- chunks.append(spaces);
- chunks.append(reader.prefixForward(length));
- endMark = reader.getMark();
- spaces = scanPlainSpaces();
- // System.out.printf("spaces[%s]\n", spaces);
- if (spaces.length() == 0 || reader.peek() == '#'
- || (this.flowLevel == 0 && this.reader.getColumn() < indent)) {
- break;
- }
- }
- return new ScalarToken(chunks.toString(), startMark, endMark, true);
+ private List<Token> makeTokenList(Token... tokens) {
+ List<Token> tokenList = new ArrayList<>();
+ for (int ix = 0; ix < tokens.length; ix++) {
+ if (tokens[ix] == null) {
+ continue;
+ }
+ if (!parseComments && (tokens[ix] instanceof CommentToken)) {
+ continue;
+ }
+ tokenList.add(tokens[ix]);
}
+ return tokenList;
+ }
- /**
- * See the specification for details. SnakeYAML and libyaml allow tabs
- * inside plain scalar
- */
- private String scanPlainSpaces() {
- int length = 0;
- while (reader.peek(length) == ' ' || reader.peek(length) == '\t') {
- length++;
- }
- String whitespaces = reader.prefixForward(length);
- String lineBreak = scanLineBreak();
- if (lineBreak.length() != 0) {
- this.allowSimpleKey = true;
- String prefix = reader.prefix(3);
- if ("---".equals(prefix) || "...".equals(prefix)
- && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
- return "";
- }
- StringBuilder breaks = new StringBuilder();
- while (true) {
- if (reader.peek() == ' ') {
- reader.forward();
- } else {
- String lb = scanLineBreak();
- if (lb.length() != 0) {
- breaks.append(lb);
- prefix = reader.prefix(3);
- if ("---".equals(prefix) || "...".equals(prefix)
- && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
- return "";
- }
- } else {
- break;
- }
- }
- }
- if (!"\n".equals(lineBreak)) {
- return lineBreak + breaks;
- } else if (breaks.length() == 0) {
- return " ";
- }
- return breaks.toString();
- }
- return whitespaces;
- }
+ /**
+ * Chomping the tail may have 3 values - yes, no, not defined.
+ */
+ private static class Chomping {
- /**
- * <p>
- * Scan a Tag handle. A Tag handle takes one of three forms:
- *
- * <pre>
- * "!" (c-primary-tag-handle)
- * "!!" (ns-secondary-tag-handle)
- * "!(name)!" (c-named-tag-handle)
- * </pre>
- *
- * Where (name) must be formatted as an ns-word-char.
- * </p>
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#c-tag-handle"></a>
- * @see <a href="http://www.yaml.org/spec/1.1/#ns-word-char"></a>
- *
- * <pre>
- * See the specification for details.
- * For some strange reasons, the specification does not allow '_' in
- * tag handles. I have allowed it anyway.
- * </pre>
- */
- private String scanTagHandle(String name, Mark startMark) {
- char ch = reader.peek();
- if (ch != '!') {
- throw new ScannerException("while scanning a " + name, startMark,
- "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
- }
- // Look for the next '!' in the stream, stopping if we hit a
- // non-word-character. If the first character is a space, then the
- // tag-handle is a c-primary-tag-handle ('!').
- int length = 1;
- ch = reader.peek(length);
- if (ch != ' ') {
- // Scan through 0+ alphabetic characters.
- // FIXME According to the specification, these should be
- // ns-word-char only, which prohibits '_'. This might be a
- // candidate for a configuration option.
- while (Constant.ALPHA.has(ch)) {
- length++;
- ch = reader.peek(length);
- }
- // Found the next non-word-char. If this is not a space and not an
- // '!', then this is an error, as the tag-handle was specified as:
- // !(name) or similar; the trailing '!' is missing.
- if (ch != '!') {
- reader.forward(length);
- throw new ScannerException("while scanning a " + name, startMark,
- "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
- }
- length++;
- }
- String value = reader.prefixForward(length);
- return value;
- }
+ private final Boolean value;
+ private final int increment;
- /**
- * <p>
- * Scan a Tag URI. This scanning is valid for both local and global tag
- * directives, because both appear to be valid URIs as far as scanning is
- * concerned. The difference may be distinguished later, in parsing. This
- * method will scan for ns-uri-char*, which covers both cases.
- * </p>
- *
- * <p>
- * This method performs no verification that the scanned URI conforms to any
- * particular kind of URI specification.
- * </p>
- *
- * @see <a href="http://www.yaml.org/spec/1.1/#ns-uri-char"></a>
- */
- private String scanTagUri(String name, Mark startMark) {
- // See the specification for details.
- // Note: we do not check if URI is well-formed.
- StringBuilder chunks = new StringBuilder();
- // Scan through accepted URI characters, which includes the standard
- // URI characters, plus the start-escape character ('%'). When we get
- // to a start-escape, scan the escaped sequence, then return.
- int length = 0;
- char ch = reader.peek(length);
- while (Constant.URI_CHARS.has(ch)) {
- if (ch == '%') {
- chunks.append(reader.prefixForward(length));
- length = 0;
- chunks.append(scanUriEscapes(name, startMark));
- } else {
- length++;
- }
- ch = reader.peek(length);
- }
- // Consume the last "chunk", which would not otherwise be consumed by
- // the loop above.
- if (length != 0) {
- chunks.append(reader.prefixForward(length));
- length = 0;
- }
- if (chunks.length() == 0) {
- // If no URI was found, an error has occurred.
- throw new ScannerException("while scanning a " + name, startMark,
- "expected URI, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
- }
- return chunks.toString();
+ public Chomping(Boolean value, int increment) {
+ this.value = value;
+ this.increment = increment;
}
- /**
- * <p>
- * Scan a sequence of %-escaped URI escape codes and convert them into a
- * String representing the unescaped values.
- * </p>
- *
- * FIXME This method fails for more than 256 bytes' worth of URI-encoded
- * characters in a row. Is this possible? Is this a use-case?
- *
- * @see <a href="http://www.ietf.org/rfc/rfc2396.txt"></a>, section 2.4, Escaped Encoding.
- */
- private String scanUriEscapes(String name, Mark startMark) {
- // First, look ahead to see how many URI-escaped characters we should
- // expect, so we can use the correct buffer size.
- int length = 1;
- while (reader.peek(length * 3) == '%') {
- length++;
- }
- // See the specification for details.
- // URIs containing 16 and 32 bit Unicode characters are
- // encoded in UTF-8, and then each octet is written as a
- // separate character.
- Mark beginningMark = reader.getMark();
- ByteBuffer buff = ByteBuffer.allocate(length);
- while (reader.peek() == '%') {
- reader.forward();
- try {
- byte code = (byte) Integer.parseInt(reader.prefix(2), 16);
- buff.put(code);
- } catch (NumberFormatException nfe) {
- throw new ScannerException("while scanning a " + name, startMark,
- "expected URI escape sequence of 2 hexadecimal numbers, but found "
- + reader.peek() + "(" + ((int) reader.peek()) + ") and "
- + reader.peek(1) + "(" + ((int) reader.peek(1)) + ")",
- reader.getMark());
- }
- reader.forward(2);
- }
- buff.flip();
- try {
- return UriEncoder.decode(buff);
- } catch (CharacterCodingException e) {
- throw new ScannerException("while scanning a " + name, startMark,
- "expected URI in UTF-8: " + e.getMessage(), beginningMark);
- }
+ public boolean chompTailIsNotFalse() {
+ return value == null || value;
}
- /**
- * Scan a line break, transforming:
- *
- * <pre>
- * '\r\n' : '\n'
- * '\r' : '\n'
- * '\n' : '\n'
- * '\x85' : '\n'
- * default : ''
- * </pre>
- */
- private String scanLineBreak() {
- // Transforms:
- // '\r\n' : '\n'
- // '\r' : '\n'
- // '\n' : '\n'
- // '\x85' : '\n'
- // default : ''
- char ch = reader.peek();
- if (ch == '\r' || ch == '\n' || ch == '\u0085') {
- if (ch == '\r' && '\n' == reader.peek(1)) {
- reader.forward(2);
- } else {
- reader.forward();
- }
- return "\n";
- } else if (ch == '\u2028' || ch == '\u2029') {
- reader.forward();
- return String.valueOf(ch);
- }
- return "";
+ public boolean chompTailIsTrue() {
+ return value != null && value;
}
- /**
- * Chomping the tail may have 3 values - yes, no, not defined.
- */
- private static class Chomping {
- private final Boolean value;
- private final int increment;
-
- public Chomping(Boolean value, int increment) {
- this.value = value;
- this.increment = increment;
- }
-
- public boolean chompTailIsNotFalse() {
- return value == null || value;
- }
-
- public boolean chompTailIsTrue() {
- return value != null && value;
- }
-
- public int getIncrement() {
- return increment;
- }
+ public int getIncrement() {
+ return increment;
}
+ }
}