diff --git a/src/main/java/graphql/parser/ParseCancelledTooManyCharsException.java b/src/main/java/graphql/parser/ParseCancelledTooManyCharsException.java
new file mode 100644
index 0000000000..cf3d08e591
--- /dev/null
+++ b/src/main/java/graphql/parser/ParseCancelledTooManyCharsException.java
@@ -0,0 +1,12 @@
+package graphql.parser;
+
+import graphql.Internal;
+
+/**
+ * Thrown when parsing is cancelled because more characters were presented than the
+ * configured maximum allows.
+ *
+ * No source location and no source preview are handed to the parent exception, they
+ * are passed as null.
+ */
+@Internal
+public class ParseCancelledTooManyCharsException extends InvalidSyntaxException {
+
+    // the limit that was exceeded, kept so handlers can read it without parsing the message
+    private final int maxCharacters;
+
+    /**
+     * @param msg           the message describing why parsing was cancelled
+     * @param maxCharacters the character limit that was exceeded
+     */
+    @Internal
+    public ParseCancelledTooManyCharsException(String msg, int maxCharacters) {
+        super(null, msg, null, null, null);
+        this.maxCharacters = maxCharacters;
+    }
+
+    /**
+     * @return the character limit that was exceeded when this exception was raised
+     */
+    public int getMaxCharacters() {
+        return maxCharacters;
+    }
+}
diff --git a/src/main/java/graphql/parser/Parser.java b/src/main/java/graphql/parser/Parser.java
index 10e29a686f..a0b9086114 100644
--- a/src/main/java/graphql/parser/Parser.java
+++ b/src/main/java/graphql/parser/Parser.java
@@ -20,6 +20,7 @@
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.TerminalNode;
+import org.jetbrains.annotations.NotNull;
import java.io.IOException;
import java.io.Reader;
@@ -28,6 +29,7 @@
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
+import java.util.function.Consumer;
/**
* This can parse graphql syntax, both Query syntax and Schema Definition Language (SDL) syntax, into an
@@ -35,10 +37,10 @@
*
* You should not generally need to call this class as the {@link graphql.GraphQL} code sets this up for you
* but if you are doing specific graphql utilities this class is essential.
- *
+ *
* Graphql syntax has a series of characters, such as spaces, new lines and commas that are not considered relevant
* to the syntax. However they can be captured and associated with the AST elements they belong to.
- *
+ *
* This costs more memory but for certain use cases (like editors) this maybe be useful. We have chosen to no capture
* ignored characters by default but you can turn this on, either per parse or statically for the whole JVM
* via {@link ParserOptions#setDefaultParserOptions(ParserOptions)} ()}}
@@ -178,43 +180,18 @@ private Value> parseValueImpl(String input) throws InvalidSyntaxException {
}
private Node> parseImpl(Reader reader, BiFunction nodeFunction, ParserOptions parserOptions) throws InvalidSyntaxException {
- MultiSourceReader multiSourceReader;
- if (reader instanceof MultiSourceReader) {
- multiSourceReader = (MultiSourceReader) reader;
- } else {
- multiSourceReader = MultiSourceReader.newMultiSourceReader()
- .reader(reader, null).build();
- }
- CodePointCharStream charStream;
- try {
- charStream = CharStreams.fromReader(multiSourceReader);
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- }
+ parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
- GraphqlLexer lexer = new GraphqlLexer(charStream);
- lexer.removeErrorListeners();
- lexer.addErrorListener(new BaseErrorListener() {
- @Override
- public void syntaxError(Recognizer, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
- SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine);
- String preview = AntlrHelper.createPreview(multiSourceReader, line);
- throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null);
- }
- });
+ MultiSourceReader multiSourceReader = setupMultiSourceReader(reader, parserOptions);
- // default in the parser options if they are not set
- parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
+ SafeTokenReader safeTokenReader = setupSafeTokenReader(parserOptions, multiSourceReader);
+
+ CodePointCharStream charStream = setupCharStream(safeTokenReader);
+
+ GraphqlLexer lexer = setupGraphqlLexer(multiSourceReader, charStream);
// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
- int maxTokens = parserOptions.getMaxTokens();
- int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
- BiConsumer onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
- token,
- maxTokenCount,
- multiSourceReader,
- ParseCancelledException.class);
- SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
+ SafeTokenSource safeTokenSource = getSafeTokenSource(parserOptions, multiSourceReader, lexer);
CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);
@@ -258,6 +235,65 @@ public void syntaxError(Recognizer, ?> recognizer, Object offendingSymbol, int
return node;
}
+ /**
+  * Makes sure the parser works against a {@link MultiSourceReader}. A reader that already is one
+  * is returned as is, any other reader is wrapped in a new one with a null source name.
+  *
+  * @param reader        the reader handed to the parser
+  * @param parserOptions not read by this method
+  *
+  * @return the multi source reader to parse from
+  */
+ private static MultiSourceReader setupMultiSourceReader(Reader reader, ParserOptions parserOptions) {
+     if (reader instanceof MultiSourceReader) {
+         // nothing to wrap, the caller already gave us the right type
+         return (MultiSourceReader) reader;
+     }
+     return MultiSourceReader.newMultiSourceReader()
+             .reader(reader, null)
+             .build();
+ }
+
+ /**
+  * Wraps the multi source reader in a {@link SafeTokenReader} that is limited to the maximum
+  * number of characters found in the parser options. The callback given to the reader throws a
+  * {@link ParseCancelledTooManyCharsException}.
+  *
+  * @param parserOptions     the options to read the character limit from
+  * @param multiSourceReader the reader to be guarded
+  *
+  * @return the character limited reader
+  */
+ @NotNull
+ private static SafeTokenReader setupSafeTokenReader(ParserOptions parserOptions, MultiSourceReader multiSourceReader) {
+     final int characterLimit = parserOptions.getMaxCharacters();
+     // the value handed to the callback is not used, the limit captured above goes into the message
+     return new SafeTokenReader(multiSourceReader, characterLimit, ignored -> {
+         throw new ParseCancelledTooManyCharsException(
+                 String.format("More than %d characters have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", characterLimit),
+                 characterLimit);
+     });
+ }
+
+ /**
+  * Builds the character stream that the lexer consumes from the given reader. An
+  * {@link IOException} raised while doing so is rethrown as an {@link UncheckedIOException}.
+  *
+  * @param safeTokenReader the reader to take the characters from
+  *
+  * @return the character stream
+  */
+ @NotNull
+ private static CodePointCharStream setupCharStream(SafeTokenReader safeTokenReader) {
+     try {
+         return CharStreams.fromReader(safeTokenReader);
+     } catch (IOException ioException) {
+         throw new UncheckedIOException(ioException);
+     }
+ }
+
+ /**
+ * Creates the lexer over the given character stream and replaces its error listeners with a single
+ * listener that throws an {@link InvalidSyntaxException} when a syntax error is reported.
+ *
+ * @param multiSourceReader used to work out the source location and source preview put into the exception
+ * @param charStream the characters to be lexed
+ *
+ * @return the configured lexer
+ */
+ @NotNull
+ private static GraphqlLexer setupGraphqlLexer(MultiSourceReader multiSourceReader, CodePointCharStream charStream) {
+ GraphqlLexer lexer = new GraphqlLexer(charStream);
+ // remove whatever listeners are already registered so that only the one below gets called
+ lexer.removeErrorListeners();
+ lexer.addErrorListener(new BaseErrorListener() {
+ @Override
+ public void syntaxError(Recognizer, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
+ // map the reported line / column back through the multi source reader
+ SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine);
+ String preview = AntlrHelper.createPreview(multiSourceReader, line);
+ throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null);
+ }
+ });
+ return lexer;
+ }
+
+ /**
+ * Wraps the lexer in a {@link SafeTokenSource} that is limited by the maximum token and maximum
+ * whitespace token counts found in the parser options.
+ *
+ * @param parserOptions the options to read the token limits from
+ * @param multiSourceReader passed on to throwIfTokenProblems when the callback is invoked
+ * @param lexer the lexer to be wrapped
+ *
+ * @return the token limited token source
+ */
+ @NotNull
+ private SafeTokenSource getSafeTokenSource(ParserOptions parserOptions, MultiSourceReader multiSourceReader, GraphqlLexer lexer) {
+ int maxTokens = parserOptions.getMaxTokens();
+ int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
+ // the callback hands the limit and the token it receives over to throwIfTokenProblems
+ BiConsumer onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
+ token,
+ maxTokenCount,
+ multiSourceReader,
+ ParseCancelledException.class);
+ return new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
+ }
+
private void setupParserListener(MultiSourceReader multiSourceReader, GraphqlParser parser, GraphqlAntlrToLanguage toLanguage) {
ParserOptions parserOptions = toLanguage.getParserOptions();
ParsingListener parsingListener = parserOptions.getParsingListener();
@@ -338,7 +374,8 @@ private void throwIfTokenProblems(Token token, int maxLimit, MultiSourceReader m
throw new ParseCancelledTooDeepException(msg, sourceLocation, offendingToken, maxLimit, tokenType);
}
String msg = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxLimit, tokenType);
- throw new ParseCancelledException(msg, sourceLocation, offendingToken); }
+ throw new ParseCancelledException(msg, sourceLocation, offendingToken);
+ }
/**
* Allows you to override the ANTLR to AST code.
diff --git a/src/main/java/graphql/parser/ParserOptions.java b/src/main/java/graphql/parser/ParserOptions.java
index 00ea843755..2d5da2b4dd 100644
--- a/src/main/java/graphql/parser/ParserOptions.java
+++ b/src/main/java/graphql/parser/ParserOptions.java
@@ -11,9 +11,20 @@
*/
@PublicApi
public class ParserOptions {
+ /**
+ * A graphql hacking vector is to send nonsensical queries with large tokens that contain repeated characters
+ * that burn lots of parsing CPU time and burn memory representing a document that won't ever execute.
+ * To prevent this for most users, graphql-java sets this value to 1MB.
+ * ANTLR parsing time is linear to the number of characters presented. The more you
+ * allow the longer it takes.
+ *
+ * If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
+ * JVM wide.
+ */
+ public static final int MAX_QUERY_CHARACTERS = 1024 * 1024; // 1 MB
/**
- * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
+ * A graphql hacking vector is to send nonsensical queries with lots of tokens that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* sets this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you
* allow the longer it takes.
@@ -46,6 +57,7 @@ public class ParserOptions {
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true)
+ .maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
@@ -55,6 +67,7 @@ public class ParserOptions {
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(false) // #comments are not useful in query parsing
+ .maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
@@ -64,6 +77,7 @@ public class ParserOptions {
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true) // #comments are useful in SDL parsing
+ .maxCharacters(Integer.MAX_VALUE)
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
.maxWhitespaceTokens(Integer.MAX_VALUE)
.maxRuleDepth(Integer.MAX_VALUE)
@@ -167,6 +181,7 @@ public static void setDefaultSdlParserOptions(ParserOptions options) {
private final boolean captureIgnoredChars;
private final boolean captureSourceLocation;
private final boolean captureLineComments;
+ private final int maxCharacters;
private final int maxTokens;
private final int maxWhitespaceTokens;
private final int maxRuleDepth;
@@ -176,6 +191,7 @@ private ParserOptions(Builder builder) {
this.captureIgnoredChars = builder.captureIgnoredChars;
this.captureSourceLocation = builder.captureSourceLocation;
this.captureLineComments = builder.captureLineComments;
+ this.maxCharacters = builder.maxCharacters;
this.maxTokens = builder.maxTokens;
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
this.maxRuleDepth = builder.maxRuleDepth;
@@ -219,6 +235,18 @@ public boolean isCaptureLineComments() {
return captureLineComments;
}
+ /**
+ * A graphql hacking vector is to send nonsensical queries that contain repeated characters, which burn lots of parsing CPU time and
+ * memory representing a document that won't ever execute. To prevent this for most users, graphql-java
+ * sets this value to 1MB.
+ *
+ * @return the maximum number of characters the parser will accept, after which an exception will be thrown.
+ */
+ public int getMaxCharacters() {
+ return maxCharacters;
+ }
+
+
/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burns
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
@@ -272,6 +300,7 @@ public static class Builder {
private boolean captureSourceLocation = true;
private boolean captureLineComments = true;
private ParsingListener parsingListener = ParsingListener.NOOP;
+ private int maxCharacters = MAX_QUERY_CHARACTERS;
private int maxTokens = MAX_QUERY_TOKENS;
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;
private int maxRuleDepth = MAX_RULE_DEPTH;
@@ -283,6 +312,7 @@ public static class Builder {
this.captureIgnoredChars = parserOptions.captureIgnoredChars;
this.captureSourceLocation = parserOptions.captureSourceLocation;
this.captureLineComments = parserOptions.captureLineComments;
+ this.maxCharacters = parserOptions.maxCharacters;
this.maxTokens = parserOptions.maxTokens;
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
this.maxRuleDepth = parserOptions.maxRuleDepth;
@@ -304,6 +334,11 @@ public Builder captureLineComments(boolean captureLineComments) {
return this;
}
+ /**
+ * Sets the maximum number of characters the parser will accept.
+ *
+ * @param maxCharacters the maximum number of characters
+ *
+ * @return this builder
+ */
+ public Builder maxCharacters(int maxCharacters) {
+ this.maxCharacters = maxCharacters;
+ return this;
+ }
+
public Builder maxTokens(int maxTokens) {
this.maxTokens = maxTokens;
return this;
diff --git a/src/main/java/graphql/parser/SafeTokenReader.java b/src/main/java/graphql/parser/SafeTokenReader.java
new file mode 100644
index 0000000000..be102be0d2
--- /dev/null
+++ b/src/main/java/graphql/parser/SafeTokenReader.java
@@ -0,0 +1,95 @@
+package graphql.parser;
+
+import graphql.Internal;
+import org.jetbrains.annotations.NotNull;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.CharBuffer;
+import java.util.function.Consumer;
+
+/**
+ * This reader will only emit a maximum number of characters from it. This is used to protect us from evil input.
+ *
+ * If a graphql system does not have some max HTTP input limit, then this will help protect the system. This is a limit
+ * of last resort. Ideally the http input should be limited, but if it's not, we have this.
+ *
+ * Every character handed out by one of the read methods is counted and the callback is invoked once the running
+ * total is greater than the maximum. Characters passed over via {@link #skip(long)} are not counted and
+ * {@link #reset()} does not wind the total back, so characters read again after a reset are counted again.
+ */
+@Internal
+public class SafeTokenReader extends Reader {
+
+    private final Reader delegate;
+    private final int maxCharacters;
+    private final Consumer<Integer> whenMaxCharactersExceeded;
+    // a long, so the running total can never wrap around to a negative number and slip back under the limit
+    private long count;
+
+    /**
+     * @param delegate                  the reader that the characters actually come from
+     * @param maxCharacters             the maximum number of characters that may be read
+     * @param whenMaxCharactersExceeded called with the maximum once more characters than that have been read
+     */
+    public SafeTokenReader(Reader delegate, int maxCharacters, Consumer<Integer> whenMaxCharactersExceeded) {
+        this.delegate = delegate;
+        this.maxCharacters = maxCharacters;
+        this.whenMaxCharactersExceeded = whenMaxCharactersExceeded;
+    }
+
+    /**
+     * Adds to the running total and invokes the callback when the total is over the maximum.
+     *
+     * @param read    the value the delegate returned, where -1 means end of stream and nothing is counted
+     * @param howMany the number of characters to add to the running total
+     *
+     * @return the read value, unchanged
+     */
+    private int checkHowMany(int read, int howMany) {
+        if (read != -1) {
+            count += howMany;
+            if (count > maxCharacters) {
+                whenMaxCharactersExceeded.accept(maxCharacters);
+            }
+        }
+        return read;
+    }
+
+    @Override
+    public int read(char @NotNull [] buff, int off, int len) throws IOException {
+        int howMany = delegate.read(buff, off, len);
+        return checkHowMany(howMany, howMany);
+    }
+
+    @Override
+    public int read() throws IOException {
+        // a single character read returns the character itself, so it counts as one
+        int ch = delegate.read();
+        return checkHowMany(ch, 1);
+    }
+
+    @Override
+    public int read(@NotNull CharBuffer target) throws IOException {
+        int howMany = delegate.read(target);
+        return checkHowMany(howMany, howMany);
+    }
+
+    @Override
+    public int read(char @NotNull [] buff) throws IOException {
+        int howMany = delegate.read(buff);
+        return checkHowMany(howMany, howMany);
+    }
+
+    @Override
+    public void close() throws IOException {
+        delegate.close();
+    }
+
+    @Override
+    public long skip(long n) throws IOException {
+        // skipped characters are never handed to the caller, so they are not added to the total
+        return delegate.skip(n);
+    }
+
+    @Override
+    public boolean ready() throws IOException {
+        return delegate.ready();
+    }
+
+    @Override
+    public boolean markSupported() {
+        return delegate.markSupported();
+    }
+
+    @Override
+    public void mark(int readAheadLimit) throws IOException {
+        delegate.mark(readAheadLimit);
+    }
+
+    @Override
+    public void reset() throws IOException {
+        // the running total is deliberately left alone here
+        delegate.reset();
+    }
+}
diff --git a/src/test/groovy/graphql/parser/ParserOptionsTest.groovy b/src/test/groovy/graphql/parser/ParserOptionsTest.groovy
index 5867b181fc..6a0937ff5c 100644
--- a/src/test/groovy/graphql/parser/ParserOptionsTest.groovy
+++ b/src/test/groovy/graphql/parser/ParserOptionsTest.groovy
@@ -7,6 +7,8 @@ class ParserOptionsTest extends Specification {
static defaultOperationOptions = ParserOptions.getDefaultOperationParserOptions()
static defaultSdlOptions = ParserOptions.getDefaultSdlParserOptions()
+ static final int ONE_MB = 1024 * 1024
+
void setup() {
ParserOptions.setDefaultParserOptions(defaultOptions)
ParserOptions.setDefaultOperationParserOptions(defaultOperationOptions)
@@ -21,6 +23,7 @@ class ParserOptionsTest extends Specification {
def "lock in default settings"() {
expect:
+ defaultOptions.getMaxCharacters() == ONE_MB
defaultOptions.getMaxTokens() == 15_000
defaultOptions.getMaxWhitespaceTokens() == 200_000
defaultOptions.isCaptureSourceLocation()
@@ -33,6 +36,7 @@ class ParserOptionsTest extends Specification {
!defaultOperationOptions.isCaptureLineComments()
!defaultOperationOptions.isCaptureIgnoredChars()
+ defaultSdlOptions.getMaxCharacters() == Integer.MAX_VALUE
defaultSdlOptions.getMaxTokens() == Integer.MAX_VALUE
defaultSdlOptions.getMaxWhitespaceTokens() == Integer.MAX_VALUE
defaultSdlOptions.isCaptureSourceLocation()
@@ -41,11 +45,22 @@ class ParserOptionsTest extends Specification {
}
def "can set in new option JVM wide"() {
- def newDefaultOptions = defaultOptions.transform({ it.captureIgnoredChars(true) })
+ def newDefaultOptions = defaultOptions.transform({
+ it.captureIgnoredChars(true)
+ })
def newDefaultOperationOptions = defaultOperationOptions.transform(
- { it.captureIgnoredChars(true).captureLineComments(true).maxWhitespaceTokens(300_000) })
+ {
+ it.captureIgnoredChars(true)
+ .captureLineComments(true)
+ .maxCharacters(1_000_000)
+ .maxWhitespaceTokens(300_000)
+ })
def newDefaultSDlOptions = defaultSdlOptions.transform(
- { it.captureIgnoredChars(true).captureLineComments(true).maxWhitespaceTokens(300_000) })
+ {
+ it.captureIgnoredChars(true)
+ .captureLineComments(true)
+ .maxWhitespaceTokens(300_000)
+ })
when:
ParserOptions.setDefaultParserOptions(newDefaultOptions)
@@ -58,18 +73,21 @@ class ParserOptionsTest extends Specification {
then:
+ currentDefaultOptions.getMaxCharacters() == ONE_MB
currentDefaultOptions.getMaxTokens() == 15_000
currentDefaultOptions.getMaxWhitespaceTokens() == 200_000
currentDefaultOptions.isCaptureSourceLocation()
currentDefaultOptions.isCaptureLineComments()
currentDefaultOptions.isCaptureIgnoredChars()
+ currentDefaultOperationOptions.getMaxCharacters() == 1_000_000
currentDefaultOperationOptions.getMaxTokens() == 15_000
currentDefaultOperationOptions.getMaxWhitespaceTokens() == 300_000
currentDefaultOperationOptions.isCaptureSourceLocation()
currentDefaultOperationOptions.isCaptureLineComments()
currentDefaultOperationOptions.isCaptureIgnoredChars()
+ currentDefaultSdlOptions.getMaxCharacters() == Integer.MAX_VALUE
currentDefaultSdlOptions.getMaxTokens() == Integer.MAX_VALUE
currentDefaultSdlOptions.getMaxWhitespaceTokens() == 300_000
currentDefaultSdlOptions.isCaptureSourceLocation()
diff --git a/src/test/groovy/graphql/parser/ParserStressTest.groovy b/src/test/groovy/graphql/parser/ParserStressTest.groovy
index 9eeade2eac..1c1f252d2d 100644
--- a/src/test/groovy/graphql/parser/ParserStressTest.groovy
+++ b/src/test/groovy/graphql/parser/ParserStressTest.groovy
@@ -149,6 +149,28 @@ class ParserStressTest extends Specification {
thrown(ParseCancelledException) // too many tokens will catch this wide queries
}
+ // a single name token made of 10 million characters is expected to be rejected by the character limit
+ // NOTE(review): presumably defaultOperationOptions carries the 1MB default character limit - confirm
+ def "large single token attack parse can be prevented"() {
+ String text = "q" * 10_000_000
+ text = "query " + text + " {f}"
+
+ when:
+ new Parser().parseDocument(text, defaultOperationOptions)
+
+ then:
+ thrown(ParseCancelledTooManyCharsException)
+ }
+
+ // 900,000 characters plus the surrounding query text stays below 1024 * 1024 characters
+ def "inside limits single token attack parse will be accepted"() {
+ String text = "q" * 900_000
+ text = "query " + text + " {f}"
+
+ when:
+ def document = new Parser().parseDocument(text, defaultOperationOptions)
+
+ then:
+ document != null // it's parsed - it's invalid of course, but parsed
+ }
+
String mkDeepQuery(int howMany) {
def field = 'f(a:"")'
StringBuilder sb = new StringBuilder("query q{")
diff --git a/src/test/groovy/graphql/parser/SafeTokenReaderTest.groovy b/src/test/groovy/graphql/parser/SafeTokenReaderTest.groovy
new file mode 100644
index 0000000000..e96fe93b9c
--- /dev/null
+++ b/src/test/groovy/graphql/parser/SafeTokenReaderTest.groovy
@@ -0,0 +1,18 @@
+package graphql.parser
+
+import spock.lang.Specification
+
+// checks that SafeTokenReader invokes its callback once more characters than the maximum have been read
+class SafeTokenReaderTest extends Specification {
+
+ def "will count how many its read and stop after max"() {
+ when:
+ // 10 characters are available but only 5 are allowed
+ StringReader sr = new StringReader("0123456789")
+ SafeTokenReader safeReader = new SafeTokenReader(sr, 5,
+ { Integer maxChars -> throw new RuntimeException("max " + maxChars) })
+ // reading a line pulls the characters through the safe reader
+ safeReader.readLine()
+
+ then:
+ def rte = thrown(RuntimeException)
+ // the callback is handed the configured maximum
+ rte.message == "max 5"
+ }
+}