diff --git a/src/main/java/graphql/parser/ParseCancelledTooManyCharsException.java b/src/main/java/graphql/parser/ParseCancelledTooManyCharsException.java new file mode 100644 index 0000000000..cf3d08e591 --- /dev/null +++ b/src/main/java/graphql/parser/ParseCancelledTooManyCharsException.java @@ -0,0 +1,12 @@ +package graphql.parser; + +import graphql.Internal; + +@Internal +public class ParseCancelledTooManyCharsException extends InvalidSyntaxException { + + @Internal + public ParseCancelledTooManyCharsException(String msg, int maxCharacters) { + super(null, msg, null, null, null); + } +} diff --git a/src/main/java/graphql/parser/Parser.java b/src/main/java/graphql/parser/Parser.java index 10e29a686f..a0b9086114 100644 --- a/src/main/java/graphql/parser/Parser.java +++ b/src/main/java/graphql/parser/Parser.java @@ -20,6 +20,7 @@ import org.antlr.v4.runtime.atn.PredictionMode; import org.antlr.v4.runtime.tree.ParseTreeListener; import org.antlr.v4.runtime.tree.TerminalNode; +import org.jetbrains.annotations.NotNull; import java.io.IOException; import java.io.Reader; @@ -28,6 +29,7 @@ import java.util.Optional; import java.util.function.BiConsumer; import java.util.function.BiFunction; +import java.util.function.Consumer; /** * This can parse graphql syntax, both Query syntax and Schema Definition Language (SDL) syntax, into an @@ -35,10 +37,10 @@ *

* You should not generally need to call this class as the {@link graphql.GraphQL} code sets this up for you * but if you are doing specific graphql utilities this class is essential. - * + * <p>

* Graphql syntax has a series of characters, such as spaces, new lines and commas that are not considered relevant * to the syntax. However they can be captured and associated with the AST elements they belong to. - * + * <p>

* This costs more memory but for certain use cases (like editors) this maybe be useful. We have chosen to no capture * ignored characters by default but you can turn this on, either per parse or statically for the whole JVM * via {@link ParserOptions#setDefaultParserOptions(ParserOptions)} ()}} @@ -178,43 +180,18 @@ private Value parseValueImpl(String input) throws InvalidSyntaxException { } private Node parseImpl(Reader reader, BiFunction nodeFunction, ParserOptions parserOptions) throws InvalidSyntaxException { - MultiSourceReader multiSourceReader; - if (reader instanceof MultiSourceReader) { - multiSourceReader = (MultiSourceReader) reader; - } else { - multiSourceReader = MultiSourceReader.newMultiSourceReader() - .reader(reader, null).build(); - } - CodePointCharStream charStream; - try { - charStream = CharStreams.fromReader(multiSourceReader); - } catch (IOException e) { - throw new UncheckedIOException(e); - } + parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions()); - GraphqlLexer lexer = new GraphqlLexer(charStream); - lexer.removeErrorListeners(); - lexer.addErrorListener(new BaseErrorListener() { - @Override - public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) { - SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine); - String preview = AntlrHelper.createPreview(multiSourceReader, line); - throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null); - } - }); + MultiSourceReader multiSourceReader = setupMultiSourceReader(reader, parserOptions); - // default in the parser options if they are not set - parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions()); + SafeTokenReader safeTokenReader = setupSafeTokenReader(parserOptions, multiSourceReader); + + CodePointCharStream charStream = 
setupCharStream(safeTokenReader); + + GraphqlLexer lexer = setupGraphqlLexer(multiSourceReader, charStream); // this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks. - int maxTokens = parserOptions.getMaxTokens(); - int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens(); - BiConsumer onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems( - token, - maxTokenCount, - multiSourceReader, - ParseCancelledException.class); - SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens); + SafeTokenSource safeTokenSource = getSafeTokenSource(parserOptions, multiSourceReader, lexer); CommonTokenStream tokens = new CommonTokenStream(safeTokenSource); @@ -258,6 +235,65 @@ public void syntaxError(Recognizer recognizer, Object offendingSymbol, int return node; } + private static MultiSourceReader setupMultiSourceReader(Reader reader, ParserOptions parserOptions) { + MultiSourceReader multiSourceReader; + if (reader instanceof MultiSourceReader) { + multiSourceReader = (MultiSourceReader) reader; + } else { + multiSourceReader = MultiSourceReader.newMultiSourceReader() + .reader(reader, null).build(); + } + return multiSourceReader; + } + + @NotNull + private static SafeTokenReader setupSafeTokenReader(ParserOptions parserOptions, MultiSourceReader multiSourceReader) { + int maxCharacters = parserOptions.getMaxCharacters(); + Consumer onTooManyCharacters = it -> { + String msg = String.format("More than %d characters have been presented. 
To prevent Denial Of Service attacks, parsing has been cancelled.", maxCharacters); + throw new ParseCancelledTooManyCharsException(msg, maxCharacters); + }; + return new SafeTokenReader(multiSourceReader, maxCharacters, onTooManyCharacters); + } + + @NotNull + private static CodePointCharStream setupCharStream(SafeTokenReader safeTokenReader) { + CodePointCharStream charStream; + try { + charStream = CharStreams.fromReader(safeTokenReader); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + return charStream; + } + + @NotNull + private static GraphqlLexer setupGraphqlLexer(MultiSourceReader multiSourceReader, CodePointCharStream charStream) { + GraphqlLexer lexer = new GraphqlLexer(charStream); + lexer.removeErrorListeners(); + lexer.addErrorListener(new BaseErrorListener() { + @Override + public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) { + SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine); + String preview = AntlrHelper.createPreview(multiSourceReader, line); + throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null); + } + }); + return lexer; + } + + @NotNull + private SafeTokenSource getSafeTokenSource(ParserOptions parserOptions, MultiSourceReader multiSourceReader, GraphqlLexer lexer) { + int maxTokens = parserOptions.getMaxTokens(); + int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens(); + BiConsumer onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems( + token, + maxTokenCount, + multiSourceReader, + ParseCancelledException.class); + return new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens); + } + private void setupParserListener(MultiSourceReader multiSourceReader, GraphqlParser parser, GraphqlAntlrToLanguage toLanguage) { ParserOptions parserOptions = toLanguage.getParserOptions(); ParsingListener parsingListener = 
parserOptions.getParsingListener(); @@ -338,7 +374,8 @@ private void throwIfTokenProblems(Token token, int maxLimit, MultiSourceReader m throw new ParseCancelledTooDeepException(msg, sourceLocation, offendingToken, maxLimit, tokenType); } String msg = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxLimit, tokenType); - throw new ParseCancelledException(msg, sourceLocation, offendingToken); } + throw new ParseCancelledException(msg, sourceLocation, offendingToken); + } /** * Allows you to override the ANTLR to AST code. diff --git a/src/main/java/graphql/parser/ParserOptions.java b/src/main/java/graphql/parser/ParserOptions.java index 00ea843755..2d5da2b4dd 100644 --- a/src/main/java/graphql/parser/ParserOptions.java +++ b/src/main/java/graphql/parser/ParserOptions.java @@ -11,9 +11,20 @@ */ @PublicApi public class ParserOptions { + /** + * A graphql hacking vector is to send nonsensical queries with large tokens that contain repeated characters + * that burn lots of parsing CPU time and burn memory representing a document that won't ever execute. + * To prevent this for most users, graphql-java sets this value to 1MB. + * ANTLR parsing time is linear to the number of characters presented. The more you + * allow the longer it takes. + * <p>

+ * If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this + * JVM wide. + */ + public static final int MAX_QUERY_CHARACTERS = 1024 * 1024; // 1 MB /** - * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn + * A graphql hacking vector is to send nonsensical queries with lots of tokens that burn lots of parsing CPU time and burn * memory representing a document that won't ever execute. To prevent this for most users, graphql-java * sets this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you * allow the longer it takes. @@ -46,6 +57,7 @@ public class ParserOptions { .captureIgnoredChars(false) .captureSourceLocation(true) .captureLineComments(true) + .maxCharacters(MAX_QUERY_CHARACTERS) .maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java .maxWhitespaceTokens(MAX_WHITESPACE_TOKENS) .maxRuleDepth(MAX_RULE_DEPTH) @@ -55,6 +67,7 @@ public class ParserOptions { .captureIgnoredChars(false) .captureSourceLocation(true) .captureLineComments(false) // #comments are not useful in query parsing + .maxCharacters(MAX_QUERY_CHARACTERS) .maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java .maxWhitespaceTokens(MAX_WHITESPACE_TOKENS) .maxRuleDepth(MAX_RULE_DEPTH) @@ -64,6 +77,7 @@ public class ParserOptions { .captureIgnoredChars(false) .captureSourceLocation(true) .captureLineComments(true) // #comments are useful in SDL parsing + .maxCharacters(Integer.MAX_VALUE) .maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers .maxWhitespaceTokens(Integer.MAX_VALUE) .maxRuleDepth(Integer.MAX_VALUE) @@ -167,6 +181,7 @@ public static void setDefaultSdlParserOptions(ParserOptions options) { private final boolean captureIgnoredChars; private final boolean 
captureSourceLocation; private final boolean captureLineComments; + private final int maxCharacters; private final int maxTokens; private final int maxWhitespaceTokens; private final int maxRuleDepth; @@ -176,6 +191,7 @@ private ParserOptions(Builder builder) { this.captureIgnoredChars = builder.captureIgnoredChars; this.captureSourceLocation = builder.captureSourceLocation; this.captureLineComments = builder.captureLineComments; + this.maxCharacters = builder.maxCharacters; this.maxTokens = builder.maxTokens; this.maxWhitespaceTokens = builder.maxWhitespaceTokens; this.maxRuleDepth = builder.maxRuleDepth; @@ -219,6 +235,18 @@ public boolean isCaptureLineComments() { return captureLineComments; } + /** + * A graphql hacking vector is to send nonsensical queries that contain repeated characters that burn lots of parsing CPU time and burn + * memory representing a document that won't ever execute. To prevent this for most users, graphql-java + * sets this value to 1MB. + * + * @return the maximum number of characters the parser will accept, after which an exception will be thrown. + */ + public int getMaxCharacters() { + return maxCharacters; + } + + /** * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burns * memory representing a document that won't ever execute. 
To prevent this you can set a maximum number of parse @@ -272,6 +300,7 @@ public static class Builder { private boolean captureSourceLocation = true; private boolean captureLineComments = true; private ParsingListener parsingListener = ParsingListener.NOOP; + private int maxCharacters = MAX_QUERY_CHARACTERS; private int maxTokens = MAX_QUERY_TOKENS; private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS; private int maxRuleDepth = MAX_RULE_DEPTH; @@ -283,6 +312,7 @@ public static class Builder { this.captureIgnoredChars = parserOptions.captureIgnoredChars; this.captureSourceLocation = parserOptions.captureSourceLocation; this.captureLineComments = parserOptions.captureLineComments; + this.maxCharacters = parserOptions.maxCharacters; this.maxTokens = parserOptions.maxTokens; this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens; this.maxRuleDepth = parserOptions.maxRuleDepth; @@ -304,6 +334,11 @@ public Builder captureLineComments(boolean captureLineComments) { return this; } + public Builder maxCharacters(int maxCharacters) { + this.maxCharacters = maxCharacters; + return this; + } + public Builder maxTokens(int maxTokens) { this.maxTokens = maxTokens; return this; diff --git a/src/main/java/graphql/parser/SafeTokenReader.java b/src/main/java/graphql/parser/SafeTokenReader.java new file mode 100644 index 0000000000..be102be0d2 --- /dev/null +++ b/src/main/java/graphql/parser/SafeTokenReader.java @@ -0,0 +1,95 @@ +package graphql.parser; + +import graphql.Internal; +import org.jetbrains.annotations.NotNull; + +import java.io.IOException; +import java.io.Reader; +import java.nio.CharBuffer; +import java.util.function.Consumer; + +/** + * This reader will only emit a maximum number of characters from it. This is used to protect us from evil input. + *

+ * If a graphql system does not have some max HTTP input limit, then this will help protect the system. This is a limit + * of last resort. Ideally the HTTP input should be limited, but if it's not, we have this. + */ +@Internal +public class SafeTokenReader extends Reader { + + private final Reader delegate; + private final int maxCharacters; + private final Consumer whenMaxCharactersExceeded; + private int count; + + public SafeTokenReader(Reader delegate, int maxCharacters, Consumer whenMaxCharactersExceeded) { + this.delegate = delegate; + this.maxCharacters = maxCharacters; + this.whenMaxCharactersExceeded = whenMaxCharactersExceeded; + count = 0; + } + + private int checkHowMany(int read, int howMany) { + if (read != -1) { + count += howMany; + if (count > maxCharacters) { + whenMaxCharactersExceeded.accept(maxCharacters); + } + } + return read; + } + + @Override + public int read(char @NotNull [] buff, int off, int len) throws IOException { + int howMany = delegate.read(buff, off, len); + return checkHowMany(howMany, howMany); + } + + @Override + public int read() throws IOException { + int ch = delegate.read(); + return checkHowMany(ch, 1); + } + + @Override + public int read(@NotNull CharBuffer target) throws IOException { + int howMany = delegate.read(target); + return checkHowMany(howMany, howMany); + } + + @Override + public int read( char @NotNull [] buff) throws IOException { + int howMany = delegate.read(buff); + return checkHowMany(howMany, howMany); + } + + @Override + public void close() throws IOException { + delegate.close(); + } + + @Override + public long skip(long n) throws IOException { + return delegate.skip(n); + } + + @Override + public boolean ready() throws IOException { + return delegate.ready(); + } + + @Override + public boolean markSupported() { + return delegate.markSupported(); + } + + @Override + public void mark(int readAheadLimit) throws IOException { + delegate.mark(readAheadLimit); + } + + @Override + public void reset() 
throws IOException { + delegate.reset(); + } +} diff --git a/src/test/groovy/graphql/parser/ParserOptionsTest.groovy b/src/test/groovy/graphql/parser/ParserOptionsTest.groovy index 5867b181fc..6a0937ff5c 100644 --- a/src/test/groovy/graphql/parser/ParserOptionsTest.groovy +++ b/src/test/groovy/graphql/parser/ParserOptionsTest.groovy @@ -7,6 +7,8 @@ class ParserOptionsTest extends Specification { static defaultOperationOptions = ParserOptions.getDefaultOperationParserOptions() static defaultSdlOptions = ParserOptions.getDefaultSdlParserOptions() + static final int ONE_MB = 1024 * 1024 + void setup() { ParserOptions.setDefaultParserOptions(defaultOptions) ParserOptions.setDefaultOperationParserOptions(defaultOperationOptions) @@ -21,6 +23,7 @@ class ParserOptionsTest extends Specification { def "lock in default settings"() { expect: + defaultOptions.getMaxCharacters() == ONE_MB defaultOptions.getMaxTokens() == 15_000 defaultOptions.getMaxWhitespaceTokens() == 200_000 defaultOptions.isCaptureSourceLocation() @@ -33,6 +36,7 @@ class ParserOptionsTest extends Specification { !defaultOperationOptions.isCaptureLineComments() !defaultOperationOptions.isCaptureIgnoredChars() + defaultSdlOptions.getMaxCharacters() == Integer.MAX_VALUE defaultSdlOptions.getMaxTokens() == Integer.MAX_VALUE defaultSdlOptions.getMaxWhitespaceTokens() == Integer.MAX_VALUE defaultSdlOptions.isCaptureSourceLocation() @@ -41,11 +45,22 @@ class ParserOptionsTest extends Specification { } def "can set in new option JVM wide"() { - def newDefaultOptions = defaultOptions.transform({ it.captureIgnoredChars(true) }) + def newDefaultOptions = defaultOptions.transform({ + it.captureIgnoredChars(true) + }) def newDefaultOperationOptions = defaultOperationOptions.transform( - { it.captureIgnoredChars(true).captureLineComments(true).maxWhitespaceTokens(300_000) }) + { + it.captureIgnoredChars(true) + .captureLineComments(true) + .maxCharacters(1_000_000) + .maxWhitespaceTokens(300_000) + }) def 
newDefaultSDlOptions = defaultSdlOptions.transform( - { it.captureIgnoredChars(true).captureLineComments(true).maxWhitespaceTokens(300_000) }) + { + it.captureIgnoredChars(true) + .captureLineComments(true) + .maxWhitespaceTokens(300_000) + }) when: ParserOptions.setDefaultParserOptions(newDefaultOptions) @@ -58,18 +73,21 @@ class ParserOptionsTest extends Specification { then: + currentDefaultOptions.getMaxCharacters() == ONE_MB currentDefaultOptions.getMaxTokens() == 15_000 currentDefaultOptions.getMaxWhitespaceTokens() == 200_000 currentDefaultOptions.isCaptureSourceLocation() currentDefaultOptions.isCaptureLineComments() currentDefaultOptions.isCaptureIgnoredChars() + currentDefaultOperationOptions.getMaxCharacters() == 1_000_000 currentDefaultOperationOptions.getMaxTokens() == 15_000 currentDefaultOperationOptions.getMaxWhitespaceTokens() == 300_000 currentDefaultOperationOptions.isCaptureSourceLocation() currentDefaultOperationOptions.isCaptureLineComments() currentDefaultOperationOptions.isCaptureIgnoredChars() + currentDefaultSdlOptions.getMaxCharacters() == Integer.MAX_VALUE currentDefaultSdlOptions.getMaxTokens() == Integer.MAX_VALUE currentDefaultSdlOptions.getMaxWhitespaceTokens() == 300_000 currentDefaultSdlOptions.isCaptureSourceLocation() diff --git a/src/test/groovy/graphql/parser/ParserStressTest.groovy b/src/test/groovy/graphql/parser/ParserStressTest.groovy index 9eeade2eac..1c1f252d2d 100644 --- a/src/test/groovy/graphql/parser/ParserStressTest.groovy +++ b/src/test/groovy/graphql/parser/ParserStressTest.groovy @@ -149,6 +149,28 @@ class ParserStressTest extends Specification { thrown(ParseCancelledException) // too many tokens will catch this wide queries } + def "large single token attack parse can be prevented"() { + String text = "q" * 10_000_000 + text = "query " + text + " {f}" + + when: + new Parser().parseDocument(text, defaultOperationOptions) + + then: + thrown(ParseCancelledTooManyCharsException) + } + + def "inside limits single 
token attack parse will be accepted"() { + String text = "q" * 900_000 + text = "query " + text + " {f}" + + when: + def document = new Parser().parseDocument(text, defaultOperationOptions) + + then: + document != null // its parsed - its invalid of course but parsed + } + String mkDeepQuery(int howMany) { def field = 'f(a:"")' StringBuilder sb = new StringBuilder("query q{") diff --git a/src/test/groovy/graphql/parser/SafeTokenReaderTest.groovy b/src/test/groovy/graphql/parser/SafeTokenReaderTest.groovy new file mode 100644 index 0000000000..e96fe93b9c --- /dev/null +++ b/src/test/groovy/graphql/parser/SafeTokenReaderTest.groovy @@ -0,0 +1,18 @@ +package graphql.parser + +import spock.lang.Specification + +class SafeTokenReaderTest extends Specification { + + def "will count how many its read and stop after max"() { + when: + StringReader sr = new StringReader("0123456789") + SafeTokenReader safeReader = new SafeTokenReader(sr, 5, + { Integer maxChars -> throw new RuntimeException("max " + maxChars) }) + safeReader.readLine() + + then: + def rte = thrown(RuntimeException) + rte.message == "max 5" + } +}