diff --git a/examples/python-grammar-tests/Cpython-tests/.gitignore b/examples/python-grammar-tests/Cpython-tests/.gitignore
new file mode 100644
index 000000000..f104652b6
--- /dev/null
+++ b/examples/python-grammar-tests/Cpython-tests/.gitignore
@@ -0,0 +1 @@
+*.py
diff --git a/examples/python-grammar-tests/all_parse_succesfully.py b/examples/python-grammar-tests/all_parse_succesfully.py
new file mode 100644
index 000000000..3669da102
--- /dev/null
+++ b/examples/python-grammar-tests/all_parse_succesfully.py
@@ -0,0 +1,23 @@
+"""
+Parses every file in the Cpython-tests folder with the Python grammar bundled
+with lark and reports each file the grammar fails on.
+"""
+from __future__ import annotations
+
+import logging
+import os
+
+from lark import Lark, UnexpectedInput, logger
+from lark.indenter import PythonIndenter
+from pathlib import Path
+logger.setLevel(logging.DEBUG)
+python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'],
+                                        parser='lalr', postlex=PythonIndenter(), start=['file_input', 'single_input', 'eval_input'], debug=True)
+
+# Sorted so that the report comes out in the same order on every run.
+for file in sorted((Path(__file__).parent / "Cpython-tests").glob("*.py")):
+    try:
+        python_parser3.parse(file.read_text(encoding="utf-8"), start="file_input")
+    except UnexpectedInput as e:
+        print(f'File "{file}", line {e.line}')
+        print(f"{e.__class__.__qualname__}: {e}")
diff --git a/examples/python-grammar-tests/get_std_tests.py b/examples/python-grammar-tests/get_std_tests.py
new file mode 100644
index 000000000..63fb60b72
--- /dev/null
+++ b/examples/python-grammar-tests/get_std_tests.py
@@ -0,0 +1,62 @@
+"""
+Downloads the test files listed in `files` from the Cpython repo for their parser.
+These are then analyzed, preprocessed and then run by other scripts in this folder
+"""
+import urllib.request
+import os
+import re
+from pathlib import Path
+
+files = {
+    "Lib/test/test_grammar.py": ["test_with_statement"],  # List of function names to comment out
+    "Lib/test/test_syntax.py": [],
+    "Lib/test/test_exceptions.py": [],
+    "Lib/test/test_patma.py": [],
+    "Lib/test/test_pep646_syntax.py": [],
+}
+
+url_template = "https://raw.githubusercontent.com/python/cpython/main/{}"
+# Spelled exactly like the folder the other scripts glob: on a case-sensitive
+# filesystem "CPython-tests" would be a different (missing) directory.
+target_dir = Path(__file__).parent / "Cpython-tests"
+
+# Captures the indentation and the name of a (possibly async) function definition.
+def_pattern = re.compile(r"([ \t]*)(?:async[ \t]+)?def[ \t]+(\w+)")
+
+
+def comment_out_functions(lines, names):
+    """
+    Return a copy of `lines` in which every function whose name is in `names` is commented out.
+
+    The commented-out region is the `def` line plus all directly following lines that
+    are blank, are comments, or are indented deeper than the `def`.
+    """
+    out = []
+    body_prefix = None  # Indentation shared by the body being commented out, if any
+    for line in lines:
+        match = def_pattern.match(line)
+        if match is not None and match.group(2) in names:
+            body_prefix = match.group(1) + ' '
+            out.append(f"# {line}")
+        elif body_prefix is not None and (
+                line.startswith(body_prefix) or
+                line.strip() == '' or
+                line.strip().startswith('#')):
+            out.append(f"# {line}")
+        else:
+            body_prefix = None
+            out.append(line)
+    return out
+
+
+target_dir.mkdir(parents=True, exist_ok=True)
+for filename, to_comment_out in files.items():
+    file = target_dir / filename.rpartition("/")[2]
+    print(file)
+    urllib.request.urlretrieve(url_template.format(filename), file)
+    if to_comment_out:
+        with open(file, encoding="utf-8") as f:
+            lines = f.readlines()
+        # Reopening in "w" mode truncates, so no stale tail can survive the rewrite.
+        with open(file, "w", encoding="utf-8") as f:
+            f.writelines(comment_out_functions(lines, to_comment_out))
diff --git a/examples/python-grammar-tests/semi_run_doctests.py b/examples/python-grammar-tests/semi_run_doctests.py
new file mode 100644
index 000000000..dc8ce7c49
--- /dev/null
+++ b/examples/python-grammar-tests/semi_run_doctests.py
@@ -0,0 +1,81 @@
+"""
+Feeds the doctest examples found in the downloaded Cpython test files to the
+Python grammar bundled with lark and counts, per file, how often the parser
+agrees with the example:
+
+tp: parsed, and the example expects no exception
+tn: rejected, and the example expects an exception
+fp: parsed, although the example expects an exception
+fn: rejected, although the example expects no exception
+"""
+from __future__ import annotations
+
+import ast
+import logging
+import os
+import doctest
+
+from lark import Lark, UnexpectedInput, logger
+from lark.indenter import PythonIndenter
+from pathlib import Path
+logger.setLevel(logging.DEBUG)
+python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'],
+                                        parser='lalr', postlex=PythonIndenter(), start=['file_input', 'single_input', 'eval_input'], debug=True)
+
+# Reporting functions per outcome; no_print keeps the counts but drops the details.
+no_print = {
+    "fp": lambda *args, **kwargs: None,
+    "fn": lambda *args, **kwargs: None,
+
+    "tp": lambda *args, **kwargs: None,
+    "tn": lambda *args, **kwargs: None
+}
+
+
+print_all = {
+    "fp": print,
+    "fn": print,
+
+    "tp": print,
+    "tn": print
+}
+
+for file in sorted((Path(__file__).parent / "Cpython-tests").glob("*.py")):
+    text = file.read_text(encoding="utf-8")
+    if "import doctest" not in text:
+        continue
+    # Only the first string literal of the file is searched for examples.
+    docstring = next((t.value for t in python_parser3.lex(text) if "STRING" in t.type), None)
+    if docstring is None:
+        print(file, "contains no string literal, skipped")
+        continue
+    # The token is source text taken from a downloaded file: literal_eval decodes
+    # the string literal without executing code, which eval would not guarantee.
+    examples = doctest.DocTestParser().get_examples(ast.literal_eval(docstring), str(file))
+    data = {"fp": 0, "tp": 0, "fn": 0, "tn": 0}
+    functions = print_all if "test_pep646_syntax" in file.name else no_print
+    for example in examples:
+        try:
+            python_parser3.parse(example.source + "\n", start="single_input")
+            err = None
+        except UnexpectedInput as e:
+            err = e
+        if example.exc_msg is not None:
+            if err is None:
+                functions["fp"](f"Unexpected success with example:\n{example.source.rstrip()}")
+                functions["fp"]("Expected error message:", example.exc_msg.rstrip())
+                functions["fp"]()
+                data["fp"] += 1
+            else:
+                functions["tn"]("Correctly errored on:\n", example.source.rstrip())
+                data["tn"] += 1
+        elif err is not None:
+            functions["fn"](f"Unexpected failure with example:\n{example.source.rstrip()}")
+            functions["fn"](f"Got error message: {err.__class__.__qualname__}: {err}")
+            functions["fn"](repr(example.source))
+            functions["fn"]()
+            data["fn"] += 1
+        else:
+            functions["tp"]("Correctly parsed:\n", example.source.rstrip())
+            data["tp"] += 1
+    print(file, data)
diff --git a/lark/grammars/python.lark b/lark/grammars/python.lark index 8a75966b2..ecec3d3eb 100644 --- a/lark/grammars/python.lark +++ b/lark/grammars/python.lark @@ -11,30 +11,31 @@ // NB: compound_stmt in single_input is followed by extra NEWLINE! 
// -single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE +single_input: _NEWLINE* (simple_stmt | compound_stmt _NEWLINE*) file_input: (_NEWLINE | stmt)* eval_input: testlist _NEWLINE* -decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE +decorator: "@" test _NEWLINE decorators: decorator+ decorated: decorators (classdef | funcdef | async_funcdef) async_funcdef: "async" funcdef funcdef: "def" name "(" [parameters] ")" ["->" test] ":" suite -parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]] - | starparams - | kwparams -SLASH: "/" // Otherwise the it will completely disappear and it will be undisguisable in the result -starparams: (starparam | starguard) poststarparams -starparam: "*" typedparam -starguard: "*" -poststarparams: ("," paramvalue)* ["," kwparams] -kwparams: "**" typedparam ","? +parameters: slash_params ("," paramvalue)* ("," star_etc?)? + | paramvalue ("," paramvalue)* ("," star_etc?)? + | star_etc -?paramvalue: typedparam ("=" test)? -?typedparam: name (":" test)? +slash_params: paramvalue ("," paramvalue)* "," "/" +star_etc: kwds ","? + | "*" typedstarparam ("," paramvalue)* ("," kwds)? ","? + | "*" ("," paramvalue)+ ("," kwds)? ","? +kwds: "**" typedparam + +paramvalue: typedparam ("=" test)? +typedparam: name (":" test)? +typedstarparam: name (":" (test | star_expr))? 
lambdef: "lambda" [lambda_params] ":" test @@ -95,16 +96,18 @@ for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally] | "try" ":" suite finally -> try_finally finally: "finally" ":" suite -except_clauses: except_clause+ +except_clauses: (except_clause+ | except_star_clause+) +except_star_clause: EXCEPT_STAR [test ["as" name]] ":" suite except_clause: "except" [test ["as" name]] ":" suite +EXCEPT_STAR.1: "except*" // NB compile.c makes sure that the default except clause is last with_stmt: "with" with_items ":" suite with_items: with_item ("," with_item)* -with_item: test ["as" name] +with_item: test ["as" (name|("(" _cs_list{name} ")"))] -match_stmt: "match" test ":" _NEWLINE _INDENT case+ _DEDENT +match_stmt: "match" testlist ":" _NEWLINE _INDENT case+ _DEDENT case: "case" pattern ["if" test] ":" suite @@ -125,11 +128,13 @@ case: "case" pattern ["if" test] ":" suite literal_pattern: inner_literal_pattern -?inner_literal_pattern: "None" -> const_none - | "True" -> const_true - | "False" -> const_false - | STRING -> string - | number +!?inner_literal_pattern: "None" -> const_none + | "True" -> const_true + | "False" -> const_false + | STRING -> string + | number + | "-" number -> neg_number + | "-"? number ("+"|"-") number -> complex_number attr_pattern: NAME ("." NAME)+ -> value @@ -142,11 +147,9 @@ _sequence_pattern: (sequence_item_pattern ("," sequence_item_pattern)* ","?)? | "*" NAME -> star_pattern class_pattern: name_or_attr_pattern "(" [arguments_pattern ","?] ")" -arguments_pattern: pos_arg_pattern ["," keyws_arg_pattern] - | keyws_arg_pattern -> no_pos_arguments +arguments_pattern: as_pattern ("," as_pattern)* ("," keyw_arg_pattern ("," keyw_arg_pattern)*)? 
+ | keyw_arg_pattern ("," keyw_arg_pattern)* -pos_arg_pattern: as_pattern ("," as_pattern)* -keyws_arg_pattern: keyw_arg_pattern ("," keyw_arg_pattern)* keyw_arg_pattern: NAME "=" as_pattern @@ -222,12 +225,12 @@ _tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") ?subscriptlist: subscript | subscript (("," subscript)+ [","] | ",") -> subscript_tuple -?subscript: test | ([test] ":" [test] [sliceop]) -> slice +?subscript: test | star_expr | ([test] ":" [test] [sliceop]) -> slice sliceop: ":" [test] ?exprlist: (expr|star_expr) | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") ?testlist: test | testlist_tuple -testlist_tuple: test (("," test)+ [","] | ",") +testlist_tuple: (test|star_expr) (("," (test|star_expr))+ [","] | ",") _dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] key_value: test ":" test @@ -250,15 +253,12 @@ kwargs: "**" test ("," argvalue)* ?argvalue: test ("=" test)? -comprehension{comp_result}: comp_result comp_fors [comp_if] -comp_fors: comp_for+ -comp_for: [ASYNC] "for" exprlist "in" or_test +comprehension{comp_result}: comp_result comp_forifs +comp_forifs: comp_forif+ +comp_forif: [ASYNC] "for" exprlist "in" or_test comp_if* ASYNC: "async" ?comp_if: "if" test_nocond -// not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: name - yield_expr: "yield" [testlist] | "yield" "from" test -> yield_from @@ -267,7 +267,7 @@ string: STRING | LONG_STRING // Other terminals -_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ +_NEWLINE: (COMMENT? /\r?\n[\t ]*/ )+ %ignore /[\t \f]+/ // WS %ignore /\\[\t \f]*\r?\n/ // LINE_CONT @@ -281,7 +281,7 @@ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ NAME: /[^\W\d]\w*/ COMMENT: /#[^\n]*/ -STRING: /([ubf]?r?|r[ubf])("(?!"").*?(?