From 2eea683a71037911cc1e276c0fd33ac1ca344ee9 Mon Sep 17 00:00:00 2001 From: MegaIng Date: Fri, 6 Oct 2023 21:09:30 +0200 Subject: [PATCH 1/8] Correct String syntax + add PEP 614 --- lark/grammars/python.lark | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lark/grammars/python.lark b/lark/grammars/python.lark index 8a75966b2..d2d812028 100644 --- a/lark/grammars/python.lark +++ b/lark/grammars/python.lark @@ -15,7 +15,7 @@ single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE file_input: (_NEWLINE | stmt)* eval_input: testlist _NEWLINE* -decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE +decorator: "@" test _NEWLINE decorators: decorator+ decorated: decorators (classdef | funcdef | async_funcdef) @@ -281,7 +281,7 @@ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ NAME: /[^\W\d]\w*/ COMMENT: /#[^\n]*/ -STRING: /([ubf]?r?|r[ubf])("(?!"").*?(? Date: Sat, 7 Oct 2023 04:33:51 +0200 Subject: [PATCH 2/8] Sometimes empty Tokens are generated, for example by the PythonIndenter. 
--- lark/parsers/lalr_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 6ae2a04fd..0d119acfb 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -101,7 +101,7 @@ def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None) assert token is not None state.feed_token(token) - end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) + end_token = Token.new_borrow_pos('$END', '', token) if token is not None else Token('$END', '', 0, 1, 1) return state.feed_token(end_token, True) except UnexpectedInput as e: try: From 5e9d695a73c0a632efe5dab256d752682edfbdcb Mon Sep 17 00:00:00 2001 From: MegaIng Date: Sat, 7 Oct 2023 04:35:18 +0200 Subject: [PATCH 3/8] Update comprehension syntax --- lark/grammars/python.lark | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/lark/grammars/python.lark b/lark/grammars/python.lark index d2d812028..91949b87e 100644 --- a/lark/grammars/python.lark +++ b/lark/grammars/python.lark @@ -250,15 +250,12 @@ kwargs: "**" test ("," argvalue)* ?argvalue: test ("=" test)? -comprehension{comp_result}: comp_result comp_fors [comp_if] -comp_fors: comp_for+ -comp_for: [ASYNC] "for" exprlist "in" or_test +comprehension{comp_result}: comp_result comp_forifs +comp_forifs: comp_forif+ +comp_forif: [ASYNC] "for" exprlist "in" or_test comp_if* ASYNC: "async" ?comp_if: "if" test_nocond -// not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: name - yield_expr: "yield" [testlist] | "yield" "from" test -> yield_from @@ -267,7 +264,7 @@ string: STRING | LONG_STRING // Other terminals -_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ +_NEWLINE: (COMMENT? 
/\r?\n[\t ]*/ )+ %ignore /[\t \f]+/ // WS %ignore /\\[\t \f]*\r?\n/ // LINE_CONT From 51319fa1aadadd7c7f1933895f83b9323449f0eb Mon Sep 17 00:00:00 2001 From: MegaIng Date: Sat, 7 Oct 2023 04:36:04 +0200 Subject: [PATCH 4/8] Small fixes to the match statement syntax --- lark/grammars/python.lark | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lark/grammars/python.lark b/lark/grammars/python.lark index 91949b87e..2a7706d4c 100644 --- a/lark/grammars/python.lark +++ b/lark/grammars/python.lark @@ -11,7 +11,7 @@ // NB: compound_stmt in single_input is followed by extra NEWLINE! // -single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE +single_input: _NEWLINE* (simple_stmt | compound_stmt _NEWLINE*) file_input: (_NEWLINE | stmt)* eval_input: testlist _NEWLINE* @@ -104,7 +104,7 @@ with_stmt: "with" with_items ":" suite with_items: with_item ("," with_item)* with_item: test ["as" name] -match_stmt: "match" test ":" _NEWLINE _INDENT case+ _DEDENT +match_stmt: "match" testlist ":" _NEWLINE _INDENT case+ _DEDENT case: "case" pattern ["if" test] ":" suite @@ -125,11 +125,13 @@ case: "case" pattern ["if" test] ":" suite literal_pattern: inner_literal_pattern -?inner_literal_pattern: "None" -> const_none - | "True" -> const_true - | "False" -> const_false - | STRING -> string - | number +!?inner_literal_pattern: "None" -> const_none + | "True" -> const_true + | "False" -> const_false + | STRING -> string + | number + | "-" number -> neg_number + | "-"? number ("+"|"-") number -> complex_number attr_pattern: NAME ("." NAME)+ -> value @@ -142,11 +144,9 @@ _sequence_pattern: (sequence_item_pattern ("," sequence_item_pattern)* ","?)? | "*" NAME -> star_pattern class_pattern: name_or_attr_pattern "(" [arguments_pattern ","?] ")" -arguments_pattern: pos_arg_pattern ["," keyws_arg_pattern] - | keyws_arg_pattern -> no_pos_arguments +arguments_pattern: as_pattern ("," as_pattern)* ("," keyw_arg_pattern ("," keyw_arg_pattern)*)? 
+ | keyw_arg_pattern ("," keyw_arg_pattern)* -pos_arg_pattern: as_pattern ("," as_pattern)* -keyws_arg_pattern: keyw_arg_pattern ("," keyw_arg_pattern)* keyw_arg_pattern: NAME "=" as_pattern From 9e6fe37240ed5bdf6f4bdc52915bbe1f8de78e53 Mon Sep 17 00:00:00 2001 From: MegaIng Date: Sat, 7 Oct 2023 04:37:07 +0200 Subject: [PATCH 5/8] Add Multi target with syntax --- lark/grammars/python.lark | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/grammars/python.lark b/lark/grammars/python.lark index 2a7706d4c..e03304710 100644 --- a/lark/grammars/python.lark +++ b/lark/grammars/python.lark @@ -102,7 +102,7 @@ except_clause: "except" [test ["as" name]] ":" suite with_stmt: "with" with_items ":" suite with_items: with_item ("," with_item)* -with_item: test ["as" name] +with_item: test ["as" (name|("(" _cs_list{name} ")"))] match_stmt: "match" testlist ":" _NEWLINE _INDENT case+ _DEDENT From 9d931f60a518bce8e26846afec9ec933ce56dee1 Mon Sep 17 00:00:00 2001 From: MegaIng Date: Sat, 7 Oct 2023 04:38:15 +0200 Subject: [PATCH 6/8] Add PEP-654 syntax --- lark/grammars/python.lark | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lark/grammars/python.lark b/lark/grammars/python.lark index e03304710..8dac9360f 100644 --- a/lark/grammars/python.lark +++ b/lark/grammars/python.lark @@ -95,8 +95,10 @@ for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally] | "try" ":" suite finally -> try_finally finally: "finally" ":" suite -except_clauses: except_clause+ +except_clauses: (except_clause+ | except_star_clause+) +except_star_clause: EXCEPT_STAR [test ["as" name]] ":" suite except_clause: "except" [test ["as" name]] ":" suite +EXCEPT_STAR.1: "except*" // NB compile.c makes sure that the default except clause is last From 8e2474dc137cc2dc32e060a650e5f617f71fb718 Mon Sep 17 00:00:00 2001 From: MegaIng Date: Sat, 7 Oct 2023 04:46:48 +0200 Subject: [PATCH 7/8] Update 
function parameter syntax, Add star subscript (PEP 646) --- lark/grammars/python.lark | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/lark/grammars/python.lark b/lark/grammars/python.lark index 8dac9360f..ecec3d3eb 100644 --- a/lark/grammars/python.lark +++ b/lark/grammars/python.lark @@ -22,19 +22,20 @@ decorated: decorators (classdef | funcdef | async_funcdef) async_funcdef: "async" funcdef funcdef: "def" name "(" [parameters] ")" ["->" test] ":" suite -parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]] - | starparams - | kwparams -SLASH: "/" // Otherwise the it will completely disappear and it will be undisguisable in the result -starparams: (starparam | starguard) poststarparams -starparam: "*" typedparam -starguard: "*" -poststarparams: ("," paramvalue)* ["," kwparams] -kwparams: "**" typedparam ","? +parameters: slash_params ("," paramvalue)* ("," star_etc?)? + | paramvalue ("," paramvalue)* ("," star_etc?)? + | star_etc -?paramvalue: typedparam ("=" test)? -?typedparam: name (":" test)? +slash_params: paramvalue ("," paramvalue)* "," "/" +star_etc: kwds ","? + | "*" typedstarparam ("," paramvalue)* ("," kwds)? ","? + | "*" ("," paramvalue)+ ("," kwds)? ","? +kwds: "**" typedparam + +paramvalue: typedparam ("=" test)? +typedparam: name (":" test)? +typedstarparam: name (":" (test | star_expr))? 
lambdef: "lambda" [lambda_params] ":" test @@ -224,12 +225,12 @@ _tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") ?subscriptlist: subscript | subscript (("," subscript)+ [","] | ",") -> subscript_tuple -?subscript: test | ([test] ":" [test] [sliceop]) -> slice +?subscript: test | star_expr | ([test] ":" [test] [sliceop]) -> slice sliceop: ":" [test] ?exprlist: (expr|star_expr) | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") ?testlist: test | testlist_tuple -testlist_tuple: test (("," test)+ [","] | ",") +testlist_tuple: (test|star_expr) (("," (test|star_expr))+ [","] | ",") _dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] key_value: test ":" test
From 19a9276750d47098662c05044b441762c2d43580 Mon Sep 17 00:00:00 2001
From: MegaIng
Date: Sat, 7 Oct 2023 04:49:06 +0200
Subject: [PATCH 8/8] Add python grammar tests based on files from the Cpython test suite.

---
 .../Cpython-tests/.gitignore                  |  1 +
 .../all_parse_succesfully.py                  | 19 +++++
 .../python-grammar-tests/get_std_tests.py     | 47 +++++++++++++
 .../python-grammar-tests/semi_run_doctests.py | 71 +++++++++++++++++++
 4 files changed, 138 insertions(+)
 create mode 100644 examples/python-grammar-tests/Cpython-tests/.gitignore
 create mode 100644 examples/python-grammar-tests/all_parse_succesfully.py
 create mode 100644 examples/python-grammar-tests/get_std_tests.py
 create mode 100644 examples/python-grammar-tests/semi_run_doctests.py

diff --git a/examples/python-grammar-tests/Cpython-tests/.gitignore b/examples/python-grammar-tests/Cpython-tests/.gitignore
new file mode 100644
index 000000000..f104652b6
--- /dev/null
+++ b/examples/python-grammar-tests/Cpython-tests/.gitignore
@@ -0,0 +1 @@
+*.py
diff --git a/examples/python-grammar-tests/all_parse_succesfully.py b/examples/python-grammar-tests/all_parse_succesfully.py
new file mode 100644
--- /dev/null
+++ b/examples/python-grammar-tests/all_parse_succesfully.py
@@ -0,0 +1,19 @@
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+from lark import Lark, UnexpectedInput, logger
+from lark.indenter import PythonIndenter
+
+logger.setLevel(logging.DEBUG)
+python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'],
+                                        parser='lalr', postlex=PythonIndenter(), start=['file_input', 'single_input', 'eval_input'], debug=True)
+# python_parser3.parse('def f(it, *varargs, **kwargs):\n return list(it)\n\n\n', start="single_input")
+
+for file in (Path(__file__).parent / "Cpython-tests").glob("*.py"):
+    try:
+        tree = python_parser3.parse(file.read_text(encoding="utf-8"), start="file_input")
+    except UnexpectedInput as e:
+        print(f'File "{file}", line {e.line}')
+        print(f"{e.__class__.__qualname__}: {str(e)}")
diff --git a/examples/python-grammar-tests/get_std_tests.py b/examples/python-grammar-tests/get_std_tests.py
new file mode 100644
--- /dev/null
+++ b/examples/python-grammar-tests/get_std_tests.py
@@ -0,0 +1,47 @@
+"""
+Downloads the test files listed in ``files`` from the Cpython repo for their parser.
+These are then analyzed, preprocessed and then run by other scripts in this folder
+"""
+import os
+import urllib.request
+
+files = {
+    "Lib/test/test_grammar.py": ["test_with_statement"],  # List of function names to comment out
+    "Lib/test/test_syntax.py": [],
+    "Lib/test/test_exceptions.py": [],
+    "Lib/test/test_patma.py": [],
+    "Lib/test/test_pep646_syntax.py": [],
+}
+
+url_template = "https://raw.githubusercontent.com/python/cpython/main/{}"
+# The directory name must match the one the sibling scripts glob ("Cpython-tests");
+# abspath() keeps the target valid when the script is started via a relative path.
+file_template = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Cpython-tests", "{}")
+
+for filename, names in files.items():
+    file = file_template.format(filename.rpartition("/")[2])
+    print(file)
+    urllib.request.urlretrieve(url_template.format(filename), file)
+    if names:
+        with open(file, "r+", encoding="utf-8") as f:
+            out = []
+            commenting_out = None
+            for line in f.readlines():
+                stripped = line.lstrip()
+                # Only a real "def" line starts a commented-out region; a mere mention of
+                # the name elsewhere has no "def" to take the indentation from.
+                if stripped.startswith("def ") and any(name in stripped for name in names):
+                    commenting_out = line[:len(line) - len(stripped)] + ' '
+                    out.append(f"# {line}")
+                    continue
+                if commenting_out is not None and (
+                        line.startswith(commenting_out) or
+                        line.strip() == '' or
+                        line.strip().startswith('#')):
+                    out.append(f"# {line}")
+                else:
+                    commenting_out = None
+                    out.append(line)
+            f.seek(0)
+            f.writelines(out)
+            f.truncate()
diff --git a/examples/python-grammar-tests/semi_run_doctests.py b/examples/python-grammar-tests/semi_run_doctests.py
new file mode 100644
--- /dev/null
+++ b/examples/python-grammar-tests/semi_run_doctests.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+import ast
+import doctest
+import logging
+from pathlib import Path
+
+from lark import Lark, UnexpectedInput, logger
+from lark.indenter import PythonIndenter
+
+logger.setLevel(logging.DEBUG)
+python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'],
+                                        parser='lalr', postlex=PythonIndenter(), start=['file_input', 'single_input', 'eval_input'], debug=True)
+
+no_print = {
+    "fp": lambda *args, **kwargs: None,
+    "fn": lambda *args, **kwargs: None,
+
+    "tp": lambda *args, **kwargs: None,
+    "tn": lambda *args, **kwargs: None
+}
+
+
+print_all = {
+    "fp": print,
+    "fn": print,
+
+    "tp": print,
+    "tn": print
+}
+
+for file in (Path(__file__).parent / "Cpython-tests").glob("*.py"):
+    text = file.read_text(encoding="utf-8")
+    if "import doctest" in text:
+        doc_parser = doctest.DocTestParser()
+        docstring = next(t.value for t in python_parser3.lex(text) if "STRING" in t.type)
+        # The token is a string literal taken from a downloaded file: decode it with
+        # literal_eval instead of eval so that nothing from that file is executed.
+        examples = doc_parser.get_examples(ast.literal_eval(docstring), str(file))
+        data = {"fp": 0, "tp": 0, "fn": 0, "tn": 0}
+        if "test_pep646_syntax" in file.name:
+            functions = print_all
+        else:
+            functions = no_print
+        for example in examples:
+            try:
+                tree = python_parser3.parse(example.source + "\n", start="single_input")
+                err = None
+            except UnexpectedInput as e:
+                tree = None
+                err = e
+            if example.exc_msg is not None:
+                if err is None:
+                    functions["fp"](f"Unexpected success with example:\n{example.source.rstrip()}")
+                    functions["fp"]("Expected error message:", example.exc_msg.rstrip())
+                    functions["fp"]()
+                    data["fp"] += 1
+                else:
+                    functions["tn"]("Correctly errored on:\n", example.source.rstrip())
+                    data["tn"] += 1
+            else:
+                if err is not None:
+                    functions["fn"](f"Unexpected failure with example:\n{example.source.rstrip()}")
+                    functions["fn"](f"Got error message: {err.__class__.__qualname__}: {str(err)}")
+                    functions["fn"](repr(example.source))
+                    functions["fn"]()
+                    data["fn"] += 1
+                else:
+                    functions["tp"]("Correctly parsed:\n", example.source.rstrip())
+                    data["tp"] += 1
+        print(file, data)