2020
2121from .exceptions import ConfigurationError , assert_config , UnexpectedInput
2222from .utils import Serialize , SerializeMemoizer , FS , isascii , logger
23- from .load_grammar import load_grammar , FromPackageLoader , Grammar , verify_used_files , PackageResource , md5_digest
23+ from .load_grammar import load_grammar , FromPackageLoader , Grammar , verify_used_files , PackageResource , sha256_digest
2424from .tree import Tree
2525from .common import LexerConf , ParserConf , _ParserArgType , _LexerArgType
2626
@@ -54,6 +54,7 @@ class LarkOptions(Serialize):
5454
5555 start : List [str ]
5656 debug : bool
57+ strict : bool
5758 transformer : 'Optional[Transformer]'
5859 propagate_positions : Union [bool , str ]
5960 maybe_placeholders : bool
@@ -81,10 +82,14 @@ class LarkOptions(Serialize):
8182 debug
8283 Display debug information and extra warnings. Use only when debugging (Default: ``False``)
8384 When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
85+ strict
86+ Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions.
8487 transformer
8588 Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
8689 propagate_positions
87- Propagates (line, column, end_line, end_column) attributes into all tree branches.
90+ Propagates positional attributes into the 'meta' attribute of all tree branches.
91+ Sets attributes: (line, column, end_line, end_column, start_pos, end_pos,
92+ container_line, container_column, container_end_line, container_end_column)
8893 Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
8994 maybe_placeholders
9095 When ``True``, the ``[]`` operator returns ``None`` when not matched.
@@ -156,6 +161,7 @@ class LarkOptions(Serialize):
156161 # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
157162 _defaults : Dict [str , Any ] = {
158163 'debug' : False ,
164+ 'strict' : False ,
159165 'keep_all_tokens' : False ,
160166 'tree_class' : None ,
161167 'cache' : False ,
@@ -254,6 +260,7 @@ class Lark(Serialize):
254260 grammar : 'Grammar'
255261 options : LarkOptions
256262 lexer : Lexer
263+ parser : 'ParsingFrontend'
257264 terminals : Collection [TerminalDef ]
258265
259266 def __init__ (self , grammar : 'Union[Grammar, str, IO[str]]' , ** options ) -> None :
@@ -288,7 +295,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
288295 grammar = read ()
289296
290297 cache_fn = None
291- cache_md5 = None
298+ cache_sha256 = None
292299 if isinstance (grammar , str ):
293300 self .source_grammar = grammar
294301 if self .options .use_bytes :
@@ -303,7 +310,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
303310 options_str = '' .join (k + str (v ) for k , v in options .items () if k not in unhashable )
304311 from . import __version__
305312 s = grammar + options_str + __version__ + str (sys .version_info [:2 ])
306- cache_md5 = md5_digest (s )
313+ cache_sha256 = sha256_digest (s )
307314
308315 if isinstance (self .options .cache , str ):
309316 cache_fn = self .options .cache
@@ -319,7 +326,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
319326 # specific reason - we just want a username.
320327 username = "unknown"
321328
322- cache_fn = tempfile .gettempdir () + "/.lark_cache_%s_%s_%s_%s.tmp" % (username , cache_md5 , * sys .version_info [:2 ])
329+ cache_fn = tempfile .gettempdir () + "/.lark_cache_%s_%s_%s_%s.tmp" % (username , cache_sha256 , * sys .version_info [:2 ])
323330
324331 old_options = self .options
325332 try :
@@ -328,9 +335,9 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
328335 # Remove options that aren't relevant for loading from cache
329336 for name in (set (options ) - _LOAD_ALLOWED_OPTIONS ):
330337 del options [name ]
331- file_md5 = f .readline ().rstrip (b'\n ' )
338+ file_sha256 = f .readline ().rstrip (b'\n ' )
332339 cached_used_files = pickle .load (f )
333- if file_md5 == cache_md5 .encode ('utf8' ) and verify_used_files (cached_used_files ):
340+ if file_sha256 == cache_sha256 .encode ('utf8' ) and verify_used_files (cached_used_files ):
334341 cached_parser_data = pickle .load (f )
335342 self ._load (cached_parser_data , ** options )
336343 return
@@ -424,7 +431,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
424431 # TODO Deprecate lexer_callbacks?
425432 self .lexer_conf = LexerConf (
426433 self .terminals , re_module , self .ignore_tokens , self .options .postlex ,
427- self .options .lexer_callbacks , self .options .g_regex_flags , use_bytes = self .options .use_bytes
434+ self .options .lexer_callbacks , self .options .g_regex_flags , use_bytes = self .options .use_bytes , strict = self . options . strict
428435 )
429436
430437 if self .options .parser :
@@ -436,8 +443,8 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
436443 logger .debug ('Saving grammar to cache: %s' , cache_fn )
437444 try :
438445 with FS .open (cache_fn , 'wb' ) as f :
439- assert cache_md5 is not None
440- f .write (cache_md5 .encode ('utf8' ) + b'\n ' )
446+ assert cache_sha256 is not None
447+ f .write (cache_sha256 .encode ('utf8' ) + b'\n ' )
441448 pickle .dump (used_files , f )
442449 self .save (f , _LOAD_ALLOWED_OPTIONS )
443450 except IOError as e :
0 commit comments