") + ident("name") + LPAR + Optional(delimitedList(arg), [])("args") + RPAR + code_body("body")) c_function.ignore(cStyleComment) source_code = ''' int is_odd(int x) { return (x%2); } int dec_to_hex(char hchar) { if (hchar >= '0' && hchar <= '9') { return (ord(hchar)-ord('0')); } else { return (10+ord(hchar)-ord('A')); } } ''' for func in c_function.searchString(source_code): print("%(name)s (%(type)s) args: %(args)s" % func) prints:: is_odd (int) args: [['int', 'x']] dec_to_hex (int) args: [['char', 'hchar']] """ if opener == closer: raise ValueError("opening and closing strings cannot be the same") if content is None: if isinstance(opener, basestring) and isinstance(closer, basestring): if len(opener) == 1 and len(closer) == 1: if ignoreExpr is not None: content = (Combine(OneOrMore(~ignoreExpr + CharsNotIn(opener + closer + ParserElement.DEFAULT_WHITE_CHARS, exact=1) ) ).setParseAction(lambda t: t[0].strip())) else: content = (empty.copy() + CharsNotIn(opener + closer + ParserElement.DEFAULT_WHITE_CHARS ).setParseAction(lambda t: t[0].strip())) else: if ignoreExpr is not None: content = (Combine(OneOrMore(~ignoreExpr + ~Literal(opener) + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)) ).setParseAction(lambda t: t[0].strip())) else: content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)) ).setParseAction(lambda t: t[0].strip())) else: raise ValueError("opening and closing arguments must be strings if no content expression is given") ret = Forward() if ignoreExpr is not None: ret <<= Group(Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)) else: ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) ret.setName('nested %s%s expression' % (opener, closer)) return ret def indentedBlock(blockStatementExpr, indentStack, indent=True): """Helper method for defining space-delimited indentation blocks, such as those used to define block statements in Python source code. Parameters: - blockStatementExpr - expression defining syntax of statement that is repeated within the indented block - indentStack - list created by caller to manage indentation stack (multiple statementWithIndentedBlock expressions within a single grammar should share a common indentStack) - indent - boolean indicating whether block must be indented beyond the current level; set to False for block of left-most statements (default= ``True``) A valid block must contain at least one ``blockStatement``. Example:: data = ''' def A(z): A1 B = 100 G = A2 A2 A3 B def BB(a,b,c): BB1 def BBA(): bba1 bba2 bba3 C D def spam(x,y): def eggs(z): pass ''' indentStack = [1] stmt = Forward() identifier = Word(alphas, alphanums) funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":") func_body = indentedBlock(stmt, indentStack) funcDef = Group(funcDecl + func_body) rvalue = Forward() funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") rvalue << (funcCall | identifier | Word(nums)) assignment = Group(identifier + "=" + rvalue) stmt << (funcDef | assignment | identifier) module_body = OneOrMore(stmt) parseTree = module_body.parseString(data) parseTree.pprint() prints:: [['def', 'A', ['(', 'z', ')'], ':', [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], 'B', ['def', 'BB', ['(', 'a', 'b', 'c', ')'], ':', [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], 'C', 'D', ['def', 'spam', ['(', 'x', 'y', ')'], ':', [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] """ backup_stack = indentStack[:] def reset_stack(): indentStack[:] = backup_stack def checkPeerIndent(s, l, t): if l >= len(s): return curCol = col(l, s) if curCol != indentStack[-1]: if curCol > indentStack[-1]: raise ParseException(s, l, "illegal nesting") raise ParseException(s, l, "not a peer entry") def checkSubIndent(s, l, t): curCol = col(l, s) if curCol > indentStack[-1]: indentStack.append(curCol) else: raise ParseException(s, l, "not a subentry") def checkUnindent(s, l, t): if l >= len(s): return curCol = col(l, s) if not(indentStack and curCol in indentStack): raise ParseException(s, l, "not an unindent") if curCol < indentStack[-1]: indentStack.pop() NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd()) INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') PEER = Empty().setParseAction(checkPeerIndent).setName('') UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') if indent: smExpr = Group(Optional(NL) + INDENT + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd()) + UNDENT) else: smExpr = Group(Optional(NL) + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd()) + UNDENT) smExpr.setFailAction(lambda a, b, c, d: reset_stack()) blockStatementExpr.ignore(_bslash + LineEnd()) return smExpr.setName('indented block') alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:").setName('any tag')) _htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\'')) commonHTMLEntity = Regex('&(?P' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") def replaceHTMLEntity(t): """Helper parser action to replace common HTML entities with their special characters""" return _htmlEntityMap.get(t.entity) # it's easy to get these comment structures wrong - they're very common, so may as well make them available cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") "Comment of the form ``/* ... */``" htmlComment = Regex(r"").setName("HTML comment") "Comment of the form ````" restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line") dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") "Comment of the form ``// ... (to end of line)``" cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment).setName("C++ style comment") "Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`" javaStyleComment = cppStyleComment "Same as :class:`cppStyleComment`" pythonStyleComment = Regex(r"#.*").setName("Python style comment") "Comment of the form ``# ... (to end of line)``" _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + Optional(Word(" \t") + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem") commaSeparatedList = delimitedList(Optional(quotedString.copy() | _commasepitem, default="")).setName("commaSeparatedList") """(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`. """ # some other useful expressions - using lower-case class name since we are really using this as a namespace class pyparsing_common: """Here are some common low-level expressions that may be useful in jump-starting parser development: - numeric forms (:class:`integers`, :class:`reals`, :class:`scientific notation`) - common :class:`programming identifiers` - network addresses (:class:`MAC`, :class:`IPv4`, :class:`IPv6`) - ISO8601 :class:`dates` and :class:`datetime` - :class:`UUID` - :class:`comma-separated list` Parse actions: - :class:`convertToInteger` - :class:`convertToFloat` - :class:`convertToDate` - :class:`convertToDatetime` - :class:`stripHTMLTags` - :class:`upcaseTokens` - :class:`downcaseTokens` Example:: pyparsing_common.number.runTests(''' # any int or real number, returned as the appropriate type 100 -100 +100 3.14159 6.02e23 1e-12 ''') pyparsing_common.fnumber.runTests(''' # any int or real number, returned as float 100 -100 +100 3.14159 6.02e23 1e-12 ''') pyparsing_common.hex_integer.runTests(''' # hex numbers 100 FF ''') pyparsing_common.fraction.runTests(''' # fractions 1/2 -3/4 ''') pyparsing_common.mixed_integer.runTests(''' # mixed fractions 1 1/2 -3/4 1-3/4 ''') import uuid pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) pyparsing_common.uuid.runTests(''' # uuid 12345678-1234-5678-1234-567812345678 ''') prints:: # any int or real number, returned as the appropriate type 100 [100] -100 [-100] +100 [100] 3.14159 [3.14159] 6.02e23 [6.02e+23] 1e-12 [1e-12] # any int or real number, returned as float 100 [100.0] -100 [-100.0] +100 [100.0] 3.14159 [3.14159] 6.02e23 [6.02e+23] 1e-12 [1e-12] # hex numbers 100 [256] FF [255] # fractions 1/2 [0.5] -3/4 [-0.75] # mixed fractions 1 [1] 1/2 [0.5] -3/4 [-0.75] 1-3/4 [1.75] # uuid 12345678-1234-5678-1234-567812345678 [UUID('12345678-1234-5678-1234-567812345678')] """ convertToInteger = tokenMap(int) """ Parse action for converting parsed integers to Python int """ convertToFloat = tokenMap(float) """ Parse action for converting parsed numbers to Python float """ integer = Word(nums).setName("integer").setParseAction(convertToInteger) """expression that parses an unsigned integer, returns an int""" hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16)) """expression that parses a hexadecimal integer, returns an int""" signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) """expression that parses an integer with optional leading sign, returns an int""" fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") """fractional expression of an integer divided by an integer, returns a float""" fraction.addParseAction(lambda t: t[0]/t[-1]) mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" mixed_integer.addParseAction(sum) real = Regex(r'[+-]?(?:\d+\.\d*|\.\d+)').setName("real number").setParseAction(convertToFloat) """expression that parses a floating point number and returns a float""" sci_real = Regex(r'[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) """expression that parses a floating point number with optional scientific notation and returns a float""" # streamlining this expression makes the docs nicer-looking number = (sci_real | real | signed_integer).streamline() """any numeric expression, returns the corresponding Python type""" fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) """any int or real number, returned as float""" identifier = Word(alphas + '_', alphanums + '_').setName("identifier") """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") "IPv4 address (``0.0.0.0 - 255.255.255.255``)" _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address") _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6)) ).setName("short IPv6 address") _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") "IPv6 address (long, short, or mixed form)" mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" @staticmethod def convertToDate(fmt="%Y-%m-%d"): """ Helper to create a parse action for converting parsed date string to Python datetime.date Params - - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``) Example:: date_expr = pyparsing_common.iso8601_date.copy() date_expr.setParseAction(pyparsing_common.convertToDate()) print(date_expr.parseString("1999-12-31")) prints:: [datetime.date(1999, 12, 31)] """ def cvt_fn(s, l, t): try: return datetime.strptime(t[0], fmt).date() except ValueError as ve: raise ParseException(s, l, str(ve)) return cvt_fn @staticmethod def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): """Helper to create a parse action for converting parsed datetime string to Python datetime.datetime Params - - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``) Example:: dt_expr = pyparsing_common.iso8601_datetime.copy() dt_expr.setParseAction(pyparsing_common.convertToDatetime()) print(dt_expr.parseString("1999-12-31T23:59:59.999")) prints:: [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] """ def cvt_fn(s, l, t): try: return datetime.strptime(t[0], fmt) except ValueError as ve: raise ParseException(s, l, str(ve)) return cvt_fn iso8601_date = Regex(r'(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?').setName("ISO8601 date") "ISO8601 date (``yyyy-mm-dd``)" iso8601_datetime = Regex(r'(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``" uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)" _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() @staticmethod def stripHTMLTags(s, l, tokens): """Parse action to remove HTML tags from web page HTML source Example:: # strip HTML links from normal text text = 'More info at the pyparsing wiki page' td, td_end = makeHTMLTags("TD") table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end print(table_text.parseString(text).body) Prints:: More info at the pyparsing wiki page """ return pyparsing_common._html_stripper.transformString(tokens[0]) _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') + Optional(White(" \t")))).streamline().setName("commaItem") comma_separated_list = delimitedList(Optional(quotedString.copy() | _commasepitem, default='') ).setName("comma separated list") """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) """Parse action to convert tokens to upper case.""" downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) """Parse action to convert tokens to lower case.""" class _lazyclassproperty(object): def __init__(self, fn): self.fn = fn self.__doc__ = fn.__doc__ self.__name__ = fn.__name__ def __get__(self, obj, cls): if cls is None: cls = type(obj) if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', []) for superclass in cls.__mro__[1:]): cls._intern = {} attrname = self.fn.__name__ if attrname not in cls._intern: cls._intern[attrname] = self.fn(cls) return cls._intern[attrname] class unicode_set(object): """ A set of Unicode characters, for language-specific strings for ``alphas``, ``nums``, ``alphanums``, and ``printables``. A unicode_set is defined by a list of ranges in the Unicode character set, in a class attribute ``_ranges``, such as:: _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),] A unicode set can also be defined using multiple inheritance of other unicode sets:: class CJK(Chinese, Japanese, Korean): pass """ _ranges = [] @classmethod def _get_chars_for_ranges(cls): ret = [] for cc in cls.__mro__: if cc is unicode_set: break for rr in cc._ranges: ret.extend(range(rr[0], rr[-1] + 1)) return [unichr(c) for c in sorted(set(ret))] @_lazyclassproperty def printables(cls): "all non-whitespace characters in this range" return u''.join(filterfalse(unicode.isspace, cls._get_chars_for_ranges())) @_lazyclassproperty def alphas(cls): "all alphabetic characters in this range" return u''.join(filter(unicode.isalpha, cls._get_chars_for_ranges())) @_lazyclassproperty def nums(cls): "all numeric digit characters in this range" return u''.join(filter(unicode.isdigit, cls._get_chars_for_ranges())) @_lazyclassproperty def alphanums(cls): "all alphanumeric characters in this range" return cls.alphas + cls.nums class pyparsing_unicode(unicode_set): """ A namespace class for defining common language unicode_sets. """ _ranges = [(32, sys.maxunicode)] class Latin1(unicode_set): "Unicode set for Latin-1 Unicode Character Range" _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),] class LatinA(unicode_set): "Unicode set for Latin-A Unicode Character Range" _ranges = [(0x0100, 0x017f),] class LatinB(unicode_set): "Unicode set for Latin-B Unicode Character Range" _ranges = [(0x0180, 0x024f),] class Greek(unicode_set): "Unicode set for Greek Unicode Character Ranges" _ranges = [ (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d), (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4), (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe), ] class Cyrillic(unicode_set): "Unicode set for Cyrillic Unicode Character Range" _ranges = [(0x0400, 0x04ff)] class Chinese(unicode_set): "Unicode set for Chinese Unicode Character Range" _ranges = [(0x4e00, 0x9fff), (0x3000, 0x303f),] class Japanese(unicode_set): "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges" _ranges = [] class Kanji(unicode_set): "Unicode set for Kanji Unicode Character Range" _ranges = [(0x4E00, 0x9Fbf), (0x3000, 0x303f),] class Hiragana(unicode_set): "Unicode set for Hiragana Unicode Character Range" _ranges = [(0x3040, 0x309f),] class Katakana(unicode_set): "Unicode set for Katakana Unicode Character Range" _ranges = [(0x30a0, 0x30ff),] class Korean(unicode_set): "Unicode set for Korean Unicode Character Range" _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), (0x3000, 0x303f),] class CJK(Chinese, Japanese, Korean): "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range" pass class Thai(unicode_set): "Unicode set for Thai Unicode Character Range" _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b),] class Arabic(unicode_set): "Unicode set for Arabic Unicode Character Range" _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f),] class Hebrew(unicode_set): "Unicode set for Hebrew Unicode Character Range" _ranges = [(0x0590, 0x05ff),] class Devanagari(unicode_set): "Unicode set for Devanagari Unicode Character Range" _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)] pyparsing_unicode.Japanese._ranges = (pyparsing_unicode.Japanese.Kanji._ranges + pyparsing_unicode.Japanese.Hiragana._ranges + pyparsing_unicode.Japanese.Katakana._ranges) # define ranges in language character sets if PY_3: setattr(pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic) setattr(pyparsing_unicode, u"中文", pyparsing_unicode.Chinese) setattr(pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic) setattr(pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek) setattr(pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew) setattr(pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese) setattr(pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji) setattr(pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana) setattr(pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana) setattr(pyparsing_unicode, u"한국어", pyparsing_unicode.Korean) setattr(pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai) setattr(pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari) class pyparsing_test: """ namespace class for classes useful in writing unit tests """ class reset_pyparsing_context: """ Context manager to be used when writing unit tests that modify pyparsing config values: - packrat parsing - default whitespace characters. - default keyword characters - literal string auto-conversion class - __diag__ settings Example: with reset_pyparsing_context(): # test that literals used to construct a grammar are automatically suppressed ParserElement.inlineLiteralsUsing(Suppress) term = Word(alphas) | Word(nums) group = Group('(' + term[...] + ')') # assert that the '()' characters are not included in the parsed tokens self.assertParseAndCheckLisst(group, "(abc 123 def)", ['abc', '123', 'def']) # after exiting context manager, literals are converted to Literal expressions again """ def __init__(self): self._save_context = {} def save(self): self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS self._save_context[ "literal_string_class" ] = ParserElement._literalStringClass self._save_context["packrat_enabled"] = ParserElement._packratEnabled self._save_context["packrat_parse"] = ParserElement._parse self._save_context["__diag__"] = { name: getattr(__diag__, name) for name in __diag__._all_names } self._save_context["__compat__"] = { "collect_all_And_tokens": __compat__.collect_all_And_tokens } return self def restore(self): # reset pyparsing global state if ( ParserElement.DEFAULT_WHITE_CHARS != self._save_context["default_whitespace"] ): ParserElement.setDefaultWhitespaceChars( self._save_context["default_whitespace"] ) Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"] ParserElement.inlineLiteralsUsing( self._save_context["literal_string_class"] ) for name, value in self._save_context["__diag__"].items(): setattr(__diag__, name, value) ParserElement._packratEnabled = self._save_context["packrat_enabled"] ParserElement._parse = self._save_context["packrat_parse"] __compat__.collect_all_And_tokens = self._save_context["__compat__"] def __enter__(self): return self.save() def __exit__(self, *args): return self.restore() class TestParseResultsAsserts: """ A mixin class to add parse results assertion methods to normal unittest.TestCase classes. """ def assertParseResultsEquals( self, result, expected_list=None, expected_dict=None, msg=None ): """ Unit test assertion to compare a ParseResults object with an optional expected_list, and compare any defined results names with an optional expected_dict. """ if expected_list is not None: self.assertEqual(expected_list, result.asList(), msg=msg) if expected_dict is not None: self.assertEqual(expected_dict, result.asDict(), msg=msg) def assertParseAndCheckList( self, expr, test_string, expected_list, msg=None, verbose=True ): """ Convenience wrapper assert to test a parser element and input string, and assert that the resulting ParseResults.asList() is equal to the expected_list. """ result = expr.parseString(test_string, parseAll=True) if verbose: print(result.dump()) self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg) def assertParseAndCheckDict( self, expr, test_string, expected_dict, msg=None, verbose=True ): """ Convenience wrapper assert to test a parser element and input string, and assert that the resulting ParseResults.asDict() is equal to the expected_dict. """ result = expr.parseString(test_string, parseAll=True) if verbose: print(result.dump()) self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg) def assertRunTestResults( self, run_tests_report, expected_parse_results=None, msg=None ): """ Unit test assertion to evaluate output of ParserElement.runTests(). If a list of list-dict tuples is given as the expected_parse_results argument, then these are zipped with the report tuples returned by runTests and evaluated using assertParseResultsEquals. Finally, asserts that the overall runTests() success value is True. :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests :param expected_parse_results (optional): [tuple(str, list, dict, Exception)] """ run_test_success, run_test_results = run_tests_report if expected_parse_results is not None: merged = [ (rpt[0], rpt[1], expected) for rpt, expected in zip(run_test_results, expected_parse_results) ] for test_string, result, expected in merged: # expected should be a tuple containing a list and/or a dict or an exception, # and optional failure message string # an empty tuple will skip any result validation fail_msg = next( (exp for exp in expected if isinstance(exp, str)), None ) expected_exception = next( ( exp for exp in expected if isinstance(exp, type) and issubclass(exp, Exception) ), None, ) if expected_exception is not None: with self.assertRaises( expected_exception=expected_exception, msg=fail_msg or msg ): if isinstance(result, Exception): raise result else: expected_list = next( (exp for exp in expected if isinstance(exp, list)), None ) expected_dict = next( (exp for exp in expected if isinstance(exp, dict)), None ) if (expected_list, expected_dict) != (None, None): self.assertParseResultsEquals( result, expected_list=expected_list, expected_dict=expected_dict, msg=fail_msg or msg, ) else: # warning here maybe? print("no validation for {!r}".format(test_string)) # do this last, in case some specific test results can be reported instead self.assertTrue( run_test_success, msg=msg if msg is not None else "failed runTests" ) @contextmanager def assertRaisesParseException(self, exc_type=ParseException, msg=None): with self.assertRaises(exc_type, msg=msg): yield if __name__ == "__main__": selectToken = CaselessLiteral("select") fromToken = CaselessLiteral("from") ident = Word(alphas, alphanums + "_$") columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) columnNameList = Group(delimitedList(columnName)).setName("columns") columnSpec = ('*' | columnNameList) tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) tableNameList = Group(delimitedList(tableName)).setName("tables") simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") # demo runTests method, including embedded comments in test string simpleSQL.runTests(""" # '*' as column list and dotted table name select * from SYS.XYZZY # caseless match on "SELECT", and casts back to "select" SELECT * from XYZZY, ABC # list of column names, and mixed case SELECT keyword Select AA,BB,CC from Sys.dual # multiple tables Select A, B, C from Sys.dual, Table2 # invalid SELECT keyword - should fail Xelect A, B, C from Sys.dual # incomplete command - should fail Select # invalid column name - should fail Select ^^^ frox Sys.dual """) pyparsing_common.number.runTests(""" 100 -100 +100 3.14159 6.02e23 1e-12 """) # any int or real number, returned as float pyparsing_common.fnumber.runTests(""" 100 -100 +100 3.14159 6.02e23 1e-12 """) pyparsing_common.hex_integer.runTests(""" 100 FF """) import uuid pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) pyparsing_common.uuid.runTests(""" 12345678-1234-5678-1234-567812345678 """)