次のように、lepl を使った再帰降下パーサーでテキストを解析できます。
from string import ascii_lowercase as lowercase, digits
from lepl import AnyBut, Delayed, Drop, Integer, Word
def Parser():
    """Build a lepl parse function for the nested ``{key=value|...}`` format.

    ABNF-ish grammar:

        object = '{' pair *( '|' pair ) '}'   ; sequence of pairs inside {}
        pair   = key '=' value                ; key, value separated by =
        value  = integer / string / object    ; any of ..
        key    = [a-z_][a-z_0-9]*             ; regex syntax
        string = [^{}=|]+                     ; regex syntax

    Returns:
        The callable obtained from ``get_parse()`` on the top-level
        ``object`` matcher.
    """
    # Forward declaration: ``value`` refers to ``object`` before it is defined.
    nested = Delayed()

    # Keys start with a lowercase letter or underscore; a digit can't be 1st.
    first_chars = lowercase + '_'
    key = Word(first_chars, first_chars + digits)

    # The three alternatives a value may take, named after the grammar rules.
    integer = Integer() >> int
    string = AnyBut('{}=|')[1:, ...]
    value = integer | string | nested

    # The '=' separator is dropped, leaving a (key, value) tuple.
    pair = (key & Drop('=') & value) > tuple

    # One or more pairs separated by '|' inside braces, collected into a dict.
    nested += (Drop('{') & pair[1:, Drop('|')] & Drop('}')) > dict
    return nested.get_parse()
例:
import json

# Sample input: nested objects in the {key=value|key=value} format.
text = (
    "{resp_to={request=objects_download}|"
    "objects={object1={object_name=Name of object 1|objecttype=type1}"
    "|object2={object_name=Name of object 2|objecttype=type1}}|error=0}"
)

# Build the parse function once, then take the first result: the parsed dict.
parse = Parser()
obj = parse(text)[0]

# Pretty print the parsed structure as JSON.
print(json.dumps(obj, indent=2))
出力:
{
  "resp_to": {
    "request": "objects_download"
  },
  "objects": {
    "object1": {
      "object_name": "Name of object 1",
      "objecttype": "type1"
    },
    "object2": {
      "object_name": "Name of object 2",
      "objecttype": "type1"
    }
  },
  "error": 0
}