1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
import math
import re
import functools
from typing import Any, Optional
import uuid
from dataclasses import dataclass, fields, field
from copy import deepcopy
class SexpError(ValueError):
    """Raised when the raw text of an S-expression cannot be parsed."""
class FormatError(ValueError):
    """Raised when an S-expression parses but its structure is semantically invalid."""
class AtomType(type):
    """Metaclass that turns unknown class attributes into instances.

    For a class ``C`` using this metaclass, ``C.some_name`` is shorthand for
    ``C('some_name')`` whenever ``some_name`` is not a real attribute of ``C``.
    """

    def __getattr__(cls, key):
        # Python only calls __getattr__ after regular attribute lookup on the
        # class has failed, so real methods and attributes are never shadowed.
        instance = cls(key)
        return instance
@functools.total_ordering
class Atom(metaclass=AtomType):
    """A bare (unquoted) S-expression symbol wrapping its text in ``value``.

    An Atom hashes like its text and compares equal to another Atom or a plain
    ``str`` carrying the same text. Ordering comparisons are only defined
    against Atom and ``str`` operands.
    """

    def __init__(self, obj=''):
        """Create an Atom from a string or copy the text of another Atom.

        Raises:
            TypeError: if ``obj`` is neither a ``str`` nor an ``Atom``.
        """
        if isinstance(obj, str):
            self.value = obj
            return
        if not isinstance(obj, Atom):
            raise TypeError(f'Atom argument must be str, not {type(obj)}')
        self.value = obj.value

    def __str__(self):
        return self.value

    def __repr__(self):
        return f'@{self.value}'

    def __hash__(self):
        # Same hash as the wrapped string, matching the equality rule below.
        return hash(self.value)

    def __eq__(self, other):
        if isinstance(other, (Atom, str)):
            return self.value == str(other)
        # Any other type is compared directly against the wrapped string.
        return self.value == other

    def __lt__(self, other):
        if isinstance(other, (Atom, str)):
            return self.value < str(other)
        raise TypeError(f'Cannot compare Atom and {type(other)}')

    def __gt__(self, other):
        if isinstance(other, (Atom, str)):
            return self.value > str(other)
        raise TypeError(f'Cannot compare Atom and {type(other)}')
# Tokenizer pattern for S-expressions, written in verbose (x) and multiline (m)
# mode. Each match skips leading whitespace and fills exactly one capture
# group, in this order:
#   1. contents of a double-quoted string (\\ and \" are accepted inside)
#   2. an opening parenthesis
#   3. a closing parenthesis
#   4. a float with optional sign; must be followed by whitespace or ")"
#   5. an integer with optional minus; must be followed by whitespace or ")"
#   6. a bare token that does not start with a digit, quote, whitespace or
#      parenthesis
term_regex = r"""(?mx)
\s*(?:
"((?:\\\\|\\"|[^"])*)"|
(\()|
(\))|
([+-]?\d+\.\d+(?=[\s\)]))|
(\-?\d+(?=[\s\)]))|
([^0-9"\s()][^"\s)]*)
)"""
def parse_sexp(sexp: str) -> Any:
    """Parse the text of a single S-expression into nested Python objects.

    Parenthesised groups become lists, quoted strings become ``str``, numbers
    become ``int`` or ``float`` and bare tokens become ``Atom`` instances.

    Args:
        sexp: Source text containing exactly one top-level expression.

    Returns:
        The parsed top-level expression.

    Raises:
        SexpError: if the text holds no expression, more than one top-level
            element, or extra tokens after the end of the expression.
    """
    re_iter = re.finditer(term_regex, sexp)
    rv = list(_parse_sexp_internal(re_iter))

    # The top-level parse only stops early when it meets an unmatched ')', so
    # tokens remain in the iterator only after such a parenthesis.
    # NOTE(review): that first stray ')' itself is consumed by the parser and
    # is therefore not reported here; only what follows it is.
    for leftover in re_iter:
        quoted_str, lparen, rparen, *rest = leftover.groups()
        # An empty quoted string is captured as '' (falsy), so compare against
        # None to make sure a trailing "" is still reported as garbage.
        if quoted_str is not None or lparen or any(rest):
            raise SexpError(
                f'Leftover garbage after end of expression at position {leftover.start()}')
        if rparen:
            raise SexpError(f'Unbalanced closing parenthesis at position {leftover.start()}')

    if not rv:
        raise SexpError('No or empty expression')

    if len(rv) > 1:
        raise SexpError('Missing initial opening parenthesis')

    return rv[0]
def _parse_sexp_internal(re_iter) -> Any:
    """Yield the parsed elements of one nesting level from a token iterator.

    ``re_iter`` is a shared iterator of regex matches with six capture groups
    (quoted string, "(", ")", float, integer, bare token). An opening
    parenthesis recurses on the same iterator and yields the nested elements
    as a list; a closing parenthesis ends the current level.
    """
    for token in re_iter:
        string_lit, opening, closing, real, whole, symbol = token.groups()

        if opening:
            # The nested call consumes tokens up to the matching ")".
            yield list(_parse_sexp_internal(re_iter))
            continue

        if closing:
            return

        if symbol is not None:
            yield Atom(symbol)
        elif string_lit is not None:
            # Only escaped double quotes are unescaped here.
            yield string_lit.replace('\\"', '"')
        elif real:
            yield float(real)
        elif whole:
            yield int(whole)
def build_sexp(exp, indent=' ') -> str:
    """Serialize a nested Python structure to S-expression text.

    Lists and tuples become parenthesised groups. Within a group, elements at
    index 1 to 5 that are not themselves lists or tuples are appended after a
    single space as long as the text accumulated so far is shorter than 120
    characters; every other element after the first starts on a new line
    prefixed with ``indent``. Strings are double-quoted with embedded double
    quotes escaped, floats are written with up to six decimals, and anything
    else is written via ``str()``.

    Args:
        exp: Value to serialize.
        indent: Prefix used for elements placed on a new line; nested groups
            receive this prefix extended by one space.
    """
    if isinstance(exp, (list, tuple)):
        text = '('
        nested_indent = f'{indent} '
        for position, item in enumerate(exp):
            if position > 0:
                stays_inline = (
                    position <= 5
                    and len(text) < 120
                    and not isinstance(item, (list, tuple))
                )
                text += ' ' if stays_inline else '\n' + indent
            text += build_sexp(item, indent=nested_indent)
        return text + ')'

    # Checked before the str test so that anything equal to '' is emitted as
    # an empty quoted string.
    if exp == '':
        return '""'

    if isinstance(exp, str):
        escaped = exp.replace('"', r'\"')
        return f'"{escaped}"'

    if not isinstance(exp, float):
        return str(exp)

    # Fixed six-decimal notation with trailing zeros trimmed, keeping at least
    # one digit after the decimal point.
    digits = f'{exp:.6f}'.rstrip('0')
    if digits.endswith('.'):
        digits += '0'
    return digits
if __name__ == "__main__":
    # Small round-trip demo: parse a sample expression, show the resulting
    # Python structure, then serialize it back to text.
    sample = """ ( ( Winson_GM-402B_5x5mm_P1.27mm data "quoted data" 123 4.5)
(data "with \\"escaped quotes\\"")
(data (123 (4.5) "(more" "data)")))"""

    print("Input S-expression:")
    print(sample)

    tree = parse_sexp(sample)
    print("\nParsed to Python:", tree)

    rebuilt = build_sexp(tree)
    print(f"\nThen back to: '{rebuilt}'")
|