Basic: Reduce GOSUB use. Partial self-host to step3
[jackhill/mal.git] / basic / basicpp.py
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4 import argparse
5 import re
6 import sys
7
def debug(*args, **kwargs):
    """Print a diagnostic message to stderr.

    Accepts the same positional and keyword arguments as print(), except
    that the destination stream is always sys.stderr.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
10
def parse_args(argv=None):
    """Parse the preprocessor's command-line options.

    argv: optional list of argument strings.  When None (the default)
          argparse reads the process command line, exactly as before.

    Returns the argparse namespace with the attributes infile, keep_rems,
    keep_blank_lines, keep_indent, skip_misc_fixups, number_lines,
    keep_labels and combine_lines.

    Exits via parser.error() (SystemExit) when --combine-lines and
    --keep-rems are given together.
    """
    parser = argparse.ArgumentParser(description='Preprocess Basic code.')
    parser.add_argument('infile', type=str,
                        help='the Basic file to preprocess')
    # --keep-rems is a plain on/off switch, so the help text describes a
    # boolean rather than a numeric level.
    parser.add_argument('--keep-rems', action='store_true', default=False,
                        help='Keep REM statements instead of dropping them')
    parser.add_argument('--keep-blank-lines', action='store_true',
                        default=False,
                        help='Keep blank lines from the original file')
    parser.add_argument('--keep-indent', action='store_true', default=False,
                        help='Keep line indenting')
    parser.add_argument('--skip-misc-fixups', action='store_true',
                        default=False,
                        help='Skip miscellaneous fixup/shrink fixups')
    parser.add_argument('--number-lines', action='store_true', default=False,
                        help='Number the lines')
    parser.add_argument('--keep-labels', action='store_true', default=False,
                        help='Keep string labels instead of replacing with '
                             'line numbers')
    parser.add_argument('--combine-lines', action='store_true', default=False,
                        help='Combine lines using the ":" separator')

    args = parser.parse_args(argv)
    if args.combine_lines and args.keep_rems:
        parser.error("--combine-lines and --keep-rems are mutually exclusive")

    return args
35
36 # pull in include files
def resolve_includes(orig_lines, keep_rems=0):
    """Expand "REM $INCLUDE: 'file'" directives found in orig_lines.

    orig_lines: list of source lines.
    keep_rems:  when true, the included text is wrapped in BEGIN/END REM
                marker lines naming the file.

    Each directive line is replaced by the right-stripped lines of the
    named file.  A file is expanded only the first time it is named; later
    directives for the same file are dropped and reported through debug().
    Directives inside an included file are not expanded.

    Returns the new list of lines.  Errors from open() propagate.
    """
    included = set()
    lines = []
    for line in orig_lines:
        m = re.match(r"^ *REM \$INCLUDE: '([^']*)' *$", line)
        if not m:
            lines.append(line)
            continue
        f = m.group(1)
        if f in included:
            debug("Ignoring already included file: %s" % f)
            continue
        # Record the file so a repeated directive is skipped.
        included.add(f)
        with open(f) as infile:
            ilines = [l.rstrip() for l in infile.readlines()]
        if keep_rems:
            lines.append("REM vvv BEGIN '%s' vvv" % f)
        lines.extend(ilines)
        if keep_rems:
            lines.append("REM ^^^ END '%s' ^^^" % f)
    return lines
54
def drop_blank_lines(orig_lines):
    """Return orig_lines without empty or whitespace-only lines.

    Only lines with no non-whitespace character are removed; a line made
    solely of word characters (e.g. an unindented "END") is kept.
    """
    return [line for line in orig_lines if line.strip()]
61
62
def drop_rems(orig_lines):
    """Strip REM comments from a list of lines.

    A line whose first non-space text starts with REM is removed.  A line
    ending in ": REM ..." is cut back to the text before the last such
    suffix.  All other lines pass through unchanged.
    """
    kept = []
    for text in orig_lines:
        if re.match(r"^ *REM", text):
            # whole-line comment
            continue
        trailing = re.match(r"^(.*): *REM .*$", text)
        kept.append(trailing.group(1) if trailing else text)
    return kept
74
def remove_indent(orig_lines):
    """Return orig_lines with leading spaces removed from every line.

    Only the space character is stripped; other leading whitespace such as
    tabs is left alone.  Empty and all-space lines become "" instead of
    raising, so the function is safe when blank lines are kept.
    """
    return [line.lstrip(" ") for line in orig_lines]
81
def misc_fixups(orig_lines):
    """Apply the small size-reducing regex rewrites to the program text.

    The lines are joined with newlines, each rewrite is applied in order to
    the whole text, and the result is split back into a list of lines:
      * "THEN GOTO" becomes "THEN"
      * the space between PRINT and an opening double quote is removed
      * the space after IF is removed
    """
    rewrites = (
        (r"\bTHEN GOTO\b", "THEN"),
        (r"\bPRINT \"", "PRINT\""),
        (r"\bIF ", "IF"),
    )
    joined = "\n".join(orig_lines)
    for pattern, replacement in rewrites:
        joined = re.sub(pattern, replacement, joined)
    return joined.split("\n")
88
def finalize(lines, args):
    """Number lines, resolve labels to line numbers and combine lines.

    lines: list of program lines (strings).
    args:  object with the boolean attributes number_lines, keep_labels and
           combine_lines (e.g. the namespace returned by parse_args()).

    Steps, each controlled by args:
      * number_lines: prefix every line with a sequential number starting
        at 1.  Unless keep_labels is set, "LABEL:" lines are dropped and
        the label is mapped to the number of the next emitted line.
      * not keep_labels: rewrite label references after THEN / GOTO /
        GOSUB / ON..GOTO / ON..GOSUB to the mapped line numbers.
      * combine_lines: merge consecutive numbered lines with ":" and
        renumber, then rewrite the jump targets to the new numbers.

    Returns the resulting list of lines.
    """
    labels_lines = {}   # label name -> line number it stands for
    lines_labels = {}   # line number -> label name

    # number lines
    if args.number_lines:
        src_lines = lines
        lines = []
        lnum = 1
        for line in src_lines:
            if not args.keep_labels:
                m = re.match(r"^ *([^ ]*): *$", line)
                if m:
                    # The label line itself is not emitted; lnum is the
                    # number the next emitted line will receive.
                    labels_lines[m.group(1)] = lnum
                    lines_labels[lnum] = m.group(1)
                    continue
            lines.append("%s %s" % (lnum, line))
            lnum += 1

    def update_labels_lines(text, a, b):
        """Replace jump target a (label or number) with b throughout text."""
        # Escape the target so a label containing regex metacharacters is
        # matched literally.
        target = re.escape(str(a))
        stext = ""
        # Repeat until stable: each ON..GOTO/GOSUB pass replaces only one
        # occurrence per statement.
        while stext != text:
            stext = text
            text = re.sub(r"(THEN) %s\b" % target, r"THEN %s" % b, stext)
            # \g<1> is required here because a digit follows the group
            # reference directly.
            text = re.sub(r"(ON [^:\n]* GOTO [^:\n]*)\b%s\b" % target,
                          r"\g<1>%s" % b, text)
            text = re.sub(r"(ON [^:\n]* GOSUB [^:\n]*)\b%s\b" % target,
                          r"\g<1>%s" % b, text)
            text = re.sub(r"(GOSUB) %s\b" % target, r"\1 %s" % b, text)
            text = re.sub(r"(GOTO) %s\b" % target, r"\1 %s" % b, text)
        return text

    if not args.keep_labels:
        text = "\n".join(lines)
        # search for and replace GOTO/GOSUBs
        for label, lnum in labels_lines.items():
            text = update_labels_lines(text, label, lnum)
        lines = text.split("\n")

    if args.combine_lines:
        renumber = {}   # old line number -> new line number
        src_lines = lines
        lines = []
        acc_line = ""
        ends_line = r".*\b(?:GOTO|THEN|RETURN)\b.*"

        def renum(old_num, line):
            """Start a new output line and record old -> new numbering."""
            lnum = len(lines) + 1
            renumber[old_num] = lnum
            return "%s %s" % (lnum, line)

        for numbered in src_lines:
            # TODO: handle args.keep_labels and (not args.number_lines)
            m = re.match(r"^([0-9]*) (.*)$", numbered)
            old_num = int(m.group(1))
            line = m.group(2)

            if acc_line == "":
                # Starting a new line
                acc_line = renum(old_num, line)
            elif old_num in lines_labels or re.match(r"^ *FOR\b.*", line):
                # This is a GOTO/GOSUB target or FOR loop so it must
                # start a line of its own
                lines.append(acc_line)
                acc_line = renum(old_num, line)
            elif re.match(ends_line, acc_line):
                # GOTO/THEN/RETURN are last thing on the line
                lines.append(acc_line)
                acc_line = renum(old_num, line)
            # TODO: not sure why this is 88 rather than 80
            elif len(acc_line) + 1 + len(line) < 88:
                # Continue building up the line
                acc_line = acc_line + ":" + line
                # GOTO/THEN/RETURN must be the last things on a line so
                # start a new line
                if re.match(ends_line, line):
                    lines.append(acc_line)
                    acc_line = ""
            else:
                # Too long so start a new line
                lines.append(acc_line)
                acc_line = renum(old_num, line)
        if acc_line != "":
            lines.append(acc_line)

        # Finally renumber GOTO/GOSUBs.  Old numbers are processed in
        # ascending order; combining never increases a line's number.
        text = "\n".join(lines)
        for a in sorted(renumber.keys()):
            text = update_labels_lines(text, a, renumber[a])
        lines = text.split("\n")

    return lines
186
# Script entry point: read the BASIC file named on the command line, run the
# preprocessing passes selected by the options, and print the result to
# stdout.  Progress messages go to stderr through debug().
if __name__ == '__main__':
    args = parse_args()

    debug("Preprocessing basic file '"+args.infile+"'")

    # read in lines (the file is closed as soon as it has been read)
    with open(args.infile) as infile:
        lines = [l.rstrip() for l in infile.readlines()]
    debug("Number of original lines: %s" % len(lines))

    # pull in include files
    lines = resolve_includes(lines, keep_rems=args.keep_rems)
    debug("Number of lines after includes: %s" % len(lines))

    # drop blank lines
    if not args.keep_blank_lines:
        lines = drop_blank_lines(lines)
        debug("Number of lines after dropping blank lines: %s" % len(lines))

    # keep/drop REMs
    if not args.keep_rems:
        lines = drop_rems(lines)
        debug("Number of lines after dropping REMs: %s" % len(lines))

    # keep/remove the indenting
    if not args.keep_indent:
        lines = remove_indent(lines)

    # apply some miscellaneous simple fixups/regex transforms
    if not args.skip_misc_fixups:
        lines = misc_fixups(lines)

    # number lines, drop/keep labels, combine lines
    lines = finalize(lines, args)

    print("\n".join(lines))