All: fix read/print of \\, and \n
[jackhill/mal.git] / nim / reader.nim
1 import re, strutils, sequtils, types
2
# Matches one token: optional leading whitespace/commas, then (captured) either
# "~@", a single special character, a double-quoted string with backslash
# escapes, a ';' comment running to end of line, or a run of ordinary
# characters.
let tokenRE = re"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)"""

# Matches an optionally negative run of decimal digits up to end of input.
let intRE = re"-?[0-9]+$"
6
# Raised by read_str when the input produces no tokens.
type Blank* = object of Exception

# Cursor over a token list: `position` is the index of the next token that
# next/peek will hand out.
type Reader = object
  tokens: seq[string]
  position: int
13
proc next(r: var Reader): string =
  ## Returns the token at the current position and moves past it.
  ## When every token has already been consumed, returns nil and leaves the
  ## position unchanged.
  if r.position < r.tokens.len:
    result = r.tokens[r.position]
    r.position += 1
  else:
    result = nil
20
proc peek(r: Reader): string =
  ## Returns the token at the current position without consuming it,
  ## or nil when no tokens remain.
  if r.position < r.tokens.len:
    return r.tokens[r.position]
  result = nil
24
proc tokenize(str: string): seq[string] =
  ## Splits `str` into tokens by repeatedly applying `tokenRE`.
  ##
  ## The pattern itself skips leading whitespace and commas. Captures that
  ## start with ';' (comments) and empty captures are not added to the result.
  result = @[]
  var pos = 0
  while pos < str.len:
    var matches: array[2, string]
    let bounds = str.findBounds(tokenRE, matches, pos)
    # A zero-length match is reported as last == first - 1. It consumes no
    # input, so accepting it would leave `pos` where it is and the loop would
    # never finish; handle it like a failed match and step one char forward.
    if bounds.first != -1 and bounds.last >= bounds.first:
      pos = bounds.last + 1
      # The capture is empty when only whitespace/commas were matched (for
      # example trailing blanks). That is not a token, and it has no first
      # character to inspect.
      if matches[0].len > 0 and matches[0][0] != ';':
        result.add matches[0]
    else:
      inc pos
37
# Forward declaration: read_seq calls read_form, which is defined further down.
proc read_form(r: var Reader): MalType

proc read_seq(r: var Reader, fr, to: string): seq[MalType] =
  ## Reads the forms found between the opening token `fr` and the closing
  ## token `to`, consuming both delimiters.
  ##
  ## Raises ValueError when the next token is not `fr`, or when the tokens
  ## run out before `to` is seen.
  result = @[]
  if r.next != fr:
    raise newException(ValueError, "expected '" & fr & "'")

  while true:
    let tok = r.peek
    if tok == to:
      break
    if tok == nil:
      raise newException(ValueError, "expected '" & to & "', got EOF")
    result.add r.read_form
  # Consume the closing delimiter.
  discard r.next
51
proc read_list(r: var Reader): MalType =
  ## Reads a "(" ... ")" sequence and passes its elements to `list`.
  list(r.read_seq("(", ")"))
54
proc read_vector(r: var Reader): MalType =
  ## Reads a "[" ... "]" sequence and passes its elements to `vector`.
  vector(r.read_seq("[", "]"))
57
proc read_hash_map(r: var Reader): MalType =
  ## Reads a "{" ... "}" sequence and passes its elements to `hash_map`.
  hash_map(r.read_seq("{", "}"))
60
proc decode_escapes(s: string): string =
  ## Decodes the escapes \" \n and \\ in a single left-to-right pass.
  ##
  ## One pass is required for correctness: replacing each escape in a separate
  ## pass lets an earlier pass consume the second backslash of an escaped
  ## backslash (the text `\\n` would wrongly become backslash + newline).
  result = newStringOfCap(s.len)
  var i = 0
  while i < s.len:
    if s[i] == '\\' and i + 1 < s.len:
      case s[i + 1]
      of 'n': result.add '\n'
      of '"': result.add '"'
      of '\\': result.add '\\'
      else:
        # Unrecognised escape: keep both characters as they are.
        result.add s[i]
        result.add s[i + 1]
      inc i, 2
    else:
      result.add s[i]
      inc i

proc read_atom(r: var Reader): MalType =
  ## Consumes one token and converts it to a value:
  ## - a token matching `intRE` becomes `number`;
  ## - a token starting with '"' becomes `str`, with the surrounding quotes
  ##   removed and escapes decoded;
  ## - a token starting with ':' becomes `keyword`, without the colon;
  ## - "nil", "true" and "false" become nilObj, trueObj and falseObj;
  ## - anything else (including an empty token) becomes `symbol`.
  let t = r.next
  if t.match(intRE): number t.parseInt
  elif t.len > 0 and t[0] == '"': str decode_escapes(t[1 .. t.high - 1])
  elif t.len > 0 and t[0] == ':': keyword t[1 .. t.high]
  elif t == "nil": nilObj
  elif t == "true": trueObj
  elif t == "false": falseObj
  else: symbol t
70
proc read_form(r: var Reader): MalType =
  ## Reads one complete form starting at the current token.
  ##
  ## Reader macros (' ` ~ ~@ ^ @) are expanded into the corresponding
  ## (quote ...), (quasiquote ...), (unquote ...), (splice-unquote ...),
  ## (with-meta ...) and (deref ...) lists. Opening brackets delegate to
  ## read_list / read_vector / read_hash_map; every other token is an atom.
  ##
  ## Raises ValueError on a stray closing bracket, or when the tokens run out
  ## where a form is required (for example after a lone quote character).
  let tok = r.peek
  # peek returns nil once the tokens are exhausted; report that as a reader
  # error instead of indexing into nil below.
  if tok == nil:
    raise newException(ValueError, "unexpected EOF")

  # A comment token reads as nil. tokenize already drops comment tokens, so
  # this only matters for token lists built some other way.
  if tok.len > 0 and tok[0] == ';':
    discard r.next
    return nilObj

  case tok
  of "'":
    discard r.next
    result = list(symbol "quote", r.read_form)
  of "`":
    discard r.next
    result = list(symbol "quasiquote", r.read_form)
  of "~":
    discard r.next
    result = list(symbol "unquote", r.read_form)
  of "~@":
    discard r.next
    result = list(symbol "splice-unquote", r.read_form)
  of "^":
    discard r.next
    # The metadata form comes first in the input but last in the result.
    let meta = r.read_form
    result = list(symbol "with-meta", r.read_form, meta)
  of "@":
    discard r.next
    result = list(symbol "deref", r.read_form)

  # list
  of "(": result = r.read_list
  of ")": raise newException(ValueError, "unexpected ')'")

  # vector
  of "[": result = r.read_vector
  of "]": raise newException(ValueError, "unexpected ']'")

  # hash-map
  of "{": result = r.read_hash_map
  of "}": raise newException(ValueError, "unexpected '}'")

  # atom
  else: result = r.read_atom
110
proc read_str*(str: string): MalType =
  ## Tokenizes `str` and reads the first form from the resulting tokens.
  ## Raises Blank when `str` produces no tokens at all.
  let tokens = tokenize(str)
  if tokens.len == 0:
    raise newException(Blank, "Blank line")
  var r = Reader(tokens: tokens)
  result = r.read_form