Implemented conj, (badly) implemented meta--needs to work for all objects and not...
[jackhill/mal.git] / kotlin / src / mal / reader.kt
1 package mal
2
3 import kotlin.text.Regex
4
/**
 * Matches one token per match. Leading whitespace and commas are skipped; capture group 1
 * holds the token itself: `~@`, a single special character, a double-quoted string literal
 * (with backslash escapes), a `;` comment running to the end of the line, or a run of
 * characters that are neither whitespace nor special.
 */
val TOKEN_REGEX = Regex("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]{}('\"`,;)]*)")

/**
 * Classifies a single token. Exactly one capture group participates in a match:
 *
 *  1. integer literal (optional leading `-`)
 *  2. `nil`
 *  3. `true`
 *  4. `false`
 *  5. contents of a double-quoted string (quotes stripped, escapes still encoded)
 *  6. keyword name (text after the leading `:`)
 *  7. any other token that contains no double quote (a symbol)
 *
 * DOT_MATCHES_ALL lets group 5 span string literals that contain raw line breaks, and the
 * keyword alternative is anchored so that a `:` in the middle of a token can never be
 * picked up as a keyword.
 */
val ATOM_REGEX = Regex(
    "(^-?[0-9]+$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|^:(.*)$|(^[^\"]*$)",
    RegexOption.DOT_MATCHES_ALL
)
7
/**
 * Cursor over a stream of tokens that always keeps one token of lookahead.
 *
 * [peek] exposes the lookahead token without consuming it; [next] returns it and moves on.
 * Both return `null` once the underlying sequence has been exhausted.
 */
class Reader(sequence: Sequence<String>) {
    val tokens = sequence.iterator()

    // Lookahead slot: the token the next call to peek()/next() will report.
    var current = advance()

    /** Returns the lookahead token and refills the lookahead from the iterator. */
    fun next(): String? {
        val consumed = current
        current = advance()
        return consumed
    }

    /** Returns the lookahead token without consuming it. */
    fun peek(): String? {
        return current
    }

    // Pulls the next token from the iterator, or null when none are left.
    private fun advance(): String? {
        if (!tokens.hasNext()) {
            return null
        }
        return tokens.next()
    }
}
22
/**
 * Tokenizes [input] and reads a single form from the resulting tokens.
 *
 * Returns NIL when [input] is null (i.e. when the tokenizer yields no sequence at all).
 */
fun read_str(input: String?): MalType {
    val tokens = tokenizer(input)
    return if (tokens == null) {
        NIL
    } else {
        read_form(Reader(tokens))
    }
}
27
/**
 * Splits [input] into tokens using TOKEN_REGEX.
 *
 * Empty tokens and comment tokens (those starting with `;`) are dropped.
 * Returns null when [input] itself is null.
 */
fun tokenizer(input: String?): Sequence<String>? {
    val text = input ?: return null

    val rawTokens = TOKEN_REGEX.findAll(text).map { match -> match.groups[1]?.value as String }
    return rawTokens.filter { token -> token != "" && !token.startsWith(";") }
}
35
/**
 * Reads one complete form, dispatching on the next token without consuming it.
 *
 * Opening delimiters hand off to the matching collection reader, reader macros
 * (`'`, `` ` ``, `~`, `~@`, `^`) are expanded into their list forms, and everything
 * else is read as an atom.
 *
 * Throws MalContinue when no token is available, and MalReaderException when a closing
 * delimiter appears where a form is expected.
 */
fun read_form(reader: Reader): MalType {
    val token = reader.peek()

    return when (token) {
        null -> throw MalContinue()

        // Collection openers.
        "(" -> read_list(reader)
        "[" -> read_vector(reader)
        "{" -> read_hashmap(reader)

        // A closer with no matching opener.
        ")" -> throw MalReaderException("expected form, got ')'")
        "]" -> throw MalReaderException("expected form, got ']'")
        "}" -> throw MalReaderException("expected form, got '}'")

        // Reader macros.
        "'" -> read_shorthand(reader, "quote")
        "`" -> read_shorthand(reader, "quasiquote")
        "~" -> read_shorthand(reader, "unquote")
        "~@" -> read_shorthand(reader, "splice-unquote")
        "^" -> read_with_meta(reader)

        else -> read_atom(reader)
    }
}

// Expands `^meta obj` into the list (with-meta obj meta). The metadata form is read first
// because it comes first in the source text, but it is placed last in the resulting list.
private fun read_with_meta(reader: Reader): MalType {
    reader.next() // consume "^"

    val meta = read_form(reader)
    val target = read_form(reader)

    val expansion = MalList()
    expansion.conj_BANG(MalSymbol("with-meta"))
    expansion.conj_BANG(target)
    expansion.conj_BANG(meta)
    return expansion
}
64
/** Reads a parenthesised list; the reader must be positioned on the opening `(`. */
fun read_list(reader: Reader): MalType {
    return read_sequence(reader, MalList(), ")")
}
/** Reads a bracketed vector; the reader must be positioned on the opening `[`. */
fun read_vector(reader: Reader): MalType {
    return read_sequence(reader, MalVector(), "]")
}
67
// Shared implementation for lists and vectors: skips the opening delimiter, then appends
// forms to `sequence` until the closing token `end` is consumed. Throws MalReaderException
// if the tokens run out before `end` is seen.
private fun read_sequence(reader: Reader, sequence: IMutableSeq, end: String): MalType {
    reader.next() // consume the opening delimiter

    while (true) {
        val token = reader.peek()

        if (token == null) {
            throw MalReaderException("expected '$end', got EOF")
        }
        if (token == end) {
            reader.next() // consume the closing delimiter
            return sequence
        }

        sequence.conj_BANG(read_form(reader))
    }
}
85
/**
 * Reads a `{ key value ... }` literal into a MalHashMap; the reader must be positioned on
 * the opening `{`.
 *
 * Throws MalReaderException when a key is not a MalString, when a key has no value
 * before the tokens run out, or when the closing `}` is missing.
 */
fun read_hashmap(reader: Reader): MalType {
    reader.next() // consume "{"
    val result = MalHashMap()

    while (true) {
        val token = reader.peek()

        if (token == null) {
            throw MalReaderException("expected '}', got EOF")
        }
        if (token == "}") {
            reader.next() // consume "}"
            return result
        }

        val key = read_form(reader)
        if (key !is MalString) {
            throw MalReaderException("hash-map keys must be strings or keywords")
        }

        // A key must be followed by a value form.
        if (reader.peek() == null) {
            throw MalReaderException("expected form, got EOF")
        }
        val value = read_form(reader)

        result.assoc_BANG(key, value)
    }
}
115
/**
 * Expands a one-token reader macro: consumes the macro token, then wraps the following
 * form as the two-element list `(symbol form)`.
 */
fun read_shorthand(reader: Reader, symbol: String): MalType {
    reader.next() // consume the macro token

    val expansion = MalList()
    expansion.conj_BANG(MalSymbol(symbol))

    val quoted = read_form(reader)
    expansion.conj_BANG(quoted)

    return expansion
}
125
/**
 * Consumes one token and converts it to the matching atom, using the capture groups of
 * ATOM_REGEX: integer, `nil`, `true`, `false`, string, keyword or symbol.
 *
 * String contents are unescaped in a single left-to-right pass, so an escaped backslash
 * is never re-interpreted as the start of another escape.
 *
 * Throws MalReaderException when there is no token left or the token matches no atom
 * pattern.
 */
fun read_atom(reader: Reader): MalType {
    val next = reader.next() ?: throw MalReaderException("Unexpected null token")
    val groups = ATOM_REGEX.find(next)?.groups ?: throw MalReaderException("Unrecognized token: " + next)

    return when {
        groups[1]?.value != null -> MalInteger(Integer.valueOf(groups[1]?.value))
        groups[2]?.value != null -> NIL
        groups[3]?.value != null -> TRUE
        groups[4]?.value != null -> FALSE
        groups[5]?.value != null -> MalString(unescape_string_literal(groups[5]?.value as String))
        groups[6]?.value != null -> MalKeyword(groups[6]?.value as String)
        groups[7]?.value != null -> MalSymbol(groups[7]?.value as String)
        else -> throw MalReaderException("Unrecognized token: " + next)
    }
}

// Decodes the escapes \n, \" and \\ in the body of a string literal. Each backslash is
// paired with the character that follows it, so the text `\\n` decodes to a backslash
// followed by 'n' rather than a backslash followed by a newline. Unrecognised escapes and
// a trailing lone backslash are copied through unchanged.
private fun unescape_string_literal(raw: String): String {
    val out = StringBuilder(raw.length)
    var i = 0
    while (i < raw.length) {
        val c = raw[i]
        if (c == '\\' && i + 1 < raw.length) {
            val escaped = raw[i + 1]
            when (escaped) {
                'n' -> out.append('\n')
                '"' -> out.append('"')
                '\\' -> out.append('\\')
                else -> out.append(c).append(escaped)
            }
            i += 2
        } else {
            out.append(c)
            i += 1
        }
    }
    return out.toString()
}