| 1 | package mal |
| 2 | |
| 3 | import kotlin.text.Regex |
| 4 | |
/**
 * Splits source text into tokens; capture group 1 holds the token text.
 *
 * After skipping leading whitespace and commas, one of the following is captured:
 *  - the two-character token `~@`
 *  - a single special character: `[ ] { } ( ) ' ` ~ ^ @`
 *  - a double-quoted string with backslash escapes; the closing quote is optional so
 *    that an unterminated string is still captured as ONE token (and can then be
 *    rejected) instead of losing its opening quote and being re-read as a symbol
 *  - a `;` comment running to the end of the line
 *  - a run of characters that are neither whitespace nor special (may be empty)
 */
val TOKEN_REGEX = Regex("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]{}('\"`,;)]*)")

/**
 * Classifies a single token. Exactly one capture group participates in a match:
 *  1. integer literal (optional leading `-`)
 *  2. `nil`
 *  3. `true`
 *  4. `false`
 *  5. the contents of a well-formed string literal (quotes stripped, escapes not yet
 *     decoded); `[^\\"]` also accepts line terminators, so multi-line strings match
 *  6. the name of a keyword (text after the leading `:`)
 *  7. any other token that contains no double quote (a symbol)
 *
 * Every alternative is anchored at the start of the token, so a token that fits none
 * of them (for example an unterminated string) yields no match at all rather than a
 * partial match somewhere inside the token.
 */
val ATOM_REGEX = Regex("(^-?[0-9]+$)|(^nil$)|(^true$)|(^false$)|^\"((?:\\\\.|[^\\\\\"])*)\"$|^:(.*)|(^[^\"]*$)")
| 7 | |
/**
 * Forward-only cursor over a sequence of tokens with one token of lookahead.
 *
 * The first token is fetched eagerly at construction time; [current] is `null` once
 * the underlying iterator is exhausted.
 */
class Reader(sequence: Sequence<String>) {
    val tokens = sequence.iterator()
    var current = advance()

    /** Returns the lookahead token without consuming it, or `null` at end of input. */
    fun peek(): String? = current

    /** Returns the lookahead token and moves the cursor on; `null` at end of input. */
    fun next(): String? = current.also { current = advance() }

    /** Pulls the next token from the iterator, or `null` when nothing is left. */
    private fun advance(): String? {
        if (!tokens.hasNext()) return null
        return tokens.next()
    }
}
| 22 | |
/**
 * Tokenizes [input] and reads one form from the resulting tokens.
 *
 * @return [NIL] when [input] is `null`, otherwise the result of [read_form].
 */
fun read_str(input: String?): MalType {
    val tokens = tokenizer(input)
    if (tokens == null) {
        return NIL
    }
    return read_form(Reader(tokens))
}
| 27 | |
/**
 * Lazily splits [input] into tokens using [TOKEN_REGEX].
 *
 * Empty captures and tokens starting with `;` (comments) are dropped.
 *
 * @return `null` when [input] is `null`, otherwise the sequence of remaining tokens.
 */
fun tokenizer(input: String?): Sequence<String>? =
    input?.let { text ->
        TOKEN_REGEX.findAll(text)
            // Group 1 takes part in every match of TOKEN_REGEX (possibly as "").
            .map { match -> match.groupValues[1] }
            .filterNot { token -> token == "" || token.startsWith(";") }
    }
| 35 | |
/**
 * Reads one form starting at the reader's lookahead token.
 *
 * Opening delimiters dispatch to the matching collection reader, quote-like prefix
 * tokens to [read_shorthand], and every other token to [read_atom].
 *
 * @throws MalContinue when no token is left.
 * @throws MalReaderException when the lookahead token is a closing delimiter.
 */
fun read_form(reader: Reader): MalType {
    val token = reader.peek() ?: throw MalContinue()

    return when (token) {
        // collections
        "(" -> read_list(reader)
        "[" -> read_vector(reader)
        "{" -> read_hashmap(reader)

        // a closing delimiter can never start a form
        ")" -> throw MalReaderException("expected form, got ')'")
        "]" -> throw MalReaderException("expected form, got ']'")
        "}" -> throw MalReaderException("expected form, got '}'")

        // prefix shorthands
        "'" -> read_shorthand(reader, "quote")
        "`" -> read_shorthand(reader, "quasiquote")
        "~" -> read_shorthand(reader, "unquote")
        "~@" -> read_shorthand(reader, "splice-unquote")

        else -> read_atom(reader)
    }
}
| 51 | |
/** Reads forms into a new [MalList] until the closing `)` via [read_sequence]. */
fun read_list(reader: Reader): MalType {
    return read_sequence(reader, MalList(), ")")
}
/** Reads forms into a new [MalVector] until the closing `]` via [read_sequence]. */
fun read_vector(reader: Reader): MalType {
    return read_sequence(reader, MalVector(), "]")
}
| 54 | |
/**
 * Consumes the opening delimiter, then appends forms to [sequence] until the token
 * [end] is reached; the closing token is consumed as well.
 *
 * @return [sequence], filled with the forms that were read.
 * @throws MalReaderException when the tokens run out before [end] is seen.
 */
private fun read_sequence(reader: Reader, sequence: IMutableSeq, end: String): MalType {
    reader.next() // skip the opening delimiter

    while (true) {
        val token = reader.peek() ?: throw MalReaderException("expected '$end', got EOF")
        if (token == end) {
            reader.next() // skip the closing delimiter
            return sequence
        }
        sequence.conj_BANG(read_form(reader))
    }
}
| 72 | |
/**
 * Consumes the opening `{`, then reads key/value pairs into a new [MalHashMap] until
 * the closing `}` is reached; the closing token is consumed as well.
 *
 * @throws MalReaderException when the tokens run out before `}`, when a key is not a
 *   [MalString], or when the tokens run out right after a key.
 */
fun read_hashmap(reader: Reader): MalType {
    reader.next() // skip '{'
    val hashMap = MalHashMap()

    while (true) {
        val token = reader.peek() ?: throw MalReaderException("expected '}', got EOF")
        if (token == "}") {
            reader.next() // skip '}'
            return hashMap
        }

        val key = read_form(reader) as? MalString
            ?: throw MalReaderException("hash-map keys must be strings or keywords")

        // Checked here so a missing value is reported as a reader error
        // instead of whatever read_form does on end of input.
        if (reader.peek() == null) {
            throw MalReaderException("expected form, got EOF")
        }
        hashMap.assoc_BANG(key, read_form(reader))
    }
}
| 102 | |
/**
 * Consumes the prefix token and returns the two-element list `(symbol form)`, where
 * `form` is the next form read from [reader].
 */
fun read_shorthand(reader: Reader, symbol: String): MalType {
    reader.next() // skip the prefix token

    return MalList().also { wrapped ->
        wrapped.conj_BANG(MalSymbol(symbol))
        wrapped.conj_BANG(read_form(reader))
    }
}
| 112 | |
/**
 * Consumes the next token from [reader] and converts it to an atomic value.
 *
 * The token is classified by the capture groups of [ATOM_REGEX]: integer, `nil`,
 * `true`, `false`, string (escape sequences are decoded), keyword, or symbol.
 *
 * @throws MalReaderException when no token is left, when the token fits none of the
 *   categories, or when an integer literal does not fit in an `Int`.
 */
fun read_atom(reader: Reader): MalType {
    val next = reader.next() ?: throw MalReaderException("Unexpected null token")
    val groups = ATOM_REGEX.find(next)?.groups ?: throw MalReaderException("Unrecognized token: " + next)

    val integer = groups[1]?.value
    val string = groups[5]?.value
    val keyword = groups[6]?.value
    val symbol = groups[7]?.value

    return when {
        integer != null -> MalInteger(parse_integer(integer))
        groups[2] != null -> NIL
        groups[3] != null -> TRUE
        groups[4] != null -> FALSE
        string != null -> MalString(unescape_string(string))
        keyword != null -> MalKeyword(keyword)
        symbol != null -> MalSymbol(symbol)
        else -> throw MalReaderException("Unrecognized token: " + next)
    }
}

/**
 * Parses an integer literal, reporting a value outside the `Int` range as a reader
 * error instead of letting [NumberFormatException] escape.
 */
private fun parse_integer(digits: String): Int =
    try {
        Integer.valueOf(digits)
    } catch (e: NumberFormatException) {
        throw MalReaderException("integer out of range: " + digits)
    }

/**
 * Decodes the escape sequences of a string literal body in one left-to-right pass:
 * `\n` becomes a newline, `\"` a double quote and `\\` a single backslash.
 *
 * A single pass is required so that an escaped backslash followed by `n` stays a
 * backslash plus `n` rather than being re-read as a newline escape. Any other
 * backslash sequence is copied through unchanged.
 */
private fun unescape_string(raw: String): String {
    val out = StringBuilder()
    var i = 0
    while (i < raw.length) {
        val c = raw[i]
        if (c == '\\' && i + 1 < raw.length) {
            val escaped = raw[i + 1]
            when (escaped) {
                'n' -> out.append('\n')
                '"' -> out.append('"')
                '\\' -> out.append('\\')
                else -> out.append(c).append(escaped)
            }
            i += 2
        } else {
            out.append(c)
            i += 1
        }
    }
    return out.toString()
}