perl, python, tcl: Correctly detect more unterminated strings.
[jackhill/mal.git] / tcl / reader.tcl
CommitLineData
54d9903c
DM
1oo::class create Reader {
2 variable pos tokens
3
4 constructor {tokens_list} {
5 set tokens $tokens_list
6 set pos 0
7 }
8
9 method peek {} {
10 lindex $tokens $pos
11 }
12
13 method next {} {
14 set token [my peek]
15 incr pos
16 return $token
17 }
18}
19
20proc tokenize str {
4aa0ebdf 21 set re {[\s,]*(~@|[\[\]\{\}()'`~^@]|\"(?:\\.|[^\\\"])*\"?|;.*|[^\s\[\]\{\}('\"`~^@,;)]*)}
54d9903c
DM
22 set tokens {}
23 foreach {_ capture} [regexp -line -all -inline $re $str] {
24 if {[string length $capture] > 0 && [string range $capture 0 0] != ";"} {
25 lappend tokens $capture
26 }
27 }
28 return $tokens
29}
30
31proc read_tokens_list {reader start_char end_char} {
32 set token [$reader next]
33 if {$token != $start_char} {
24928363 34 error "expected '$start_char', got EOF"
54d9903c
DM
35 }
36
37 set elements {}
38 set token [$reader peek]
39 while {$token != $end_char} {
40 if {$token == ""} {
24928363 41 error "expected '$end_char', got EOF"
54d9903c
DM
42 }
43 lappend elements [read_form $reader]
44 set token [$reader peek]
45 }
46 $reader next
47 return $elements
48}
49
50proc read_list {reader} {
51 set elements [read_tokens_list $reader "(" ")"]
52 list_new $elements
53}
54
55proc read_vector {reader} {
56 set elements [read_tokens_list $reader "\[" "\]"]
57 vector_new $elements
58}
59
60proc read_hashmap {reader} {
61 set res [dict create]
62 foreach {keytoken valtoken} [read_tokens_list $reader "{" "}"] {
63 dict set res [obj_val $keytoken] $valtoken
64 }
65 hashmap_new $res
66}
67
68proc parse_string {str} {
69 set res [string range $str 1 end-1]
70 string map {"\\n" "\n" "\\\"" "\"" "\\\\" "\\"} $res
71}
72
73proc parse_keyword {str} {
74 # Remove initial ":"
75 string range $str 1 end
76}
77
78proc read_atom {reader} {
79 set token [$reader next]
80 switch -regexp $token {
81 ^-?[0-9]+$ { return [obj_new "integer" $token] }
82 ^nil$ { return $::mal_nil }
83 ^true$ { return $::mal_true }
84 ^false$ { return $::mal_false }
85 ^: { return [keyword_new [parse_keyword $token]] }
16309256
BH
86 ^\"(\\\\.|[^\\\\\"])*\"$
87 { return [string_new [parse_string $token]] }
88 ^\" { error "expected '\"', got EOF" }
54d9903c
DM
89 default { return [symbol_new $token] }
90 }
91}
92
93proc symbol_shortcut {symbol_name reader} {
94 $reader next
95 list_new [list [symbol_new $symbol_name] [read_form $reader]]
96}
97
98proc read_form {reader} {
99 switch [$reader peek] {
100 "'" { return [symbol_shortcut "quote" $reader] }
101 "`" { return [symbol_shortcut "quasiquote" $reader] }
102 "~" { return [symbol_shortcut "unquote" $reader] }
103 "~@" { return [symbol_shortcut "splice-unquote" $reader] }
104 "^" {
105 $reader next
106 set meta [read_form $reader]
107 return [list_new [list [symbol_new "with-meta"] [read_form $reader] $meta]]
108 }
109 "@" { return [symbol_shortcut "deref" $reader] }
110 "(" { return [read_list $reader] }
111 ")" { error "unexpected ')'" }
112 "\[" { return [read_vector $reader] }
113 "\]" { error "unexpected '\]'" }
114 "\{" { return [read_hashmap $reader] }
115 "\}" { error "unexpected '\}'" }
116 default { return [read_atom $reader] }
117 }
118}
119
120proc read_str str {
121 set tokens [tokenize $str]
122 set reader [Reader new $tokens]
123 set res [read_form $reader]
124 $reader destroy
125 return $res
126}