Tcl implementation
[jackhill/mal.git] / tcl / reader.tcl
1 oo::class create Reader {
2 variable pos tokens
3
4 constructor {tokens_list} {
5 set tokens $tokens_list
6 set pos 0
7 }
8
9 method peek {} {
10 lindex $tokens $pos
11 }
12
13 method next {} {
14 set token [my peek]
15 incr pos
16 return $token
17 }
18 }
19
20 proc tokenize str {
21 set re {[\s,]*(~@|[\[\]\{\}()'`~^@]|\"(?:\\.|[^\\\"])*\"|;.*|[^\s\[\]\{\}('\"`~^@,;)]*)}
22 set tokens {}
23 foreach {_ capture} [regexp -line -all -inline $re $str] {
24 if {[string length $capture] > 0 && [string range $capture 0 0] != ";"} {
25 lappend tokens $capture
26 }
27 }
28 return $tokens
29 }
30
31 proc read_tokens_list {reader start_char end_char} {
32 set token [$reader next]
33 if {$token != $start_char} {
34 error "expected '$start_char'"
35 }
36
37 set elements {}
38 set token [$reader peek]
39 while {$token != $end_char} {
40 if {$token == ""} {
41 error "expected '$end_char'"
42 }
43 lappend elements [read_form $reader]
44 set token [$reader peek]
45 }
46 $reader next
47 return $elements
48 }
49
50 proc read_list {reader} {
51 set elements [read_tokens_list $reader "(" ")"]
52 list_new $elements
53 }
54
55 proc read_vector {reader} {
56 set elements [read_tokens_list $reader "\[" "\]"]
57 vector_new $elements
58 }
59
60 proc read_hashmap {reader} {
61 set res [dict create]
62 foreach {keytoken valtoken} [read_tokens_list $reader "{" "}"] {
63 dict set res [obj_val $keytoken] $valtoken
64 }
65 hashmap_new $res
66 }
67
68 proc parse_string {str} {
69 set res [string range $str 1 end-1]
70 string map {"\\n" "\n" "\\\"" "\"" "\\\\" "\\"} $res
71 }
72
73 proc parse_keyword {str} {
74 # Remove initial ":"
75 string range $str 1 end
76 }
77
78 proc read_atom {reader} {
79 set token [$reader next]
80 switch -regexp $token {
81 ^-?[0-9]+$ { return [obj_new "integer" $token] }
82 ^nil$ { return $::mal_nil }
83 ^true$ { return $::mal_true }
84 ^false$ { return $::mal_false }
85 ^: { return [keyword_new [parse_keyword $token]] }
86 ^\".*\"$ { return [string_new [parse_string $token]] }
87 default { return [symbol_new $token] }
88 }
89 }
90
91 proc symbol_shortcut {symbol_name reader} {
92 $reader next
93 list_new [list [symbol_new $symbol_name] [read_form $reader]]
94 }
95
96 proc read_form {reader} {
97 switch [$reader peek] {
98 "'" { return [symbol_shortcut "quote" $reader] }
99 "`" { return [symbol_shortcut "quasiquote" $reader] }
100 "~" { return [symbol_shortcut "unquote" $reader] }
101 "~@" { return [symbol_shortcut "splice-unquote" $reader] }
102 "^" {
103 $reader next
104 set meta [read_form $reader]
105 return [list_new [list [symbol_new "with-meta"] [read_form $reader] $meta]]
106 }
107 "@" { return [symbol_shortcut "deref" $reader] }
108 "(" { return [read_list $reader] }
109 ")" { error "unexpected ')'" }
110 "\[" { return [read_vector $reader] }
111 "\]" { error "unexpected '\]'" }
112 "\{" { return [read_hashmap $reader] }
113 "\}" { error "unexpected '\}'" }
114 default { return [read_atom $reader] }
115 }
116 }
117
118 proc read_str str {
119 set tokens [tokenize $str]
120 set reader [Reader new $tokens]
121 set res [read_form $reader]
122 $reader destroy
123 return $res
124 }