Commit | Line | Data |
---|---|---|
54d9903c DM |
1 | oo::class create Reader { |
2 | variable pos tokens | |
3 | ||
4 | constructor {tokens_list} { | |
5 | set tokens $tokens_list | |
6 | set pos 0 | |
7 | } | |
8 | ||
9 | method peek {} { | |
10 | lindex $tokens $pos | |
11 | } | |
12 | ||
13 | method next {} { | |
14 | set token [my peek] | |
15 | incr pos | |
16 | return $token | |
17 | } | |
18 | } | |
19 | ||
20 | proc tokenize str { | |
4aa0ebdf | 21 | set re {[\s,]*(~@|[\[\]\{\}()'`~^@]|\"(?:\\.|[^\\\"])*\"?|;.*|[^\s\[\]\{\}('\"`~^@,;)]*)} |
54d9903c DM |
22 | set tokens {} |
23 | foreach {_ capture} [regexp -line -all -inline $re $str] { | |
24 | if {[string length $capture] > 0 && [string range $capture 0 0] != ";"} { | |
25 | lappend tokens $capture | |
26 | } | |
27 | } | |
28 | return $tokens | |
29 | } | |
30 | ||
31 | proc read_tokens_list {reader start_char end_char} { | |
32 | set token [$reader next] | |
33 | if {$token != $start_char} { | |
24928363 | 34 | error "expected '$start_char', got EOF" |
54d9903c DM |
35 | } |
36 | ||
37 | set elements {} | |
38 | set token [$reader peek] | |
39 | while {$token != $end_char} { | |
40 | if {$token == ""} { | |
24928363 | 41 | error "expected '$end_char', got EOF" |
54d9903c DM |
42 | } |
43 | lappend elements [read_form $reader] | |
44 | set token [$reader peek] | |
45 | } | |
46 | $reader next | |
47 | return $elements | |
48 | } | |
49 | ||
50 | proc read_list {reader} { | |
51 | set elements [read_tokens_list $reader "(" ")"] | |
52 | list_new $elements | |
53 | } | |
54 | ||
55 | proc read_vector {reader} { | |
56 | set elements [read_tokens_list $reader "\[" "\]"] | |
57 | vector_new $elements | |
58 | } | |
59 | ||
60 | proc read_hashmap {reader} { | |
61 | set res [dict create] | |
62 | foreach {keytoken valtoken} [read_tokens_list $reader "{" "}"] { | |
63 | dict set res [obj_val $keytoken] $valtoken | |
64 | } | |
65 | hashmap_new $res | |
66 | } | |
67 | ||
68 | proc parse_string {str} { | |
69 | set res [string range $str 1 end-1] | |
70 | string map {"\\n" "\n" "\\\"" "\"" "\\\\" "\\"} $res | |
71 | } | |
72 | ||
73 | proc parse_keyword {str} { | |
74 | # Remove initial ":" | |
75 | string range $str 1 end | |
76 | } | |
77 | ||
78 | proc read_atom {reader} { | |
79 | set token [$reader next] | |
80 | switch -regexp $token { | |
81 | ^-?[0-9]+$ { return [obj_new "integer" $token] } | |
82 | ^nil$ { return $::mal_nil } | |
83 | ^true$ { return $::mal_true } | |
84 | ^false$ { return $::mal_false } | |
85 | ^: { return [keyword_new [parse_keyword $token]] } | |
16309256 BH |
86 | ^\"(\\\\.|[^\\\\\"])*\"$ |
87 | { return [string_new [parse_string $token]] } | |
88 | ^\" { error "expected '\"', got EOF" } | |
54d9903c DM |
89 | default { return [symbol_new $token] } |
90 | } | |
91 | } | |
92 | ||
93 | proc symbol_shortcut {symbol_name reader} { | |
94 | $reader next | |
95 | list_new [list [symbol_new $symbol_name] [read_form $reader]] | |
96 | } | |
97 | ||
98 | proc read_form {reader} { | |
99 | switch [$reader peek] { | |
100 | "'" { return [symbol_shortcut "quote" $reader] } | |
101 | "`" { return [symbol_shortcut "quasiquote" $reader] } | |
102 | "~" { return [symbol_shortcut "unquote" $reader] } | |
103 | "~@" { return [symbol_shortcut "splice-unquote" $reader] } | |
104 | "^" { | |
105 | $reader next | |
106 | set meta [read_form $reader] | |
107 | return [list_new [list [symbol_new "with-meta"] [read_form $reader] $meta]] | |
108 | } | |
109 | "@" { return [symbol_shortcut "deref" $reader] } | |
110 | "(" { return [read_list $reader] } | |
111 | ")" { error "unexpected ')'" } | |
112 | "\[" { return [read_vector $reader] } | |
113 | "\]" { error "unexpected '\]'" } | |
114 | "\{" { return [read_hashmap $reader] } | |
115 | "\}" { error "unexpected '\}'" } | |
116 | default { return [read_atom $reader] } | |
117 | } | |
118 | } | |
119 | ||
120 | proc read_str str { | |
121 | set tokens [tokenize $str] | |
122 | set reader [Reader new $tokens] | |
123 | set res [read_form $reader] | |
124 | $reader destroy | |
125 | return $res | |
126 | } |