Commit | Line | Data |
---|---|---|
31690700 JM |
1 | <?php |
2 | ||
3 | require_once 'types.php'; | |
4 | ||
5 | class Reader { | |
6 | protected $tokens = array(); | |
7 | protected $position = 0; | |
8 | public function __construct($tokens) { | |
9 | $this->tokens = $tokens; | |
10 | $this->position = 0; | |
11 | } | |
12 | public function next() { | |
ea81a808 | 13 | if ($this->position >= count($this->tokens)) { return null; } |
31690700 JM |
14 | return $this->tokens[$this->position++]; |
15 | } | |
16 | public function peek() { | |
ea81a808 | 17 | if ($this->position >= count($this->tokens)) { return null; } |
31690700 JM |
18 | return $this->tokens[$this->position]; |
19 | } | |
20 | } | |
21 | ||
22 | class BlankException extends Exception { | |
23 | } | |
24 | ||
25 | function _real_token($s) { | |
26 | return $s !== '' && $s[0] !== ';'; | |
27 | } | |
28 | ||
29 | function tokenize($str) { | |
30 | $pat = "/[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\s\[\]{}('\"`,;)]*)/"; | |
31 | preg_match_all($pat, $str, $matches); | |
32 | return array_values(array_filter($matches[1], '_real_token')); | |
33 | } | |
34 | ||
35 | function read_atom($reader) { | |
36 | $token = $reader->next(); | |
37 | if (preg_match("/^-?[0-9]+$/", $token)) { | |
38 | return intval($token, 10); | |
39 | } elseif ($token[0] === "\"") { | |
40 | $str = substr($token, 1, -1); | |
41 | $str = preg_replace('/\\\\"/', '"', $str); | |
8d78bc26 JM |
42 | $str = preg_replace('/\\\\n/', "\n", $str); |
43 | $str = preg_replace('/\\\\\\\\/', "\\", $str); | |
31690700 | 44 | return $str; |
b8ee29b2 JM |
45 | } elseif ($token[0] === ":") { |
46 | return _keyword(substr($token,1)); | |
31690700 JM |
47 | } elseif ($token === "nil") { |
48 | return NULL; | |
49 | } elseif ($token === "true") { | |
50 | return true; | |
51 | } elseif ($token === "false") { | |
52 | return false; | |
53 | } else { | |
ea81a808 | 54 | return _symbol($token); |
31690700 JM |
55 | } |
56 | } | |
57 | ||
ea81a808 | 58 | function read_list($reader, $constr='_list', $start='(', $end=')') { |
31690700 JM |
59 | $ast = $constr(); |
60 | $token = $reader->next(); | |
61 | if ($token !== $start) { | |
62 | throw new Exception("expected '" . $start . "'"); | |
63 | } | |
64 | while (($token = $reader->peek()) !== $end) { | |
ea81a808 | 65 | if ($token === "" || $token === null) { |
31690700 JM |
66 | throw new Exception("expected '" . $end . "', got EOF"); |
67 | } | |
68 | $ast[] = read_form($reader); | |
69 | } | |
70 | $reader->next(); | |
71 | return $ast; | |
72 | } | |
73 | ||
74 | function read_hash_map($reader) { | |
ea81a808 JM |
75 | $lst = read_list($reader, '_list', '{', '}'); |
76 | return call_user_func_array('_hash_map', $lst->getArrayCopy()); | |
31690700 JM |
77 | } |
78 | ||
79 | function read_form($reader) { | |
80 | $token = $reader->peek(); | |
81 | switch ($token) { | |
82 | case '\'': $reader->next(); | |
ea81a808 | 83 | return _list(_symbol('quote'), |
31690700 JM |
84 | read_form($reader)); |
85 | case '`': $reader->next(); | |
ea81a808 | 86 | return _list(_symbol('quasiquote'), |
31690700 JM |
87 | read_form($reader)); |
88 | case '~': $reader->next(); | |
ea81a808 | 89 | return _list(_symbol('unquote'), |
31690700 JM |
90 | read_form($reader)); |
91 | case '~@': $reader->next(); | |
ea81a808 | 92 | return _list(_symbol('splice-unquote'), |
31690700 JM |
93 | read_form($reader)); |
94 | case '^': $reader->next(); | |
95 | $meta = read_form($reader); | |
ea81a808 | 96 | return _list(_symbol('with-meta'), |
31690700 JM |
97 | read_form($reader), |
98 | $meta); | |
99 | ||
100 | case '@': $reader->next(); | |
ea81a808 | 101 | return _list(_symbol('deref'), |
31690700 JM |
102 | read_form($reader)); |
103 | ||
104 | case ')': throw new Exception("unexpected ')'"); | |
105 | case '(': return read_list($reader); | |
106 | case ']': throw new Exception("unexpected ']'"); | |
ea81a808 | 107 | case '[': return read_list($reader, '_vector', '[', ']'); |
31690700 JM |
108 | case '}': throw new Exception("unexpected '}'"); |
109 | case '{': return read_hash_map($reader); | |
110 | ||
111 | default: return read_atom($reader); | |
112 | } | |
113 | } | |
114 | ||
115 | function read_str($str) { | |
116 | $tokens = tokenize($str); | |
117 | if (count($tokens) === 0) { throw new BlankException(); } | |
118 | return read_form(new Reader($tokens)); | |
119 | } | |
120 | ||
121 | ?> |