Commit | Line | Data |
---|---|---|
adc5b4fb JM |
1 | -- --------------------------------------------------------- |
2 | -- reader.sql | |
3 | ||
494792ab JM |
4 | CREATE SCHEMA reader; |
5 | ||
-- tokenize:
--     takes a source string
--     returns the array of tokens found in it, in order of appearance
CREATE FUNCTION reader.tokenize(str varchar) RETURNS varchar[] AS $$
DECLARE
    -- One match per token. Leading whitespace and commas are skipped, then
    -- the capture group takes one of: '~@', a single special character,
    -- a double-quoted string (backslash escapes allowed), a ';' comment
    -- running to the end of the line, or a run of non-special characters.
    re varchar = E'[[:space:] ,]*(~@|[\\[\\]{}()\'`~@]|"(?:[\\\\].|[^\\\\"])*"|;[^\n]*|[^\\s \\[\\]{}()\'"`~@,;]*)';
BEGIN
    -- Keep only the capture group of each match; empty captures and
    -- comment tokens are discarded.
    RETURN ARRAY(
        SELECT m.groups[1]
        FROM regexp_matches(str, re, 'g') AS m(groups)
        WHERE m.groups[1] <> ''
          AND m.groups[1] NOT LIKE ';%');
END; $$ LANGUAGE plpgsql IMMUTABLE;
adc5b4fb JM |
14 | |
-- read_atom:
--     takes a tokens array and position
--     returns new position and value_id
--
--     The token at pos is consumed (pos is advanced by one) and classified:
--       'nil' / 'false' / 'true'  -> value_ids 0 / 1 / 2
--       optional '-' plus digits  -> new types.value row (type_id 3)
--       starts with '"'           -> string, unescaped, via types._stringv
--       starts with ':'           -> keyword (colon stripped), via
--                                    types._keywordv
--       anything else             -> symbol, via types._symbolv
CREATE FUNCTION reader.read_atom(tokens varchar[],
    INOUT pos integer, OUT result integer) AS $$
DECLARE
    -- Stand-in for an escaped backslash while the other escapes are
    -- processed. Assumes source strings never contain a raw DEL (0x7f).
    bs_mark CONSTANT varchar := chr(127);
    str     varchar;
    token   varchar;
BEGIN
    token := tokens[pos];
    pos := pos + 1;
    IF token = 'nil' THEN                   -- nil
        result := 0;
    ELSIF token = 'false' THEN              -- false
        result := 1;
    ELSIF token = 'true' THEN               -- true
        result := 2;
    ELSIF token ~ '^-?[0-9][0-9]*$' THEN    -- integer
        INSERT INTO types.value (type_id, val_int)
            VALUES (3, CAST(token AS integer))
            RETURNING value_id INTO result;
    ELSIF token ~ '^".*"' THEN              -- string
        -- Drop the surrounding double quotes.
        str := substring(token FROM 2 FOR (char_length(token)-2));
        -- Escaped backslashes must be set aside first: otherwise the
        -- second backslash of \\ followed by n (or ") is mistaken for the
        -- start of a \n (or \") escape.
        str := replace(str, E'\\\\', bs_mark);
        str := replace(str, E'\\"', '"');
        str := replace(str, E'\\n', E'\n');
        str := replace(str, bs_mark, E'\\');
        result := types._stringv(str);
    ELSIF token ~ '^:.*' THEN               -- keyword
        result := types._keywordv(substring(token FROM 2 FOR (char_length(token)-1)));
    ELSE                                    -- symbol
        result := types._symbolv(token);
    END IF;
END; $$ LANGUAGE plpgsql;
54 | ||
-- read_seq:
--     takes a tokens array, the opening (first) and closing (last)
--     delimiter tokens, and position
--     returns new position (just past the closing delimiter) and the
--     array of value_ids of the forms read between the delimiters
--     raises an exception if the token at the position is not the opening
--     delimiter, or if the tokens run out before the closing delimiter
CREATE FUNCTION reader.read_seq(tokens varchar[], first varchar, last varchar,
    INOUT p integer, OUT items integer[]) AS $$
DECLARE
    token   varchar;
    item_id integer;
BEGIN
    token := tokens[p];
    p := p + 1;
    -- IS DISTINCT FROM rather than <>: a NULL token (empty array or
    -- position past the end) must fail here. With <> the comparison is
    -- NULL, the guard is skipped, and for an empty array the bounds check
    -- below is NULL as well, so the loop would never end.
    IF token IS DISTINCT FROM first THEN
        RAISE EXCEPTION 'expected ''%''', first;
    END IF;
    items := ARRAY[]::integer[];
    LOOP
        IF p > array_length(tokens, 1) THEN
            RAISE EXCEPTION 'expected ''%''', last;
        END IF;
        token := tokens[p];     -- peek; read_form consumes it
        IF token = last THEN EXIT; END IF;
        SELECT * FROM reader.read_form(tokens, p) INTO p, item_id;
        items := array_append(items, item_id);
    END LOOP;

    -- Step over the closing delimiter.
    p := p + 1;
END; $$ LANGUAGE plpgsql;
85 | ||
-- read_form:
--     takes a tokens array and position
--     returns new position and value_id
--
--     Dispatches on the token at pos:
--       reader macros  ' ` ~ ~@ @  -> (<name> <next form>)
--       ^                          -> (with-meta <second form> <first form>)
--       ( [ {                      -> list / vector / hash-map via read_seq
--       ) ] }                      -> error, closing delimiter with no opener
--       anything else              -> read_atom
CREATE FUNCTION reader.read_form(tokens varchar[],
    INOUT pos integer, OUT result integer) AS $$
DECLARE
    token    varchar;
    wrapper  varchar;   -- symbol name a one-form reader macro expands to
    form_id  integer;
    meta_id  integer;
BEGIN
    token := tokens[pos];  -- peek only; each branch consumes what it needs

    -- NULL when the token is not a one-form reader macro.
    wrapper := CASE token
                   WHEN ''''  THEN 'quote'
                   WHEN '`'   THEN 'quasiquote'
                   WHEN '~'   THEN 'unquote'
                   WHEN '~@'  THEN 'splice-unquote'
                   WHEN '@'   THEN 'deref'
               END;

    IF wrapper IS NOT NULL THEN
        -- Skip the macro character, read the form it applies to, wrap it.
        pos := pos + 1;
        SELECT * INTO pos, form_id FROM reader.read_form(tokens, pos);
        result := types._list(ARRAY[types._symbolv(wrapper), form_id]);

    ELSIF token = '^' THEN
        -- The metadata form comes first in the source but last in the
        -- resulting with-meta list.
        pos := pos + 1;
        SELECT * INTO pos, meta_id FROM reader.read_form(tokens, pos);
        SELECT * INTO pos, form_id FROM reader.read_form(tokens, pos);
        result := types._list(ARRAY[types._symbolv('with-meta'), form_id, meta_id]);

    -- list
    ELSIF token = ')' THEN
        RAISE EXCEPTION 'unexpected '')''';
    ELSIF token = '(' THEN
        SELECT p, types._list(items) INTO pos, result
            FROM reader.read_seq(tokens, '(', ')', pos);

    -- vector
    ELSIF token = ']' THEN
        RAISE EXCEPTION 'unexpected '']''';
    ELSIF token = '[' THEN
        SELECT p, types._vector(items) INTO pos, result
            FROM reader.read_seq(tokens, '[', ']', pos);

    -- hash-map
    ELSIF token = '}' THEN
        RAISE EXCEPTION 'unexpected ''}''';
    ELSIF token = '{' THEN
        SELECT p, types._hash_map(items) INTO pos, result
            FROM reader.read_seq(tokens, '{', '}', pos);

    -- atom
    ELSE
        SELECT * INTO pos, result FROM reader.read_atom(tokens, pos);
    END IF;
END; $$ LANGUAGE plpgsql;
168 | ||
-- read_str:
--     takes a string
--     returns a new value_id
--
--     Tokenizes the string and reads a single form starting at the first
--     token; the position reached after that form is not used.
CREATE FUNCTION reader.read_str(str varchar) RETURNS integer AS $$
DECLARE
    toks      varchar[] := reader.tokenize(str);
    next_pos  integer;
    form_id   integer;
BEGIN
    SELECT * INTO next_pos, form_id FROM reader.read_form(toks, 1);
    RETURN form_id;
END; $$ LANGUAGE plpgsql;
185 |