Refactor to use run scripts, remove *_RUNSTEP
[jackhill/mal.git] / plpgsql / reader.sql
1 -- ---------------------------------------------------------
2 -- reader.sql
3
4 CREATE SCHEMA reader;
5
-- tokenize:
-- takes a source string
-- returns the array of tokens found in it, in match order; empty
-- matches and tokens that start with ';' (comments) are left out
CREATE FUNCTION reader.tokenize(str varchar) RETURNS varchar[] AS $$
DECLARE
    -- One capture group per match: optional leading whitespace/commas are
    -- consumed outside the group, the token itself is captured inside it.
    token_re CONSTANT varchar := E'[[:space:] ,]*(~@|[\\[\\]{}()\'`~@]|"(?:[\\\\].|[^\\\\"])*"|;[^\n]*|[^\\s \\[\\]{}()\'"`~@,;]*)';
BEGIN
    RETURN ARRAY(
        SELECT groups[1]
        FROM regexp_matches(str, token_re, 'g') AS matches(groups)
        WHERE groups[1] <> ''
          AND groups[1] NOT LIKE ';%'
    );
END; $$ LANGUAGE plpgsql IMMUTABLE;
14
-- read_atom:
-- takes a tokens array and position
-- returns new position and value_id
--
-- The token at tokens[pos] is classified as nil, false, true, an integer,
-- a double-quoted string, a keyword (leading ':') or, failing all of
-- those, a symbol. pos is always advanced by one.
-- Raises an exception for a token that starts with '"' but is not a
-- complete string literal.
CREATE FUNCTION reader.read_atom(tokens varchar[],
    INOUT pos integer, OUT result integer) AS $$
DECLARE
    token    varchar;
    body     varchar;   -- string contents between the quotes, still escaped
    body_len integer;
    str      varchar;   -- decoded string contents
    ch       varchar;
    nxt      varchar;
    i        integer;
BEGIN
    token := tokens[pos];
    pos := pos + 1;
    -- RAISE NOTICE 'read_atom: %', token;
    IF token = 'nil' THEN  -- nil
        result := 0;
    ELSIF token = 'false' THEN  -- false
        result := 1;
    ELSIF token = 'true' THEN  -- true
        result := 2;
    ELSIF token ~ '^-?[0-9][0-9]*$' THEN  -- integer
        INSERT INTO types.value (type_id, val_int)
            VALUES (3, CAST(token AS integer))
            RETURNING value_id INTO result;
    ELSIF token ~ '^".*"$' THEN  -- string
        body := substring(token FROM 2 FOR (char_length(token) - 2));
        body_len := char_length(body);
        str := '';
        i := 1;
        -- Decode escapes in one left-to-right pass. Independent replace()
        -- passes would let the second backslash of an escaped backslash
        -- pair up with a following 'n' or '"' and be decoded a second time.
        -- E'' literals keep the backslash constants independent of the
        -- standard_conforming_strings setting.
        WHILE i <= body_len LOOP
            ch := substr(body, i, 1);
            nxt := substr(body, i + 1, 1);  -- '' when ch is the last character
            IF ch = E'\\' AND nxt = 'n' THEN
                str := str || E'\n';
                i := i + 2;
            ELSIF ch = E'\\' AND nxt IN ('"', E'\\') THEN
                str := str || nxt;
                i := i + 2;
            ELSE
                -- ordinary character, or a backslash that does not start a
                -- recognised escape: copied through unchanged
                str := str || ch;
                i := i + 1;
            END IF;
        END LOOP;
        result := types._stringv(str);
    ELSIF token ~ '^"' THEN  -- opens a string but is not a complete literal
        RAISE EXCEPTION 'expected ''"'', got EOF';
    ELSIF token ~ '^:.*' THEN  -- keyword
        -- strip the leading ':'
        result := types._keywordv(substring(token FROM 2 FOR (char_length(token) - 1)));
    ELSE
        -- symbol
        result := types._symbolv(token);
    END IF;
END; $$ LANGUAGE plpgsql;
54
-- read_seq:
-- takes a tokens array, the opening and closing delimiter tokens
-- (first, last) and a position
-- returns the position just past the closing delimiter and the array
-- of value_ids read between the delimiters (empty array if none)
-- raises an exception if tokens[p] is not the opening delimiter or if
-- the tokens run out before the closing delimiter is found
CREATE FUNCTION reader.read_seq(tokens varchar[], first varchar, last varchar,
    INOUT p integer, OUT items integer[]) AS $$
DECLARE
    token    varchar;
    item_id  integer;
    -- array_length() is NULL for an empty array; treat that as length 0 so
    -- the end-of-input guard below can actually fire
    n_tokens integer := COALESCE(array_length(tokens, 1), 0);
BEGIN
    token := tokens[p];
    p := p + 1;
    -- IS DISTINCT FROM so that a missing (NULL) token is also rejected
    IF token IS DISTINCT FROM first THEN
        RAISE EXCEPTION 'expected ''%''', first;
    END IF;
    items := ARRAY[]::integer[];
    LOOP
        IF p > n_tokens THEN
            RAISE EXCEPTION 'expected ''%''', last;
        END IF;
        token := tokens[p];
        EXIT WHEN token = last;
        -- read_form consumes one whole form and hands back the next position
        SELECT * FROM reader.read_form(tokens, p) INTO p, item_id;
        items := array_append(items, item_id);
    END LOOP;

    -- step over the closing delimiter
    p := p + 1;
END; $$ LANGUAGE plpgsql;
85
-- read_form:
-- takes a tokens array and position
-- returns new position and value_id
--
-- Dispatches on the token at tokens[pos]:
--   ' ` ~ ~@ @   reader macros: the following form is read and wrapped
--                in a list headed by the matching symbol
--   ^            reads a metadata form, then a target form, and builds
--                (with-meta target metadata)
--   ( [ {        delegates to read_seq and wraps the items as a list,
--                vector or hash-map
--   ) ] }        raises an exception (closing delimiter with no opener)
--   otherwise    delegates to read_atom
CREATE FUNCTION reader.read_form(tokens varchar[],
    INOUT pos integer, OUT result integer) AS $$
DECLARE
    head        varchar := tokens[pos];  -- peek; consumed by the branch that handles it
    macro_name  varchar;                 -- symbol a reader macro expands to, if any
    form_id     integer;
    meta_id     integer;
BEGIN
    macro_name := CASE head
                      WHEN ''''  THEN 'quote'
                      WHEN '`'   THEN 'quasiquote'
                      WHEN '~'   THEN 'unquote'
                      WHEN '~@'  THEN 'splice-unquote'
                      WHEN '@'   THEN 'deref'
                  END;

    IF macro_name IS NOT NULL THEN
        -- single-operand reader macro: skip the macro token, read its operand
        pos := pos + 1;
        SELECT * INTO pos, form_id FROM reader.read_form(tokens, pos);
        result := types._list(ARRAY[types._symbolv(macro_name), form_id]);

    ELSIF head = '^' THEN
        -- metadata comes first in the source but last in the built list
        pos := pos + 1;
        SELECT * INTO pos, meta_id FROM reader.read_form(tokens, pos);
        SELECT * INTO pos, form_id FROM reader.read_form(tokens, pos);
        result := types._list(ARRAY[types._symbolv('with-meta'), form_id, meta_id]);

    -- list
    ELSIF head = ')' THEN
        RAISE EXCEPTION 'unexpected '')''';
    ELSIF head = '(' THEN
        SELECT p, types._list(items) INTO pos, result
        FROM reader.read_seq(tokens, '(', ')', pos);

    -- vector
    ELSIF head = ']' THEN
        RAISE EXCEPTION 'unexpected '']''';
    ELSIF head = '[' THEN
        SELECT p, types._vector(items) INTO pos, result
        FROM reader.read_seq(tokens, '[', ']', pos);

    -- hash-map
    ELSIF head = '}' THEN
        RAISE EXCEPTION 'unexpected ''}''';
    ELSIF head = '{' THEN
        SELECT p, types._hash_map(items) INTO pos, result
        FROM reader.read_seq(tokens, '{', '}', pos);

    -- anything else is an atom
    ELSE
        SELECT * INTO pos, result FROM reader.read_atom(tokens, pos);
    END IF;
END; $$ LANGUAGE plpgsql;
168
-- read_str:
-- takes a string
-- returns a new value_id
--
-- The string is tokenized and a single form is read starting at the
-- first token; the position returned by read_form is not used.
CREATE FUNCTION reader.read_str(str varchar) RETURNS integer AS $$
DECLARE
    toks      varchar[] := reader.tokenize(str);
    next_pos  integer;
    ast_id    integer;
BEGIN
    -- RAISE NOTICE 'read_str first: %', toks[1];
    SELECT * INTO next_pos, ast_id FROM reader.read_form(toks, 1);
    -- RAISE NOTICE 'pos after read_form: %', next_pos;
    RETURN ast_id;
END; $$ LANGUAGE plpgsql;
185