plpgsql, scala, vimscript, yorick: Detect more unterminated strings.
[jackhill/mal.git] / plpgsql / reader.sql
1 -- ---------------------------------------------------------
2 -- reader.sql
3
-- Namespace that holds every reader function defined in this file
-- (tokenize, read_atom, read_seq, read_form, read_str).
CREATE SCHEMA reader;
5
-- tokenize:
--   takes a source string
--   returns the array of tokens found in it, in source order, with
--   empty matches and ';' comments removed
CREATE FUNCTION reader.tokenize(str varchar) RETURNS varchar[] AS $$
DECLARE
    -- Leading whitespace and commas are skipped, then exactly one of the
    -- following alternatives is captured as the token:
    --   ~@                        the two-character splice-unquote token
    --   [ ] { } ( ) ' ` ~ ^ @     a single special character; '^' is listed
    --                             so that "^abc" lexes as '^' followed by
    --                             'abc' (read_form dispatches on a bare '^')
    --   "..."                     a string literal with backslash escapes;
    --                             the closing quote is optional so that an
    --                             unterminated string still yields a token
    --                             that read_atom can report
    --   ;...                      a comment running to the end of the line
    --   anything else             a run of non-special characters
    re varchar = E'[[:space:] ,]*(~@|[\\[\\]{}()\'`~^@]|"(?:[\\\\].|[^\\\\"])*"?|;[^\n]*|[^\\s \\[\\]{}()\'"`~@,;]*)';
BEGIN
    RETURN ARRAY(
        SELECT tok
        FROM (SELECT (regexp_matches(str, re, 'g'))[1] AS tok) AS x
        -- The pattern can match the empty string (e.g. at end of input);
        -- those matches and comment tokens are not real tokens.
        WHERE tok <> ''
          AND tok NOT LIKE ';%'
    );
END; $$ LANGUAGE plpgsql IMMUTABLE;
14
-- read_atom:
--   takes a tokens array and position
--   returns new position and value_id
--
--   The token at tokens[pos] is classified, in this order, as nil, false,
--   true, integer, string, keyword or symbol. An opening double quote with
--   no well-formed closing quote raises an exception.
CREATE FUNCTION reader.read_atom(tokens varchar[],
    INOUT pos integer, OUT result integer) AS $$
DECLARE
    -- DEL (0x7f) stands in for an escaped backslash while the other escape
    -- sequences are expanded, so that the text \\n is not misread as a
    -- backslash followed by a \n escape.
    placeholder CONSTANT varchar := chr(CAST(x'7f' AS integer));
    str   varchar;
    token varchar;
BEGIN
    token := tokens[pos];
    pos := pos + 1;
    -- RAISE NOTICE 'read_atom: %', token;
    IF token = 'nil' THEN  -- nil
        -- presumably 0, 1 and 2 are pre-seeded value_ids for nil, false
        -- and true; confirm against the types schema
        result := 0;
    ELSIF token = 'false' THEN  -- false
        result := 1;
    ELSIF token = 'true' THEN  -- true
        result := 2;
    ELSIF token ~ '^-?[0-9][0-9]*$' THEN  -- integer
        INSERT INTO types.value (type_id, val_int)
            VALUES (3, CAST(token AS integer))
            RETURNING value_id INTO result;
    ELSIF token ~ '^"(?:[\\\\].|[^\\\\"])*"$' THEN  -- string
        -- The pattern is anchored at both ends: a token with anything after
        -- the closing quote is rejected by the next branch instead of being
        -- unescaped into a corrupted string.
        -- Strip the surrounding quotes, then expand the escapes.
        str := substring(token FROM 2 FOR (char_length(token)-2));
        str := replace(str, '\\', placeholder);
        str := replace(str, '\"', '"');
        str := replace(str, '\n', E'\n');
        str := replace(str, placeholder, E'\\');
        result := types._stringv(str);
    ELSIF token ~ '^".*' THEN  -- unclosed string
        RAISE EXCEPTION 'expected ''"'', got EOF';
    ELSIF token ~ '^:.*' THEN  -- keyword
        -- drop the leading ':'
        result := types._keywordv(substring(token FROM 2 FOR (char_length(token)-1)));
    ELSE
        -- symbol
        result := types._symbolv(token);
    END IF;
END; $$ LANGUAGE plpgsql;
57
-- read_seq:
--   takes a tokens array, the opening (first) and closing (last)
--   delimiter tokens, and the position p of the opening delimiter
--   returns the position just past the closing delimiter and the array
--   of value_ids of the forms read between the delimiters; the caller
--   wraps that array into a list, vector or hash-map
--   raises an exception if tokens[p] is not the opening delimiter or if
--   the tokens run out before the closing delimiter is found
CREATE FUNCTION reader.read_seq(tokens varchar[], first varchar, last varchar,
    INOUT p integer, OUT items integer[]) AS $$
DECLARE
    token    varchar;
    item_id  integer;
    -- array_length() returns NULL for an empty array; treat that as zero
    -- tokens so the end-of-input check below cannot be skipped.
    n_tokens CONSTANT integer := COALESCE(array_length(tokens, 1), 0);
BEGIN
    token := tokens[p];
    p := p + 1;
    -- IS DISTINCT FROM rather than <>: at end of input token is NULL, and
    -- "NULL <> first" is NULL, which would silently skip this error.
    IF token IS DISTINCT FROM first THEN
        RAISE EXCEPTION 'expected ''%'', got EOF', first;
    END IF;
    items := ARRAY[]::integer[];
    LOOP
        IF p > n_tokens THEN
            RAISE EXCEPTION 'expected ''%'', got EOF', last;
        END IF;
        token := tokens[p];
        EXIT WHEN token = last;
        -- read_form names its outputs pos/result, which do not clash with
        -- any variable in this function, so they can be selected by name.
        SELECT f.pos, f.result INTO p, item_id
            FROM reader.read_form(tokens, p) AS f;
        items := array_append(items, item_id);
    END LOOP;

    -- step over the closing delimiter
    p := p + 1;
END; $$ LANGUAGE plpgsql;
88
-- read_form:
--   takes a tokens array and position
--   returns new position and value_id
--
--   Dispatches on the token at tokens[pos]:
--     ' ` ~ ~@ @   read the next form and wrap it as (<symbol> form)
--     ^            read a metadata form, then a form: (with-meta form meta)
--     ( [ {        read a delimited sequence through reader.read_seq
--     ) ] }        raise an exception: closer without an opener
--     otherwise    hand the token to reader.read_atom
CREATE FUNCTION reader.read_form(tokens varchar[],
    INOUT pos integer, OUT result integer) AS $$
DECLARE
    token    varchar := tokens[pos];  -- peek, pos is not advanced here
    wrap_sym varchar;                 -- symbol a one-form reader macro expands to
    form_id  integer;
    meta_id  integer;
BEGIN
    -- NULL when the token is not a one-form reader macro.
    wrap_sym := CASE token
        WHEN ''''  THEN 'quote'
        WHEN '`'   THEN 'quasiquote'
        WHEN '~'   THEN 'unquote'
        WHEN '~@'  THEN 'splice-unquote'
        WHEN '@'   THEN 'deref'
    END;

    IF wrap_sym IS NOT NULL THEN
        -- reader macro: skip the macro token, read one form, wrap it
        pos := pos + 1;
        SELECT * FROM reader.read_form(tokens, pos) INTO pos, form_id;
        result := types._list(ARRAY[types._symbolv(wrap_sym), form_id]);

    ELSIF token = '^' THEN
        -- metadata comes first in the source but last in the expansion
        pos := pos + 1;
        SELECT * FROM reader.read_form(tokens, pos) INTO pos, meta_id;
        SELECT * FROM reader.read_form(tokens, pos) INTO pos, form_id;
        result := types._list(ARRAY[types._symbolv('with-meta'), form_id, meta_id]);

    -- list
    ELSIF token = ')' THEN
        RAISE EXCEPTION 'unexpected '')''';
    ELSIF token = '(' THEN
        SELECT p, types._list(items)
            FROM reader.read_seq(tokens, '(', ')', pos) INTO pos, result;

    -- vector
    ELSIF token = ']' THEN
        RAISE EXCEPTION 'unexpected '']''';
    ELSIF token = '[' THEN
        SELECT p, types._vector(items)
            FROM reader.read_seq(tokens, '[', ']', pos) INTO pos, result;

    -- hash-map
    ELSIF token = '}' THEN
        RAISE EXCEPTION 'unexpected ''}''';
    ELSIF token = '{' THEN
        SELECT p, types._hash_map(items)
            FROM reader.read_seq(tokens, '{', '}', pos) INTO pos, result;

    -- anything else is an atom
    ELSE
        SELECT * FROM reader.read_atom(tokens, pos) INTO pos, result;
    END IF;
END; $$ LANGUAGE plpgsql;
171
-- read_str:
--   takes a string
--   returns a new value_id
--
--   The string is tokenized and a single form is read starting at the
--   first token; the position reached after that form is not used.
CREATE FUNCTION reader.read_str(str varchar) RETURNS integer AS $$
DECLARE
    token_list varchar[] := reader.tokenize(str);
    next_pos   integer := 1;
    form_id    integer;
BEGIN
    SELECT * FROM reader.read_form(token_list, next_pos)
        INTO next_pos, form_id;
    RETURN form_id;
END; $$ LANGUAGE plpgsql;
188