Merge pull request #319 from chr15m/refactor-php-web-runner
[jackhill/mal.git] / matlab / reader.m
% READER  Tokenizer and recursive-descent parser for Lisp-style source text.
% The class carries no state; it is used purely as a namespace for the
% static functions below.
classdef reader
    methods (Static = true)
        function tokens = tokenize(str)
            % TOKENIZE  Split source text into a cell array of token strings.
            %   Whitespace and commas between tokens are skipped by the
            %   leading part of the pattern; only the capture group is kept.
            %   Comment tokens (starting with ';') and empty tokens are
            %   dropped. Returns an empty cell when nothing remains.
            re = '[\s,]*(~@|[\[\]{}()''`~^@]|"(?:\\.|[^\\"])*"|;[^\n]*|[^\s\[\]{}(''"`,;)]*)';
            matches = regexp(str, re, 'tokens');
            if isempty(matches)
                tokens = {};
                return
            end
            % Each match holds exactly one capture group: unwrap it.
            tokens = cellfun(@(m) m{1}, matches, 'UniformOutput', false);
            % A match made only of separators (e.g. trailing blanks) has an
            % empty capture; it is not a token and must not reach the parser.
            keep = cellfun(@(t) ~isempty(t) && t(1) ~= ';', tokens);
            tokens = tokens(keep);
        end

        function atm = read_atom(rdr)
            % READ_ATOM  Consume one token from rdr and convert it to a value.
            %   integer literal -> double
            %   "..."           -> char vector with \\, \" and \n decoded
            %   :name           -> type_utils.keyword(name)
            %   nil/true/false  -> type_utils.nil / true / false
            %   anything else   -> types.Symbol(token)
            token = rdr.next();
            if ~isempty(regexp(token, '^-?[0-9]+$', 'match'))
                atm = str2double(token);
            elseif strcmp(token(1), '"')
                % Strip the surrounding quotes.
                atm = token(2:end-1);
                if any(atm == '\')
                    % Decode escapes in a single left-to-right pass. Chained
                    % strrep calls are not safe here: strrep also replaces
                    % overlapping occurrences, and a placeholder character
                    % could collide with real string content.
                    [plain, escapes] = regexp(atm, '\\[\\"n]', ...
                                              'split', 'match');
                    atm = plain{1};
                    for k = 1:numel(escapes)
                        ch = escapes{k}(2);
                        if ch == 'n'
                            ch = char(10);
                        end
                        atm = [atm, ch, plain{k+1}]; %#ok<AGROW>
                    end
                end
            elseif strcmp(token(1), ':')
                atm = type_utils.keyword(token(2:end));
            elseif strcmp(token, 'nil')
                atm = type_utils.nil;
            elseif strcmp(token, 'true')
                atm = true;
            elseif strcmp(token, 'false')
                atm = false;
            else
                atm = types.Symbol(token);
            end
        end

        function seq = read_seq(rdr, start, last)
            % READ_SEQ  Read forms between the delimiters start and last.
            %   Consumes the opening token (which must equal start), then
            %   reads forms until the token last is seen and consumes it.
            %   Returns the forms as a cell array. Raises an error if the
            %   opening token is wrong or the input ends before last.
            seq = {};
            if ~strcmp(rdr.next(), start)
                error('expected ''%s''', start);
            end
            while true
                token = rdr.peek();
                % NOTE(review): assumes rdr.peek() yields a non-char value
                % (the previous code compared against false) once the
                % tokens run out -- confirm against types.Reader.
                if ~ischar(token)
                    error('expected ''%s''', last);
                end
                if strcmp(token, last)
                    break
                end
                seq{end+1} = reader.read_form(rdr); %#ok<AGROW>
            end
            % Consume the closing delimiter.
            rdr.next();
        end

        function lst = read_list(rdr)
            % READ_LIST  Read a '(' ... ')' sequence into a types.List.
            seq = reader.read_seq(rdr, '(', ')');
            lst = types.List(seq{:});
        end

        function vec = read_vector(rdr)
            % READ_VECTOR  Read a '[' ... ']' sequence into a types.Vector.
            seq = reader.read_seq(rdr, '[', ']');
            vec = types.Vector(seq{:});
        end

        function map = read_hash_map(rdr)
            % READ_HASH_MAP  Read a '{' ... '}' sequence into a types.HashMap.
            seq = reader.read_seq(rdr, '{', '}');
            map = types.HashMap(seq{:});
        end

        function ast = read_form(rdr)
            % READ_FORM  Read the next complete form from rdr.
            %   Reader macros (' ` ~ ~@ ^ @) expand to a list headed by the
            %   matching symbol; opening brackets dispatch to the sequence
            %   readers; stray closing brackets raise an error; everything
            %   else is read as an atom.
            token = rdr.peek();
            % NOTE(review): same end-of-input assumption as in read_seq.
            if ~ischar(token)
                error('unexpected EOF');
            end
            switch token
            case ''''
                rdr.next();
                ast = types.List(types.Symbol('quote'), ...
                                 reader.read_form(rdr));
            case '`'
                rdr.next();
                ast = types.List(types.Symbol('quasiquote'), ...
                                 reader.read_form(rdr));
            case '~'
                rdr.next();
                ast = types.List(types.Symbol('unquote'), ...
                                 reader.read_form(rdr));
            case '~@'
                rdr.next();
                ast = types.List(types.Symbol('splice-unquote'), ...
                                 reader.read_form(rdr));
            case '^'
                rdr.next();
                % The metadata comes first in the source text but is the
                % last argument of with-meta.
                meta = reader.read_form(rdr);
                ast = types.List(types.Symbol('with-meta'), ...
                                 reader.read_form(rdr), meta);
            case '@'
                rdr.next();
                ast = types.List(types.Symbol('deref'), ...
                                 reader.read_form(rdr));

            case ')'
                error('unexpected '')''');
            case '('
                ast = reader.read_list(rdr);
            case ']'
                error('unexpected '']''');
            case '['
                ast = reader.read_vector(rdr);
            case '}'
                error('unexpected ''}''');
            case '{'
                ast = reader.read_hash_map(rdr);
            otherwise
                ast = reader.read_atom(rdr);
            end
        end

        function ast = read_str(str)
            % READ_STR  Tokenize str and parse its first form into an AST.
            tokens = reader.tokenize(str);
            rdr = types.Reader(tokens);
            ast = reader.read_form(rdr);
        end
    end
end