classdef reader
methods (Static = true)
function tokens = tokenize(str)
% TOKENIZE  Split the source text STR into reader tokens.
%   Each regexp match skips leading whitespace and commas, then captures
%   one of: '~@', a single special character, a double-quoted string, a
%   ';' comment, or a run of characters that are neither whitespace nor
%   special characters.
%   The '-' line is the previous pattern. The '+' pattern makes the closing
%   quote of a string optional (so an unterminated string still produces a
%   token that starts with '"') and spells the comment body as "everything
%   up to the next newline".
- re = '[\s,]*(~@|[\[\]{}()''`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}(''"`,;)]*)';
+ re = '[\s,]*(~@|[\[\]{}()''`~^@]|"(?:\\.|[^\\"])*"?|;[^\n]*|[^\s\[\]{}(''"`,;)]*)';
% extract the capture group (to ignore spaces and commas)
tokens = cellfun(@(x) x(1), regexp(str, re, 'tokens'));
% Flag every non-empty token whose first character is ';' (a comment) and
% remove those tokens from the result.
+ comments = cellfun(@(x) length(x) > 0 && x(1) == ';', tokens);
+ tokens = tokens(~comments);
end
function atm = read_atom(rdr)
% READ_ATOM  Convert one token into an atom value.
%   NOTE(review): `token` is not assigned in the lines visible here;
%   presumably it is taken from rdr (e.g. rdr.next()) on a line elided
%   from this excerpt -- confirm against the full file.
%fprintf('in read_atom: %s\n', token);
% Optional '-' followed only by digits: integer literal.
if not(isempty(regexp(token, '^-?[0-9]+$', 'match')))
atm = str2double(token);
- elseif strcmp(token(1), '"')
% Properly terminated string literal: strip the surrounding quotes, then
% unescape the contents.
+ elseif not(isempty(regexp(token, '^"(?:\\.|[^\\"])*"$', 'match')))
atm = token(2:length(token)-1);
% Park escaped backslashes behind a placeholder character so the next two
% replacements cannot misread the second backslash of '\\' as the start of
% another escape; the placeholder is turned back into a single backslash
% at the end.
% NOTE(review): a string that already contains char(255) would also be
% rewritten to a backslash by the last replacement.
+ atm = strrep(atm, '\\', char(255));
atm = strrep(atm, '\"', '"');
atm = strrep(atm, '\n', char(10));
+ atm = strrep(atm, char(255), '\');
% Starts with '"' but did not match the full-string pattern above:
% the literal is unterminated.
+ elseif strcmp(token(1), '"')
+ error('expected ''"'', got EOF');
% Leading ':' marks a keyword; the text after the colon is passed to
% type_utils.keyword.
+ elseif strcmp(token(1), ':')
+ s = token(2:end);
+ atm = type_utils.keyword(s);
elseif strcmp(token, 'nil')
- atm = types.nil;
+ atm = type_utils.nil;
elseif strcmp(token, 'true')
atm = true;
% NOTE(review): the body of the 'false' branch (and anything after it) is
% not visible in this excerpt.
elseif strcmp(token, 'false')
end
end
- function lst = read_list(rdr)
- %fprintf('in read_list\n');
- lst = {};
+ function seq = read_seq(rdr, start, last)
% READ_SEQ  Read a delimited sequence of forms into a cell array.
%   rdr   - token reader; this function uses rdr.next() and rdr.peek()
%   start - opening delimiter token that must come first
%   last  - closing delimiter token that ends the sequence
%   Returns a cell array holding the result of reader.read_form for each
%   element found between the two delimiters.
+ %fprintf('in read_seq\n');
+ seq = {};
token = rdr.next();
- if not(strcmp(token, '('))
- error('expected ''(''');
% NOTE(review): this branch fires when the first token differs from
% `start`, yet the message says "got EOF".
+ if not(strcmp(token, start))
+ error(sprintf('expected ''%s'', got EOF', start));
end
token = rdr.peek();
while true
% A peeked value equal to false is treated as running out of tokens
% before the closing delimiter was seen.
if eq(token, false)
- error('expected '')''');
+ error(sprintf('expected ''%s'', got EOF', last));
end
- if strcmp(token, ')'), break, end
- lst{length(lst)+1} = reader.read_form(rdr);
+ if strcmp(token, last), break, end
+ seq{end+1} = reader.read_form(rdr);
token = rdr.peek();
end
% Consume the closing delimiter that was only peeked at above.
rdr.next();
end
+ function lst = read_list(rdr)
% READ_LIST  Read a '(' ... ')' sequence and pass its elements to
% types.List as separate arguments.
+ seq = reader.read_seq(rdr, '(', ')');
+ lst = types.List(seq{:});
+ end
+
+ function vec = read_vector(rdr)
% READ_VECTOR  Read a '[' ... ']' sequence and pass its elements to
% types.Vector as separate arguments.
+ seq = reader.read_seq(rdr, '[', ']');
+ vec = types.Vector(seq{:});
+ end
+
+ function map = read_hash_map(rdr)
% READ_HASH_MAP  Read a '{' ... '}' sequence and pass its elements to
% types.HashMap as separate arguments.
% NOTE(review): presumably the elements alternate key, value -- confirm
% against types.HashMap.
+ seq = reader.read_seq(rdr, '{', '}');
+ map = types.HashMap(seq{:});
+ end
+
function ast = read_form(rdr)
% READ_FORM  Read one complete form, dispatching on the next token.
%   The token is only peeked here. Reader-macro branches consume it
%   themselves and wrap the following form in a types.List headed by the
%   matching symbol; opening delimiters hand off to the sequence readers;
%   a stray closing delimiter is an error; everything else goes to
%   reader.read_atom.
%   The '-' line switched on the first character only; the '+' line
%   switches on the whole token, which lets '~' and '~@' be distinct cases.
%fprintf('in read_form\n');
token = rdr.peek();
- switch token(1)
+ switch token
% Reader macros: drop the macro token, then wrap the next form.
+ case ''''
+ rdr.next();
+ ast = types.List(types.Symbol('quote'), ...
+ reader.read_form(rdr));
+ case '`'
+ rdr.next();
+ ast = types.List(types.Symbol('quasiquote'), ...
+ reader.read_form(rdr));
+ case '~'
+ rdr.next();
+ ast = types.List(types.Symbol('unquote'), ...
+ reader.read_form(rdr));
+ case '~@'
+ rdr.next();
+ ast = types.List(types.Symbol('splice-unquote'), ...
+ reader.read_form(rdr));
% '^' is followed by two forms: the metadata is read first, but it is
% placed last in the resulting (with-meta <form> <meta>) list.
+ case '^'
+ rdr.next();
+ meta = reader.read_form(rdr);
+ ast = types.List(types.Symbol('with-meta'), ...
+ reader.read_form(rdr), meta);
+ case '@'
+ rdr.next();
+ ast = types.List(types.Symbol('deref'), ...
+ reader.read_form(rdr));
+
% Delimiters: a closing token seen here has no matching opener.
case ')'
error('unexpected '')''');
case '('
ast = reader.read_list(rdr);
+ case ']'
+ error('unexpected '']''');
+ case '['
+ ast = reader.read_vector(rdr);
+ case '}'
+ error('unexpected ''}''');
+ case '{'
+ ast = reader.read_hash_map(rdr);
otherwise
ast = reader.read_atom(rdr);
% NOTE(review): the `end` below closes the switch; the function's own
% closing `end` is not visible in this excerpt.
end
function ast = read_str(str)
% READ_STR  Parse the text STR into a single form.
%   Tokenizes STR, wraps the tokens in a reader object, and returns the
%   result of one reader.read_form call on it. Tokens left over after
%   that first form are not examined here.
%fprintf('in read_str\n');
tokens = reader.tokenize(str);
- rdr = Reader(tokens);
+ %disp(tokens);
+ rdr = types.Reader(tokens);
ast = reader.read_form(rdr);
end
end