Merge pull request #406 from chr15m/lib-alias-hacks
[jackhill/mal.git] / matlab / reader.m
index 7053aef..4fba3ed 100644 (file)
@@ -2,9 +2,11 @@
 classdef reader
     methods (Static = true)
         function tokens = tokenize(str)
-            re = '[\s,]*(~@|[\[\]{}()''`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}(''"`,;)]*)';
+            re = '[\s,]*(~@|[\[\]{}()''`~^@]|"(?:\\.|[^\\"])*"?|;[^\n]*|[^\s\[\]{}(''"`,;)]*)';
             % extract the capture group (to ignore spaces and commas)
             tokens = cellfun(@(x) x(1), regexp(str, re, 'tokens'));
+            comments = cellfun(@(x) length(x) > 0 && x(1) == ';', tokens);  % true for non-empty tokens that begin with a semicolon
+            tokens = tokens(~comments);  % drop the flagged tokens so only the remaining ones are returned
         end
       
         function atm = read_atom(rdr)
@@ -12,12 +14,19 @@ classdef reader
             %fprintf('in read_atom: %s\n', token);
             if not(isempty(regexp(token, '^-?[0-9]+$', 'match')))
                 atm = str2double(token);
-            elseif strcmp(token(1), '"')
+            elseif not(isempty(regexp(token, '^"(?:\\.|[^\\"])*"$', 'match')))
                 atm = token(2:length(token)-1);
+                atm = strrep(atm, '\\', char(255));
                 atm = strrep(atm, '\"', '"');
                 atm = strrep(atm, '\n', char(10));
+                atm = strrep(atm, char(255), '\');
+            elseif strcmp(token(1), '"')
+                error('expected ''"'', got EOF');
+            elseif strcmp(token(1), ':')
+                s = token(2:end);
+                atm = type_utils.keyword(s);
             elseif strcmp(token, 'nil')
-                atm = types.nil;
+                atm = type_utils.nil;
             elseif strcmp(token, 'true')
                 atm = true;
             elseif strcmp(token, 'false')
@@ -27,33 +36,82 @@ classdef reader
             end
         end
 
-        function lst = read_list(rdr)
-            %fprintf('in read_list\n');
-            lst = {};
+        function seq = read_seq(rdr, start, last)  % collect the forms found between the delimiter tokens start and last into a cell array
+            %fprintf('in read_seq\n');
+            seq = {};  % parsed forms are appended here in reading order
             token = rdr.next();
-            if not(strcmp(token, '('))
-                error('expected ''(''');
+            if not(strcmp(token, start))  % NOTE(review): any first token other than start lands here, yet the message below reports EOF
+                error(sprintf('expected ''%s'', got EOF', start));
             end
             token = rdr.peek();
             while true
                 if eq(token, false)
-                    error('expected '')''');
+                    error(sprintf('expected ''%s'', got EOF', last));
                 end
-                if strcmp(token, ')'), break, end
-                lst{length(lst)+1} = reader.read_form(rdr);
+                if strcmp(token, last), break, end  % closing delimiter reached: leave the loop without parsing it as a form
+                seq{end+1} = reader.read_form(rdr);  % parse one form via read_form and append it
                 token = rdr.peek();
             end
             rdr.next();
         end
 
+        function lst = read_list(rdr)  % read a parenthesized sequence from rdr and return it as a types.List
+            seq = reader.read_seq(rdr, '(', ')');  % cell array of the forms between the parentheses
+            lst = types.List(seq{:});  % each form is passed as a separate constructor argument
+        end
+
+        function vec = read_vector(rdr)  % read a square-bracketed sequence from rdr and return it as a types.Vector
+            seq = reader.read_seq(rdr, '[', ']');  % cell array of the forms between the brackets
+            vec = types.Vector(seq{:});  % each form is passed as a separate constructor argument
+        end
+
+        function map = read_hash_map(rdr)  % read a brace-delimited sequence from rdr and return it as a types.HashMap
+            seq = reader.read_seq(rdr, '{', '}');  % cell array of the forms between the braces
+            map = types.HashMap(seq{:});  % NOTE(review): presumably seq alternates keys and values; confirm against types.HashMap
+        end
+
         function ast = read_form(rdr)
             %fprintf('in read_form\n');
             token = rdr.peek();
-            switch token(1)
+            switch token
+            case ''''
+                rdr.next();
+                ast = types.List(types.Symbol('quote'), ...
+                                 reader.read_form(rdr));
+            case '`'
+                rdr.next();
+                ast = types.List(types.Symbol('quasiquote'), ...
+                                 reader.read_form(rdr));
+            case '~'
+                rdr.next();
+                ast = types.List(types.Symbol('unquote'), ...
+                                 reader.read_form(rdr));
+            case '~@'
+                rdr.next();
+                ast = types.List(types.Symbol('splice-unquote'), ...
+                                 reader.read_form(rdr));
+            case '^'
+                rdr.next();
+                meta = reader.read_form(rdr);
+                ast = types.List(types.Symbol('with-meta'), ...
+                                 reader.read_form(rdr), meta);
+            case '@'
+                rdr.next();
+                ast = types.List(types.Symbol('deref'), ...
+                                 reader.read_form(rdr));
+
             case ')'
                 error('unexpected '')''');
             case '('
                 ast = reader.read_list(rdr);
+            case ']'
+                error('unexpected '']''');
+            case '['
+                ast = reader.read_vector(rdr);
+            case '}'
+                error('unexpected ''}''');
+            case '{'
+                ast = reader.read_hash_map(rdr);
             otherwise
                 ast = reader.read_atom(rdr);
             end
@@ -62,7 +120,8 @@ classdef reader
         function ast = read_str(str)
             %fprintf('in read_str\n');
             tokens = reader.tokenize(str);
-            rdr = Reader(tokens);
+            %disp(tokens);
+            rdr = types.Reader(tokens);  % wrap the token list in the reader object that read_form consumes
             ast = reader.read_form(rdr);
         end
     end