Commit | Line | Data |
---|---|---|
53beaa0a JM |
1 | using System; |
2 | using System.Collections; | |
3 | using System.Collections.Generic; | |
4 | using System.Text.RegularExpressions; | |
5 | using Mal; | |
6 | using MalVal = Mal.types.MalVal; | |
5a159ae7 | 7 | using MalSymbol = Mal.types.MalSymbol; |
53beaa0a JM |
8 | using MalList = Mal.types.MalList; |
9 | using MalVector = Mal.types.MalVector; | |
10 | using MalHashMap = Mal.types.MalHashMap; | |
11 | using MalThrowable = Mal.types.MalThrowable; | |
12 | using MalContinue = Mal.types.MalContinue; | |
13 | ||
14 | namespace Mal { | |
15 | public class reader { | |
// Error type thrown by the reader functions in this file when the token
// stream cannot be turned into a form.
public class ParseError : MalThrowable {
    // msg: description of the problem; forwarded unchanged to the
    // MalThrowable base constructor.
    public ParseError(string msg)
        : base(msg) {
    }
}
19 | ||
// Cursor over a list of tokens: keeps the index of the next token to hand out.
public class Reader {
    List<string> tokens;
    int position;

    // Wraps the token list t (not copied) and starts at index 0.
    public Reader(List<string> t) {
        position = 0;
        tokens = t;
    }

    // Returns the current token without consuming it, or null once the
    // cursor has moved past the last token.
    public string peek() {
        return position < tokens.Count ? tokens[position] : null;
    }

    // Returns the current token and moves the cursor forward by one.
    // No bounds check is done here: the list indexer throws when the
    // cursor is already past the end.
    public string next() {
        int index = position;
        // Advance before indexing so the cursor moves even if the indexer throws.
        position = index + 1;
        return tokens[index];
    }
}
39 | ||
// Splits source text into the list of tokens consumed by Reader.
//
// str: the text to split.
// Returns the tokens in order of appearance. Whitespace, commas and
// ';' comments produce no tokens.
//
// A string literal whose closing quote is missing is returned as a single
// token that starts with '"' but does not end with one, so that the
// malformed literal stays visible to whoever consumes the token instead of
// being silently re-tokenized as symbols.
public static List<string> tokenize(string str) {
    // Leading whitespace/commas are skipped; capture group 1 is the token:
    //   ~@                       the two-character splice-unquote marker
    //   [\[\]{}()'`~@]           one special character
    //   "(?:[\\].|[^\\"])*"?     a string literal (closing quote optional, see above)
    //   ;.*                      a comment running to the end of the line
    //   [^\s \[\]{}()'"`~@,;]*   a run of any other characters (may be empty)
    string pattern = @"[\s ,]*(~@|[\[\]{}()'`~@]|""(?:[\\].|[^\\""])*""?|;.*|[^\s \[\]{}()'""`~@,;]*)";
    Regex regex = new Regex(pattern);
    List<string> tokens = new List<string>();
    foreach (Match match in regex.Matches(str)) {
        string token = match.Groups[1].Value;
        // The last alternative can match nothing at all, and comments are
        // recognised only so that they can be discarded.
        if (token.Length == 0 || token[0] == ';') {
            continue;
        }
        tokens.Add(token);
    }
    return tokens;
}
53 | ||
// Consumes one token from rdr and converts it to an atom:
//   integer text          -> MalInteger
//   nil / true / false    -> Mal.types.Nil / True / False
//   "..." string literal  -> MalString with escapes decoded
//   any other text that contains no double quote -> MalSymbol
//
// Throws ParseError when the token fits none of these shapes, when it only
// looks numeric without being an integer (e.g. "1.5"), or when an integer
// does not fit in an int.
public static MalVal read_atom(Reader rdr) {
    string token = rdr.next();
    // Capture groups: 1 integer, 2 other numeric-looking text, 3 nil, 4 true,
    // 5 false, 6 complete string literal, 7 text without any double quote.
    // Group 6 spells out the string grammar (escape pairs or non-quote,
    // non-backslash characters) rather than using ".*" so that literals
    // containing raw newlines are accepted.
    string pattern = @"(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^(""(?:[\\].|[^\\""])*"")$|(^[^""]*$)";
    Regex regex = new Regex(pattern);
    Match match = regex.Match(token);
    if (!match.Success) {
        throw new ParseError("unrecognized token '" + token + "'");
    }
    if (match.Groups[1].Value != String.Empty) {
        // Parse culture-independently (the regex guarantees ASCII digits with
        // an optional '-') and report overflow as a ParseError rather than
        // letting OverflowException escape.
        int number;
        if (!int.TryParse(match.Groups[1].Value,
                          System.Globalization.NumberStyles.AllowLeadingSign,
                          System.Globalization.CultureInfo.InvariantCulture,
                          out number)) {
            throw new ParseError("integer out of range '" + token + "'");
        }
        return new Mal.types.MalInteger(number);
    } else if (match.Groups[3].Value != String.Empty) {
        return Mal.types.Nil;
    } else if (match.Groups[4].Value != String.Empty) {
        return Mal.types.True;
    } else if (match.Groups[5].Value != String.Empty) {
        return Mal.types.False;
    } else if (match.Groups[6].Value != String.Empty) {
        string literal = match.Groups[6].Value;
        // Drop the surrounding quotes, then decode escapes in ONE left-to-right
        // pass. Chained Replace calls would mis-read the tail of an escaped
        // backslash as the start of another escape (\\n -> backslash + newline).
        string inner = literal.Substring(1, literal.Length - 2);
        string str = Regex.Replace(inner, @"\\(.)", delegate(Match esc) {
            switch (esc.Groups[1].Value) {
                case "n": return "\n";
                case "\"": return "\"";
                case "\\": return "\\";
                default: return esc.Value; // unknown escapes are kept verbatim
            }
        });
        return new Mal.types.MalString(str);
    } else if (match.Groups[7].Value != String.Empty) {
        return new Mal.types.MalSymbol(match.Groups[7].Value);
    } else {
        // Reached for group 2: numeric-looking text that is not an integer.
        throw new ParseError("unrecognized '" + match.Groups[0] + "'");
    }
}
83 | ||
// Reads a delimited sequence of forms from rdr into lst.
//
// rdr:   token cursor; its next token must begin with start.
// lst:   collection that receives each element through conj_BANG.
// start: expected first character of the opening token.
// end:   first character of the token that closes the sequence.
// Returns lst after the closing token has been consumed.
// Throws ParseError if the opening token does not begin with start, or if
// the tokens run out before a closing token is seen.
public static MalVal read_list(Reader rdr, MalList lst, char start, char end) {
    string opener = rdr.next();
    if (opener[0] != start) {
        throw new ParseError("expected '" + start + "'");
    }

    for (;;) {
        string upcoming = rdr.peek();
        if (upcoming == null) {
            throw new ParseError("expected '" + end + "', got EOF");
        }
        if (upcoming[0] == end) {
            break;
        }
        lst.conj_BANG(read_form(rdr));
    }

    // Step over the closing delimiter.
    rdr.next();
    return lst;
}
101 | ||
// Reads a '{' ... '}' delimited sequence of forms from rdr via read_list
// and passes the resulting MalList to the MalHashMap constructor.
public static MalVal read_hash_map(Reader rdr) {
    MalVal entries = read_list(rdr, new MalList(), '{', '}');
    return new MalHashMap((MalList)entries);
}
106 | ||
107 | ||
// Consumes the prefix token at the cursor, reads the form that follows it,
// and returns the two-element list (symbolName form).
private static MalVal read_prefixed(Reader rdr, string symbolName) {
    rdr.next();
    return new MalList(new MalSymbol(symbolName), read_form(rdr));
}

// Reads one complete form starting at the reader's current token.
//
// Prefix tokens expand to lists:
//   'x  -> (quote x)         `x -> (quasiquote x)
//   ~x  -> (unquote x)       ~@x -> (splice-unquote x)
//   @x  -> (deref x)         ^m x -> (with-meta x m)
// '(' and '[' read a MalList / MalVector via read_list, '{' reads a hash
// map via read_hash_map, and every other token is handed to read_atom.
//
// Throws MalContinue when there is no token left to read, and ParseError
// when the current token is a closing delimiter.
public static MalVal read_form(Reader rdr) {
    string head = rdr.peek();
    if (head == null) {
        throw new MalContinue();
    }

    switch (head) {
        case "'":  return read_prefixed(rdr, "quote");
        case "`":  return read_prefixed(rdr, "quasiquote");
        case "~":  return read_prefixed(rdr, "unquote");
        case "~@": return read_prefixed(rdr, "splice-unquote");
        case "@":  return read_prefixed(rdr, "deref");
        case "^": {
            rdr.next();
            // The metadata is written first in the source but placed last
            // in the resulting list.
            MalVal meta = read_form(rdr);
            return new MalList(new MalSymbol("with-meta"),
                               read_form(rdr),
                               meta);
        }

        case "(": return read_list(rdr, new MalList(), '(', ')');
        case "[": return read_list(rdr, new MalVector(), '[', ']');
        case "{": return read_hash_map(rdr);

        case ")": throw new ParseError("unexpected ')'");
        case "]": throw new ParseError("unexpected ']'");
        case "}": throw new ParseError("unexpected '}'");

        default: return read_atom(rdr);
    }
}
147 | ||
148 | ||
// Tokenizes str and reads a single form from the resulting tokens via
// read_form. Any tokens after that first form are not consumed.
public static MalVal read_str(string str) {
    List<string> tokens = tokenize(str);
    Reader rdr = new Reader(tokens);
    return read_form(rdr);
}
152 | } | |
153 | } |