Commit | Line | Data |
---|---|---|
53beaa0a JM |
1 | using System; |
2 | using System.Collections; | |
3 | using System.Collections.Generic; | |
4 | using System.Text.RegularExpressions; | |
5 | using Mal; | |
6 | using MalVal = Mal.types.MalVal; | |
5a159ae7 | 7 | using MalSymbol = Mal.types.MalSymbol; |
53beaa0a JM |
8 | using MalList = Mal.types.MalList; |
9 | using MalVector = Mal.types.MalVector; | |
10 | using MalHashMap = Mal.types.MalHashMap; | |
11 | using MalThrowable = Mal.types.MalThrowable; | |
12 | using MalContinue = Mal.types.MalContinue; | |
13 | ||
14 | namespace Mal { | |
15 | public class reader { | |
/// <summary>
/// Exception type used for parse failures; it carries a message and derives
/// from <c>MalThrowable</c>.
/// </summary>
public class ParseError : MalThrowable {
    /// <param name="msg">Description of the failure, forwarded unchanged to the base constructor.</param>
    public ParseError(string msg)
        : base(msg) {
    }
}
19 | ||
/// <summary>
/// Forward-only cursor over a list of tokens.
/// </summary>
public class Reader {
    private List<string> tokens;
    private int position;

    /// <param name="t">Token list to walk; the cursor starts at its first element.</param>
    public Reader(List<string> t) {
        this.tokens = t;
        this.position = 0;
    }

    /// <summary>
    /// Returns the token under the cursor without consuming it,
    /// or null once every token has been consumed.
    /// </summary>
    public string peek() {
        return position < tokens.Count ? tokens[position] : null;
    }

    /// <summary>
    /// Returns the token under the cursor and moves the cursor one step forward.
    /// Unlike <c>peek</c>, this does no bounds check of its own; reading past the
    /// end surfaces the list indexer's exception.
    /// </summary>
    public string next() {
        // Advance first, then index with the old value, so the cursor moves
        // even when the lookup itself fails.
        int index = position;
        position = index + 1;
        return tokens[index];
    }
}
39 | ||
/// <summary>
/// Splits source text into a flat list of tokens.
/// </summary>
/// <param name="str">Source text to split.</param>
/// <returns>
/// The tokens in input order. Whitespace and commas between tokens are skipped,
/// and comments (from ';' to the end of the line) are dropped.
/// </returns>
public static List<string> tokenize(string str) {
    List<string> tokens = new List<string>();
    // Alternatives of the capture group, in order:
    //   1. the two-character token "~@"
    //   2. a single special character
    //   3. a string literal; the closing quote is optional so that an unterminated
    //      literal stays one token beginning with '"' instead of having its quote
    //      skipped and its contents re-tokenized as bare symbols
    //   4. a ';' comment running to the end of the line
    //   5. a run of ordinary characters (may be empty)
    string pattern = @"[\s ,]*(~@|[\[\]{}()'`~@]|""(?:[\\].|[^\\""])*""?|;.*|[^\s \[\]{}()'""`~@,;]*)";
    Regex regex = new Regex(pattern);
    foreach (Match match in regex.Matches(str)) {
        string token = match.Groups[1].Value;
        // Alternative 5 can match the empty string; empty matches and comment
        // tokens carry no content and are not emitted.
        if (token.Length > 0 && token[0] != ';') {
            tokens.Add(token);
        }
    }
    return tokens;
}
53 | ||
/// <summary>
/// Consumes one token from the reader and converts it to a scalar value.
/// </summary>
/// <param name="rdr">Reader positioned on the token to convert; it is advanced by one token.</param>
/// <returns>
/// A MalInt for an integer token, the Nil/True/False constants for <c>nil</c>,
/// <c>true</c> and <c>false</c>, a MalString for a double-quoted literal (escapes decoded),
/// a MalString holding U+029E followed by the name for a <c>:name</c> token, and a
/// MalSymbol for any other token that contains no double quote.
/// </returns>
/// <exception cref="ParseError">
/// The token matches none of the recognized forms, or matches a form that has no
/// conversion here (a digit run containing dots, or a bare <c>:</c>).
/// </exception>
public static MalVal read_atom(Reader rdr) {
    string token = rdr.next();
    // Groups: 1 integer, 2 digits-with-dots, 3 nil, 4 true, 5 false,
    //         6 string literal, 7 keyword name, 8 symbol.
    // Group 6 spells out the literal's contents (escape pairs or any character other
    // than backslash/quote) so that literals spanning several lines are accepted.
    // Group 7 is anchored so a ':' in the middle of a token can never be taken as
    // the start of a keyword.
    string pattern = @"(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^(""(?:\\.|[^\\""])*"")$|^:(.*)$|(^[^""]*$)";
    Regex regex = new Regex(pattern);
    Match match = regex.Match(token);
    if (!match.Success) {
        throw new ParseError("unrecognized token '" + token + "'");
    }
    if (match.Groups[1].Value != String.Empty) {
        // The token is plain ASCII digits with an optional '-', so parse it
        // independently of the current culture.
        return new Mal.types.MalInt(int.Parse(match.Groups[1].Value,
                                              System.Globalization.CultureInfo.InvariantCulture));
    } else if (match.Groups[3].Value != String.Empty) {
        return Mal.types.Nil;
    } else if (match.Groups[4].Value != String.Empty) {
        return Mal.types.True;
    } else if (match.Groups[5].Value != String.Empty) {
        return Mal.types.False;
    } else if (match.Groups[6].Value != String.Empty) {
        string literal = match.Groups[6].Value;
        // Strip the surrounding quotes before decoding escapes.
        string inner = literal.Substring(1, literal.Length - 2);
        return new Mal.types.MalString(unescape_string(inner));
    } else if (match.Groups[7].Value != String.Empty) {
        return new Mal.types.MalString("\u029e" + match.Groups[7].Value);
    } else if (match.Groups[8].Value != String.Empty) {
        return new Mal.types.MalSymbol(match.Groups[8].Value);
    } else {
        // Reached for group 2 (digits with dots) and for an empty keyword name.
        throw new ParseError("unrecognized '" + match.Groups[0] + "'");
    }
}

// Decodes the escape sequences \" \n and \\ in one left-to-right pass. A single pass
// is required: decoding with successive whole-string replacements lets the second
// half of an escaped backslash pair up with a following 'n' and turn into a newline.
// Any other backslash sequence is copied through unchanged.
private static string unescape_string(string raw) {
    System.Text.StringBuilder decoded = new System.Text.StringBuilder(raw.Length);
    for (int i = 0; i < raw.Length; i++) {
        char c = raw[i];
        if (c == '\\' && i + 1 < raw.Length) {
            char escaped = raw[i + 1];
            if (escaped == '"' || escaped == '\\') {
                decoded.Append(escaped);
                i++;
                continue;
            }
            if (escaped == 'n') {
                decoded.Append('\n');
                i++;
                continue;
            }
        }
        decoded.Append(c);
    }
    return decoded.ToString();
}
86 | ||
/// <summary>
/// Reads a delimited sequence of forms into the supplied list.
/// </summary>
/// <param name="rdr">Reader positioned on the opening delimiter.</param>
/// <param name="lst">Collection that receives each element via <c>conj_BANG</c>; also the return value.</param>
/// <param name="start">Expected first character of the opening token.</param>
/// <param name="end">First character of the token that closes the sequence.</param>
/// <returns>The same <paramref name="lst"/> instance after all elements were added.</returns>
/// <exception cref="ParseError">
/// The first token does not begin with <paramref name="start"/>, or the tokens
/// run out before a closing token is seen.
/// </exception>
public static MalVal read_list(Reader rdr, MalList lst, char start, char end) {
    string opener = rdr.next();
    if (opener[0] != start) {
        throw new ParseError("expected '" + start + "'");
    }

    while (true) {
        string upcoming = rdr.peek();
        if (upcoming == null) {
            throw new ParseError("expected '" + end + "', got EOF");
        }
        if (upcoming[0] == end) {
            break;
        }
        lst.conj_BANG(read_form(rdr));
    }

    // Consume the closing delimiter that ended the loop.
    rdr.next();
    return lst;
}
104 | ||
/// <summary>
/// Reads a <c>{ ... }</c> form: the enclosed forms are collected into a list by
/// <c>read_list</c>, and that list is handed to the MalHashMap constructor.
/// </summary>
/// <param name="rdr">Reader positioned on the opening '{'.</param>
public static MalVal read_hash_map(Reader rdr) {
    MalVal contents = read_list(rdr, new MalList(), '{', '}');
    MalList entries = (MalList)contents;
    return new MalHashMap(entries);
}
109 | ||
110 | ||
/// <summary>
/// Reads one complete form starting at the reader's current token.
/// </summary>
/// <param name="rdr">Token reader; advanced past the form that is read.</param>
/// <returns>
/// For a prefix token (<c>'</c>, <c>`</c>, <c>~</c>, <c>~@</c>, <c>@</c>), a list of the
/// matching symbol followed by the next form; for <c>^</c>, a <c>with-meta</c> list;
/// for an opening bracket, the collection read by <c>read_list</c> or
/// <c>read_hash_map</c>; otherwise the result of <c>read_atom</c>.
/// </returns>
/// <exception cref="MalContinue">No token is available.</exception>
/// <exception cref="ParseError">The current token is a closing bracket.</exception>
public static MalVal read_form(Reader rdr) {
    string token = rdr.peek();
    if (token == null) {
        throw new MalContinue();
    }

    // Prefix tokens: drop the prefix, then wrap the following form.
    if (token == "'") {
        rdr.next();
        return new MalList(new MalSymbol("quote"),
                           read_form(rdr));
    }
    if (token == "`") {
        rdr.next();
        return new MalList(new MalSymbol("quasiquote"),
                           read_form(rdr));
    }
    if (token == "~") {
        rdr.next();
        return new MalList(new MalSymbol("unquote"),
                           read_form(rdr));
    }
    if (token == "~@") {
        rdr.next();
        return new MalList(new MalSymbol("splice-unquote"),
                           read_form(rdr));
    }
    if (token == "^") {
        rdr.next();
        // The metadata form comes first in the input but last in the result.
        MalVal meta = read_form(rdr);
        return new MalList(new MalSymbol("with-meta"),
                           read_form(rdr),
                           meta);
    }
    if (token == "@") {
        rdr.next();
        return new MalList(new MalSymbol("deref"),
                           read_form(rdr));
    }

    // Collections; a closing bracket seen here has no matching opener.
    if (token == "(") {
        return read_list(rdr, new MalList(), '(' , ')');
    }
    if (token == ")") {
        throw new ParseError("unexpected ')'");
    }
    if (token == "[") {
        return read_list(rdr, new MalVector(), '[' , ']');
    }
    if (token == "]") {
        throw new ParseError("unexpected ']'");
    }
    if (token == "{") {
        return read_hash_map(rdr);
    }
    if (token == "}") {
        throw new ParseError("unexpected '}'");
    }

    return read_atom(rdr);
}
150 | ||
151 | ||
/// <summary>
/// Tokenizes <paramref name="str"/> and reads the first form from the resulting tokens.
/// </summary>
/// <param name="str">Source text to read.</param>
/// <returns>The value produced by <c>read_form</c> for the first form in the text.</returns>
public static MalVal read_str(string str) {
    List<string> tokens = tokenize(str);
    Reader rdr = new Reader(tokens);
    return read_form(rdr);
}
155 | } | |
156 | } |