Commit | Line | Data |
---|---|---|
53beaa0a JM |
1 | using System; |
2 | using System.Collections; | |
3 | using System.Collections.Generic; | |
4 | using System.Text.RegularExpressions; | |
5 | using Mal; | |
6 | using MalVal = Mal.types.MalVal; | |
5a159ae7 | 7 | using MalSymbol = Mal.types.MalSymbol; |
53beaa0a JM |
8 | using MalList = Mal.types.MalList; |
9 | using MalVector = Mal.types.MalVector; | |
10 | using MalHashMap = Mal.types.MalHashMap; | |
11 | using MalThrowable = Mal.types.MalThrowable; | |
12 | using MalContinue = Mal.types.MalContinue; | |
13 | ||
14 | namespace Mal { | |
15 | public class reader { | |
// Error type thrown by the reader functions when the token stream cannot be
// turned into a form. It only forwards its message to MalThrowable.
public class ParseError : MalThrowable {
    public ParseError(string msg)
        : base(msg)
    {
    }
}
19 | ||
// Forward-only cursor over a token list: peek() inspects the current token,
// next() consumes it.
public class Reader {
    List<string> tokens;
    int position;

    // Wraps the given token list and starts at its first element.
    public Reader(List<string> t) {
        position = 0;
        tokens = t;
    }

    // Returns the current token without consuming it, or null once every
    // token has been consumed.
    public string peek() {
        return position < tokens.Count ? tokens[position] : null;
    }

    // Returns the current token and moves the cursor one step forward.
    // There is no bounds guard here: the list indexer throws when the
    // cursor is already past the last token.
    public string next() {
        int index = position;
        // Advance before indexing so the cursor state matches the
        // post-increment form even when the indexer throws.
        position = index + 1;
        return tokens[index];
    }
}
39 | ||
// Token grammar, built once instead of on every call. After skipping
// whitespace and commas, group 1 captures one of: "~@", a single special
// character, a string literal, a ';' comment running to end of line, or a
// run of ordinary characters. The closing quote of a string literal is
// optional so that an unterminated literal is captured as one token
// (opening quote through end of input) rather than having its quote
// dropped and its contents lexed as symbols.
private static readonly Regex token_regex = new Regex(
    @"[\s ,]*(~@|[\[\]{}()'`~@]|""(?:[\\].|[^\\""])*""?|;.*|[^\s \[\]{}()'""`~@,;]*)");

// Splits source text into tokens.
//   str: the text to tokenize.
// Returns the tokens in input order; whitespace, commas and ';' comments
// produce no tokens.
public static List<string> tokenize(string str) {
    List<string> tokens = new List<string>();
    foreach (Match match in token_regex.Matches(str)) {
        string token = match.Groups[1].Value;
        // The pattern can match the empty string (for example at the end
        // of the input); those matches and comment tokens are skipped.
        if (token.Length == 0 || token[0] == ';') {
            continue;
        }
        tokens.Add(token);
    }
    return tokens;
}
53 | ||
// Classifies a single token; built once instead of on every call.
// Groups: 1 integer, 2 other numeric-looking text (digits and dots),
// 3 nil, 4 true, 5 false, 6 string literal including its quotes,
// 7 keyword name (text after ':'), 8 symbol (anything without a quote).
// Every alternative is anchored to the whole token, and Singleline lets
// '.' match a line break so string literals may span lines.
private static readonly Regex atom_regex = new Regex(
    @"(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^(""(?:\\.|[^\\""])*"")$|^:(.*)$|(^[^""]*$)",
    RegexOptions.Singleline);

// Decodes the escape sequences of a string literal body in one
// left-to-right pass: \n -> newline, \" -> quote, \\ -> backslash.
// Any other backslash pair is kept unchanged. A single pass is needed so
// that the 'n' following an escaped backslash is not taken as a newline.
private static string unescape_string(string body) {
    return Regex.Replace(body, @"\\(.)", delegate(Match m) {
        switch (m.Groups[1].Value) {
            case "n": return "\n";
            case "\"": return "\"";
            case "\\": return "\\";
            default: return m.Value;
        }
    }, RegexOptions.Singleline);
}

// Consumes one token from rdr and converts it into an atomic value:
// integer, nil, true, false, string, keyword or symbol.
//   rdr: reader positioned at the token to convert.
// Returns the value for the token.
// Throws ParseError when the token matches no known form, when it is
// numeric-looking but not an integer (for example "1.5"), or when an
// integer literal does not fit in an int.
public static MalVal read_atom(Reader rdr) {
    string token = rdr.next();
    Match match = atom_regex.Match(token);
    if (!match.Success) {
        throw new ParseError("unrecognized token '" + token + "'");
    }
    if (match.Groups[1].Value != String.Empty) {
        int number;
        // Invariant culture keeps parsing independent of the host locale;
        // TryParse turns an out-of-range literal into a ParseError instead
        // of an OverflowException.
        if (!int.TryParse(match.Groups[1].Value,
                          System.Globalization.NumberStyles.AllowLeadingSign,
                          System.Globalization.CultureInfo.InvariantCulture,
                          out number)) {
            throw new ParseError("integer out of range '" + token + "'");
        }
        return new Mal.types.MalInt(number);
    } else if (match.Groups[3].Value != String.Empty) {
        return Mal.types.Nil;
    } else if (match.Groups[4].Value != String.Empty) {
        return Mal.types.True;
    } else if (match.Groups[5].Value != String.Empty) {
        return Mal.types.False;
    } else if (match.Groups[6].Value != String.Empty) {
        string literal = match.Groups[6].Value;
        // Strip the surrounding quotes, then decode the escapes.
        string body = literal.Substring(1, literal.Length - 2);
        return new Mal.types.MalString(unescape_string(body));
    } else if (match.Groups[7].Value != String.Empty) {
        // Keywords are represented as strings carrying a \u029e prefix.
        return new Mal.types.MalString("\u029e" + match.Groups[7].Value);
    } else if (match.Groups[8].Value != String.Empty) {
        return new Mal.types.MalSymbol(match.Groups[8].Value);
    } else {
        // Reached for group 2 (numeric-looking non-integers) and for
        // matches whose capture is empty, such as a lone ':'.
        throw new ParseError("unrecognized '" + match.Groups[0] + "'");
    }
}
85 | ||
// Reads a delimited sequence of forms into lst.
//   rdr:   reader positioned at the opening delimiter token.
//   lst:   collection that receives each parsed form through conj_BANG.
//   start: character the opening token must begin with.
//   end:   character that marks the closing token.
// Returns lst after the closing token has been consumed.
// Throws ParseError when the opening token does not begin with start, or
// when the tokens run out before a closing token is seen.
public static MalVal read_list(Reader rdr, MalList lst, char start, char end) {
    string opening = rdr.next();
    if (opening[0] != start) {
        throw new ParseError("expected '" + start + "'");
    }

    for (;;) {
        string upcoming = rdr.peek();
        if (upcoming == null) {
            throw new ParseError("expected '" + end + "', got EOF");
        }
        if (upcoming[0] == end) {
            break;
        }
        lst.conj_BANG(read_form(rdr));
    }

    // Consume the closing delimiter.
    rdr.next();
    return lst;
}
103 | ||
// Reads a '{' ... '}' delimited sequence with read_list and wraps the
// resulting list in a MalHashMap.
//   rdr: reader positioned at the '{' token.
public static MalVal read_hash_map(Reader rdr) {
    MalVal entries = read_list(rdr, new MalList(), '{', '}');
    MalHashMap map = new MalHashMap((MalList)entries);
    return map;
}
108 | ||
109 | ||
// Consumes the current (reader-macro) token, reads the form that follows
// it, and returns the two-element list (name form).
private static MalVal read_wrapped(Reader rdr, string name) {
    rdr.next();
    return new MalList(new MalSymbol(name), read_form(rdr));
}

// Reads one complete form starting at the reader's current token.
//   rdr: reader positioned at the first token of the form.
// Returns the parsed form: an expanded reader macro, a list, a vector,
// a hash map, or an atom.
// Throws MalContinue when no token is left, and ParseError when a closing
// delimiter appears where a form was expected.
public static MalVal read_form(Reader rdr) {
    string token = rdr.peek();
    if (token == null) {
        throw new MalContinue();
    }

    // Reader macros: each expands to a list headed by a symbol.
    if (token == "'") {
        return read_wrapped(rdr, "quote");
    }
    if (token == "`") {
        return read_wrapped(rdr, "quasiquote");
    }
    if (token == "~") {
        return read_wrapped(rdr, "unquote");
    }
    if (token == "~@") {
        return read_wrapped(rdr, "splice-unquote");
    }
    if (token == "^") {
        rdr.next();
        // The metadata form comes first in the input but is placed last
        // in the resulting (with-meta form meta) list.
        MalVal meta = read_form(rdr);
        return new MalList(new MalSymbol("with-meta"),
                           read_form(rdr),
                           meta);
    }
    if (token == "@") {
        return read_wrapped(rdr, "deref");
    }

    // Collections, and closing delimiters that have no matching opener.
    if (token == "(") {
        return read_list(rdr, new MalList(), '(', ')');
    }
    if (token == ")") {
        throw new ParseError("unexpected ')'");
    }
    if (token == "[") {
        return read_list(rdr, new MalVector(), '[', ']');
    }
    if (token == "]") {
        throw new ParseError("unexpected ']'");
    }
    if (token == "{") {
        return read_hash_map(rdr);
    }
    if (token == "}") {
        throw new ParseError("unexpected '}'");
    }

    return read_atom(rdr);
}
149 | ||
150 | ||
// Tokenizes str and reads the first form from the resulting tokens.
//   str: source text to read.
// Returns the form produced by read_form.
public static MalVal read_str(string str) {
    List<string> tokens = tokenize(str);
    Reader rdr = new Reader(tokens);
    return read_form(rdr);
}
154 | } | |
155 | } |