R: atom support, fixes for self-hosting.
[jackhill/mal.git] / r / reader.r
1 ..reader.. <- TRUE
2
3 if(!exists("..types..")) source("types.r")
4
5 new.Reader <- function(tokens) {
6 e <- structure(new.env(), class="Reader")
7 e$tokens <- tokens
8 e$position <- 1
9 e
10 }
11
12 Reader.peek <- function(rdr) {
13 if (rdr$position > length(rdr$tokens)) return(NULL)
14 rdr$tokens[[rdr$position]]
15 }
16
17 Reader.next <- function(rdr) {
18 if (rdr$position > length(rdr$tokens)) return(NULL)
19 rdr$position <- rdr$position + 1
20 rdr$tokens[[rdr$position-1]]
21 }
22
23 tokenize <- function(str) {
24 re <- "[\\s,]*(~@|[\\[\\]\\{\\}\\(\\)'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]\\{\\}\\('\"`,;\\)]*)"
25 m <- lapply(regmatches(str, gregexpr(re, str, perl=TRUE)),
26 function(e) sub("^[\\s,]+", "", e, perl=TRUE))
27 res <- list()
28 i <- 1
29 for(v in m[[1]]) {
30 if (v == "" || substr(v,1,1) == ";") next
31 res[[i]] <- v
32 i <- i+1
33 }
34 res
35 }
36
37 re_match <- function(re, str) { length(grep(re, c(str))) > 0 }
38
39 read_atom <- function(rdr) {
40 token <- Reader.next(rdr)
41 if (re_match("^-?[0-9]+$", token)) {
42 as.integer(token)
43 } else if (re_match("^-?[0-9][0-9.]*$", token)) {
44 as.double(token)
45 } else if (substr(token,1,1) == "\"") {
46 gsub("\\\\n", "\\n",
47 gsub("\\\\\"", "\"",
48 substr(token, 2, nchar(token)-1)))
49 } else if (token == "nil") {
50 nil
51 } else if (token == "true") {
52 TRUE
53 } else if (token == "false") {
54 FALSE
55 } else {
56 new.symbol(token)
57 }
58 }
59
60 read_seq <- function(rdr, start="(", end=")") {
61 lst <- list()
62 token <- Reader.next(rdr)
63 if (token != start) {
64 throw(concat("expected '", start, "'"))
65 }
66 repeat {
67 token <- Reader.peek(rdr)
68 if (is.null(token)) {
69 throw(concat("expected '", end, "', got EOF"))
70 }
71 if (token == end) break
72 lst[[length(lst)+1]] <- read_form(rdr)
73 }
74 Reader.next(rdr)
75 new.listl(lst)
76 }
77
78 read_form <- function(rdr) {
79 token <- Reader.peek(rdr)
80 if (token == "'") {
81 . <- Reader.next(rdr);
82 new.list(new.symbol("quote"), read_form(rdr))
83 } else if (token == "`") {
84 . <- Reader.next(rdr);
85 new.list(new.symbol("quasiquote"), read_form(rdr))
86 } else if (token == "~") {
87 . <- Reader.next(rdr);
88 new.list(new.symbol("unquote"), read_form(rdr))
89 } else if (token == "~@") {
90 . <- Reader.next(rdr);
91 new.list(new.symbol("splice-unquote"), read_form(rdr))
92 } else if (token == "^") {
93 . <- Reader.next(rdr)
94 m <- read_form(rdr)
95 new.list(new.symbol("with-meta"), read_form(rdr), m)
96 } else if (token == "@") {
97 . <- Reader.next(rdr);
98 new.list(new.symbol("deref"), read_form(rdr))
99 } else if (token == ")") {
100 throw("unexpected ')'")
101 } else if (token == "(") {
102 new.listl(read_seq(rdr))
103 } else if (token == "]") {
104 throw("unexpected ']'")
105 } else if (token == "[") {
106 new.vectorl(read_seq(rdr, "[", "]"))
107 } else if (token == "}") {
108 throw("unexpected '}'")
109 } else if (token == "{") {
110 new.hash_mapl(read_seq(rdr, "{", "}"))
111 } else {
112 read_atom(rdr)
113 }
114 }
115
116 read_str <- function(str) {
117 tokens <- tokenize(str)
118 if (length(tokens) == 0) return(nil)
119 return(read_form(new.Reader(tokens)))
120 }
121
122 #cat("---\n")
123 #print(tokenize("123"))
124 #cat("---\n")
125 #print(tokenize(" ( 123 456 abc \"def\" ) "))
126
127 #rdr <- new.reader(tokenize(" ( 123 456 abc \"def\" ) "))
128 #Reader.peek(rdr)
129 #Reader.next(rdr)
130 #Reader.next(rdr)
131 #Reader.next(rdr)
132 #Reader.next(rdr)
133 #Reader.next(rdr)
134 #Reader.next(rdr)
135 #Reader.next(rdr)