Commit | Line | Data |
---|---|---|
4d1456b9 JM |
1 | ..reader.. <- TRUE |
2 | ||
3 | if(!exists("..types..")) source("types.r") | |
4 | ||
5 | new.Reader <- function(tokens) { | |
6 | e <- structure(new.env(), class="Reader") | |
7 | e$tokens <- tokens | |
8 | e$position <- 1 | |
9 | e | |
10 | } | |
11 | ||
12 | Reader.peek <- function(rdr) { | |
13 | if (rdr$position > length(rdr$tokens)) return(NULL) | |
14 | rdr$tokens[[rdr$position]] | |
15 | } | |
16 | ||
17 | Reader.next <- function(rdr) { | |
18 | if (rdr$position > length(rdr$tokens)) return(NULL) | |
19 | rdr$position <- rdr$position + 1 | |
20 | rdr$tokens[[rdr$position-1]] | |
21 | } | |
22 | ||
23 | tokenize <- function(str) { | |
24 | re <- "[\\s,]*(~@|[\\[\\]\\{\\}\\(\\)'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]\\{\\}\\('\"`,;\\)]*)" | |
25 | m <- lapply(regmatches(str, gregexpr(re, str, perl=TRUE)), | |
26 | function(e) sub("^[\\s,]+", "", e, perl=TRUE)) | |
27 | res <- list() | |
28 | i <- 1 | |
29 | for(v in m[[1]]) { | |
c30efef4 | 30 | if (v == "" || substr(v,1,1) == ";") next |
4d1456b9 JM |
31 | res[[i]] <- v |
32 | i <- i+1 | |
33 | } | |
34 | res | |
35 | } | |
36 | ||
37 | re_match <- function(re, str) { length(grep(re, c(str))) > 0 } | |
38 | ||
39 | read_atom <- function(rdr) { | |
40 | token <- Reader.next(rdr) | |
8128c69a JM |
41 | if (re_match("^-?[0-9]+$", token)) { |
42 | as.integer(token) | |
43 | } else if (re_match("^-?[0-9][0-9.]*$", token)) { | |
44 | as.double(token) | |
4d1456b9 | 45 | } else if (substr(token,1,1) == "\"") { |
01feedfe JM |
46 | gsub("\\\\n", "\\n", |
47 | gsub("\\\\\"", "\"", | |
48 | substr(token, 2, nchar(token)-1))) | |
4d1456b9 | 49 | } else if (token == "nil") { |
01feedfe | 50 | nil |
4d1456b9 JM |
51 | } else if (token == "true") { |
52 | TRUE | |
53 | } else if (token == "false") { | |
54 | FALSE | |
55 | } else { | |
c30efef4 | 56 | new.symbol(token) |
4d1456b9 JM |
57 | } |
58 | } | |
59 | ||
60 | read_seq <- function(rdr, start="(", end=")") { | |
61 | lst <- list() | |
62 | token <- Reader.next(rdr) | |
63 | if (token != start) { | |
64 | throw(concat("expected '", start, "'")) | |
65 | } | |
66 | repeat { | |
67 | token <- Reader.peek(rdr) | |
68 | if (is.null(token)) { | |
69 | throw(concat("expected '", end, "', got EOF")) | |
70 | } | |
71 | if (token == end) break | |
72 | lst[[length(lst)+1]] <- read_form(rdr) | |
73 | } | |
74 | Reader.next(rdr) | |
75 | new.listl(lst) | |
76 | } | |
77 | ||
78 | read_form <- function(rdr) { | |
79 | token <- Reader.peek(rdr) | |
c30efef4 JM |
80 | if (token == "'") { |
81 | . <- Reader.next(rdr); | |
82 | new.list(new.symbol("quote"), read_form(rdr)) | |
83 | } else if (token == "`") { | |
84 | . <- Reader.next(rdr); | |
85 | new.list(new.symbol("quasiquote"), read_form(rdr)) | |
86 | } else if (token == "~") { | |
87 | . <- Reader.next(rdr); | |
88 | new.list(new.symbol("unquote"), read_form(rdr)) | |
89 | } else if (token == "~@") { | |
90 | . <- Reader.next(rdr); | |
91 | new.list(new.symbol("splice-unquote"), read_form(rdr)) | |
36737ae5 JM |
92 | } else if (token == "^") { |
93 | . <- Reader.next(rdr) | |
94 | m <- read_form(rdr) | |
95 | new.list(new.symbol("with-meta"), read_form(rdr), m) | |
96 | } else if (token == "@") { | |
97 | . <- Reader.next(rdr); | |
98 | new.list(new.symbol("deref"), read_form(rdr)) | |
c30efef4 | 99 | } else if (token == ")") { |
4d1456b9 JM |
100 | throw("unexpected ')'") |
101 | } else if (token == "(") { | |
102 | new.listl(read_seq(rdr)) | |
103 | } else if (token == "]") { | |
104 | throw("unexpected ']'") | |
105 | } else if (token == "[") { | |
106 | new.vectorl(read_seq(rdr, "[", "]")) | |
36737ae5 JM |
107 | } else if (token == "}") { |
108 | throw("unexpected '}'") | |
109 | } else if (token == "{") { | |
110 | new.hash_mapl(read_seq(rdr, "{", "}")) | |
4d1456b9 JM |
111 | } else { |
112 | read_atom(rdr) | |
113 | } | |
114 | } | |
115 | ||
116 | read_str <- function(str) { | |
117 | tokens <- tokenize(str) | |
01feedfe | 118 | if (length(tokens) == 0) return(nil) |
4d1456b9 JM |
119 | return(read_form(new.Reader(tokens))) |
120 | } | |
121 | ||
122 | #cat("---\n") | |
123 | #print(tokenize("123")) | |
124 | #cat("---\n") | |
125 | #print(tokenize(" ( 123 456 abc \"def\" ) ")) | |
126 | ||
127 | #rdr <- new.reader(tokenize(" ( 123 456 abc \"def\" ) ")) | |
128 | #Reader.peek(rdr) | |
129 | #Reader.next(rdr) | |
130 | #Reader.next(rdr) | |
131 | #Reader.next(rdr) | |
132 | #Reader.next(rdr) | |
133 | #Reader.next(rdr) | |
134 | #Reader.next(rdr) | |
135 | #Reader.next(rdr) |