Commit | Line | Data |
---|---|---|
4d1456b9 JM |
1 | ..reader.. <- TRUE |
2 | ||
3 | if(!exists("..types..")) source("types.r") | |
4 | ||
5 | new.Reader <- function(tokens) { | |
6 | e <- structure(new.env(), class="Reader") | |
7 | e$tokens <- tokens | |
8 | e$position <- 1 | |
9 | e | |
10 | } | |
11 | ||
12 | Reader.peek <- function(rdr) { | |
13 | if (rdr$position > length(rdr$tokens)) return(NULL) | |
14 | rdr$tokens[[rdr$position]] | |
15 | } | |
16 | ||
17 | Reader.next <- function(rdr) { | |
18 | if (rdr$position > length(rdr$tokens)) return(NULL) | |
19 | rdr$position <- rdr$position + 1 | |
20 | rdr$tokens[[rdr$position-1]] | |
21 | } | |
22 | ||
23 | tokenize <- function(str) { | |
24 | re <- "[\\s,]*(~@|[\\[\\]\\{\\}\\(\\)'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]\\{\\}\\('\"`,;\\)]*)" | |
25 | m <- lapply(regmatches(str, gregexpr(re, str, perl=TRUE)), | |
26 | function(e) sub("^[\\s,]+", "", e, perl=TRUE)) | |
27 | res <- list() | |
28 | i <- 1 | |
29 | for(v in m[[1]]) { | |
c30efef4 | 30 | if (v == "" || substr(v,1,1) == ";") next |
4d1456b9 JM |
31 | res[[i]] <- v |
32 | i <- i+1 | |
33 | } | |
34 | res | |
35 | } | |
36 | ||
37 | re_match <- function(re, str) { length(grep(re, c(str))) > 0 } | |
38 | ||
39 | read_atom <- function(rdr) { | |
40 | token <- Reader.next(rdr) | |
8128c69a JM |
41 | if (re_match("^-?[0-9]+$", token)) { |
42 | as.integer(token) | |
43 | } else if (re_match("^-?[0-9][0-9.]*$", token)) { | |
44 | as.double(token) | |
4d1456b9 | 45 | } else if (substr(token,1,1) == "\"") { |
a821cd72 | 46 | gsub("\x7f", "\\\\", |
8d78bc26 JM |
47 | gsub("\\\\n", "\n", |
48 | gsub("\\\\\"", "\"", | |
a821cd72 JM |
49 | gsub("\\\\\\\\", "\x7f", |
50 | substr(token, 2, nchar(token)-1))))) | |
b8ee29b2 JM |
51 | } else if (substr(token,1,1) == ":") { |
52 | new.keyword(substring(token,2)) | |
4d1456b9 | 53 | } else if (token == "nil") { |
01feedfe | 54 | nil |
4d1456b9 JM |
55 | } else if (token == "true") { |
56 | TRUE | |
57 | } else if (token == "false") { | |
58 | FALSE | |
59 | } else { | |
c30efef4 | 60 | new.symbol(token) |
4d1456b9 JM |
61 | } |
62 | } | |
63 | ||
64 | read_seq <- function(rdr, start="(", end=")") { | |
65 | lst <- list() | |
66 | token <- Reader.next(rdr) | |
67 | if (token != start) { | |
68 | throw(concat("expected '", start, "'")) | |
69 | } | |
70 | repeat { | |
71 | token <- Reader.peek(rdr) | |
72 | if (is.null(token)) { | |
73 | throw(concat("expected '", end, "', got EOF")) | |
74 | } | |
75 | if (token == end) break | |
76 | lst[[length(lst)+1]] <- read_form(rdr) | |
77 | } | |
78 | Reader.next(rdr) | |
79 | new.listl(lst) | |
80 | } | |
81 | ||
82 | read_form <- function(rdr) { | |
83 | token <- Reader.peek(rdr) | |
c30efef4 JM |
84 | if (token == "'") { |
85 | . <- Reader.next(rdr); | |
86 | new.list(new.symbol("quote"), read_form(rdr)) | |
87 | } else if (token == "`") { | |
88 | . <- Reader.next(rdr); | |
89 | new.list(new.symbol("quasiquote"), read_form(rdr)) | |
90 | } else if (token == "~") { | |
91 | . <- Reader.next(rdr); | |
92 | new.list(new.symbol("unquote"), read_form(rdr)) | |
93 | } else if (token == "~@") { | |
94 | . <- Reader.next(rdr); | |
95 | new.list(new.symbol("splice-unquote"), read_form(rdr)) | |
36737ae5 JM |
96 | } else if (token == "^") { |
97 | . <- Reader.next(rdr) | |
98 | m <- read_form(rdr) | |
99 | new.list(new.symbol("with-meta"), read_form(rdr), m) | |
100 | } else if (token == "@") { | |
101 | . <- Reader.next(rdr); | |
102 | new.list(new.symbol("deref"), read_form(rdr)) | |
c30efef4 | 103 | } else if (token == ")") { |
4d1456b9 JM |
104 | throw("unexpected ')'") |
105 | } else if (token == "(") { | |
106 | new.listl(read_seq(rdr)) | |
107 | } else if (token == "]") { | |
108 | throw("unexpected ']'") | |
109 | } else if (token == "[") { | |
110 | new.vectorl(read_seq(rdr, "[", "]")) | |
36737ae5 JM |
111 | } else if (token == "}") { |
112 | throw("unexpected '}'") | |
113 | } else if (token == "{") { | |
114 | new.hash_mapl(read_seq(rdr, "{", "}")) | |
4d1456b9 JM |
115 | } else { |
116 | read_atom(rdr) | |
117 | } | |
118 | } | |
119 | ||
120 | read_str <- function(str) { | |
121 | tokens <- tokenize(str) | |
01feedfe | 122 | if (length(tokens) == 0) return(nil) |
4d1456b9 JM |
123 | return(read_form(new.Reader(tokens))) |
124 | } | |
125 | ||
126 | #cat("---\n") | |
127 | #print(tokenize("123")) | |
128 | #cat("---\n") | |
129 | #print(tokenize(" ( 123 456 abc \"def\" ) ")) | |
130 | ||
131 | #rdr <- new.reader(tokenize(" ( 123 456 abc \"def\" ) ")) | |
132 | #Reader.peek(rdr) | |
133 | #Reader.next(rdr) | |
134 | #Reader.next(rdr) | |
135 | #Reader.next(rdr) | |
136 | #Reader.next(rdr) | |
137 | #Reader.next(rdr) | |
138 | #Reader.next(rdr) | |
139 | #Reader.next(rdr) |