Commit | Line | Data |
---|---|---|
b1b2de81 C |
1 | (*pp cpp *) |
2 | ||
3 | (* File: sexp.ml | |
4 | ||
5 | Copyright (C) 2005- | |
6 | ||
7 | Jane Street Holding, LLC | |
8 | Author: Markus Mottl | |
9 | email: mmottl\@janestcapital.com | |
10 | WWW: http://www.janestcapital.com/ocaml | |
11 | ||
12 | This library is free software; you can redistribute it and/or | |
13 | modify it under the terms of the GNU Lesser General Public | |
14 | License as published by the Free Software Foundation; either | |
15 | version 2 of the License, or (at your option) any later version. | |
16 | ||
17 | This library is distributed in the hope that it will be useful, | |
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 | Lesser General Public License for more details. | |
21 | ||
22 | You should have received a copy of the GNU Lesser General Public | |
23 | License along with this library; if not, write to the Free Software | |
24 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
25 | *) | |
26 | ||
27 | (* Sexp: Module for handling S-expressions (I/O, etc.) *) | |
28 | ||
29 | open Format | |
30 | ||
31 | include Type | |
32 | ||
33 | (* Default indentation level for human-readable conversions *) | |
34 | ||
35 | let default_indent = ref 1 | |
36 | ||
37 | (* Escaping of strings used as atoms in S-expressions *) | |
38 | ||
39 | let is_special_char c = | |
40 | c <= ' ' || c = '"' || c = '(' || c = ')' || c = ';' || c = '\\' | |
41 | ||
42 | let must_escape str = | |
43 | let len = String.length str in | |
44 | len = 0 || | |
45 | let rec loop ix = is_special_char str.[ix] || ix > 0 && loop (ix - 1) in | |
46 | loop (len - 1) | |
47 | ||
48 | let maybe_esc_str str = | |
49 | if must_escape str then | |
50 | let estr = String.escaped str in | |
51 | let elen = String.length estr in | |
52 | let res = String.create (elen + 2) in | |
53 | String.blit estr 0 res 1 elen; | |
54 | res.[0] <- '"'; | |
55 | res.[elen + 1] <- '"'; | |
56 | res | |
57 | else str | |
58 | ||
59 | let pp_maybe_esc_str ppf str = pp_print_string ppf (maybe_esc_str str) | |
60 | ||
61 | (* Output of S-expressions to formatters *) | |
62 | ||
63 | let rec pp_hum_indent indent ppf = function | |
64 | | Atom str -> pp_maybe_esc_str ppf str | |
65 | | List (h :: t) -> | |
66 | pp_open_box ppf indent; | |
67 | pp_print_string ppf "("; | |
68 | pp_hum_indent indent ppf h; | |
69 | pp_hum_rest indent ppf t | |
70 | | List [] -> pp_print_string ppf "()" | |
71 | ||
72 | and pp_hum_rest indent ppf = function | |
73 | | h :: t -> | |
74 | pp_print_space ppf (); | |
75 | pp_hum_indent indent ppf h; | |
76 | pp_hum_rest indent ppf t | |
77 | | [] -> | |
78 | pp_print_string ppf ")"; | |
79 | pp_close_box ppf () | |
80 | ||
81 | let rec pp_mach_internal may_need_space ppf = function | |
82 | | Atom str -> | |
83 | let str' = maybe_esc_str str in | |
84 | let new_may_need_space = str' == str in | |
85 | if may_need_space && new_may_need_space then pp_print_string ppf " "; | |
86 | pp_print_string ppf str'; | |
87 | new_may_need_space | |
88 | | List (h :: t) -> | |
89 | pp_print_string ppf "("; | |
90 | let may_need_space = pp_mach_internal false ppf h in | |
91 | pp_mach_rest may_need_space ppf t; | |
92 | false | |
93 | | List [] -> pp_print_string ppf "()"; false | |
94 | ||
95 | and pp_mach_rest may_need_space ppf = function | |
96 | | h :: t -> | |
97 | let may_need_space = pp_mach_internal may_need_space ppf h in | |
98 | pp_mach_rest may_need_space ppf t | |
99 | | [] -> pp_print_string ppf ")" | |
100 | ||
101 | let pp_hum ppf sexp = pp_hum_indent !default_indent ppf sexp | |
102 | ||
103 | let pp_mach ppf sexp = ignore (pp_mach_internal false ppf sexp) | |
104 | let pp = pp_mach | |
105 | ||
106 | (* Sexp size *) | |
107 | ||
108 | let rec size_loop (v, c as acc) = function | |
109 | | Atom str -> v + 1, c + String.length str | |
110 | | List lst -> List.fold_left size_loop acc lst | |
111 | ||
112 | let size sexp = size_loop (0, 0) sexp | |
113 | ||
114 | ||
115 | (* Buffer conversions *) | |
116 | ||
117 | let to_buffer_hum ~buf ?(indent = !default_indent) sexp = | |
118 | Format.bprintf buf "%a@?" (pp_hum_indent indent) sexp | |
119 | ||
120 | let to_buffer_mach ~buf sexp = | |
121 | let rec loop may_need_space = function | |
122 | | Atom str -> | |
123 | let str' = maybe_esc_str str in | |
124 | let new_may_need_space = str' == str in | |
125 | if may_need_space && new_may_need_space then Buffer.add_char buf ' '; | |
126 | Buffer.add_string buf str'; | |
127 | new_may_need_space | |
128 | | List (h :: t) -> | |
129 | Buffer.add_char buf '('; | |
130 | let may_need_space = loop false h in | |
131 | loop_rest may_need_space t; | |
132 | false | |
133 | | List [] -> Buffer.add_string buf "()"; false | |
134 | and loop_rest may_need_space = function | |
135 | | h :: t -> | |
136 | let may_need_space = loop may_need_space h in | |
137 | loop_rest may_need_space t | |
138 | | [] -> Buffer.add_char buf ')' in | |
139 | ignore (loop false sexp) | |
140 | ||
141 | let to_buffer = to_buffer_mach | |
142 | ||
143 | ||
144 | (* Output of S-expressions to I/O-channels *) | |
145 | ||
146 | let buffer () = Buffer.create 4096 | |
147 | ||
148 | let with_new_buffer oc f = | |
149 | let buf = buffer () in | |
150 | f buf; | |
151 | Buffer.output_buffer oc buf | |
152 | ||
153 | let output_hum oc sexp = | |
154 | with_new_buffer oc (fun buf -> to_buffer_hum sexp ~buf) | |
155 | ||
156 | let output_hum_indent indent oc sexp = | |
157 | with_new_buffer oc (fun buf -> to_buffer_hum ~indent sexp ~buf) | |
158 | ||
159 | let output_mach oc sexp = | |
160 | with_new_buffer oc (fun buf -> to_buffer_mach sexp ~buf) | |
161 | ||
162 | let output = output_mach | |
163 | ||
164 | ||
165 | (* String conversions *) | |
166 | ||
167 | let to_string_hum ?indent sexp = | |
168 | let buf = buffer () in | |
169 | to_buffer_hum ?indent sexp ~buf; | |
170 | Buffer.contents buf | |
171 | ||
172 | let to_string_mach sexp = | |
173 | let buf = buffer () in | |
174 | to_buffer_mach sexp ~buf; | |
175 | Buffer.contents buf | |
176 | ||
177 | let to_string = to_string_mach | |
178 | ||
179 | ||
180 | (* Scan functions *) | |
181 | ||
182 | let scan_sexp ?buf lexbuf = Parser.sexp (Lexer.main ?buf) lexbuf | |
183 | let scan_sexps ?buf lexbuf = Parser.sexps (Lexer.main ?buf) lexbuf | |
184 | ||
185 | let get_main_buf buf = | |
186 | let buf = | |
187 | match buf with | |
188 | | None -> Buffer.create 64 | |
189 | | Some buf -> buf in | |
190 | Lexer.main ~buf | |
191 | ||
192 | let scan_fold_sexps ?buf ~f ~init lexbuf = | |
193 | let main = get_main_buf buf in | |
194 | let rec loop acc = | |
195 | match Parser.sexp_opt main lexbuf with | |
196 | | None -> acc | |
197 | | Some sexp -> loop (f sexp acc) in | |
198 | loop init | |
199 | ||
200 | let scan_iter_sexps ?buf ~f lexbuf = | |
201 | let main = get_main_buf buf in | |
202 | let rec loop () = | |
203 | match Parser.sexp_opt main lexbuf with | |
204 | | None -> () | |
205 | | Some sexp -> f sexp; loop () in | |
206 | loop () | |
207 | ||
208 | let scan_cnv_sexps ?buf ~f lexbuf = | |
209 | let coll sexp acc = f sexp :: acc in | |
210 | List.rev (scan_fold_sexps ?buf ~f:coll ~init:[] lexbuf) | |
211 | ||
212 | ||
213 | (* Partial parsing *) | |
214 | ||
215 | type parse_pos = | |
216 | { | |
217 | mutable text_line : int; (** Line position in parsed text *) | |
218 | mutable text_char : int; (** Character position in parsed text *) | |
219 | mutable buf_pos : int; (** Reading position in buffer *) | |
220 | } | |
221 | ||
222 | type 'a parse_result = Done of t * parse_pos | Cont of bool * 'a parse_fun | |
223 | and 'a parse_fun = pos : int -> len : int -> 'a -> 'a parse_result | |
224 | ||
225 | type parse_state = | |
226 | { | |
227 | parse_pos : parse_pos; | |
228 | mutable pstack : t list list; | |
229 | pbuf : Buffer.t; | |
230 | } | |
231 | ||
232 | type parse_error = | |
233 | { | |
234 | location : string; | |
235 | err_msg : string; | |
236 | parse_state : parse_state; | |
237 | } | |
238 | ||
239 | exception ParseError of parse_error | |
240 | ||
241 | let bump_text_line { parse_pos = parse_pos } = | |
242 | parse_pos.text_line <- parse_pos.text_line + 1; | |
243 | parse_pos.text_char <- 1 | |
244 | ||
245 | let bump_text_pos { parse_pos = parse_pos } = | |
246 | parse_pos.text_char <- parse_pos.text_char + 1 | |
247 | ||
248 | let bump_pos_cont state str ~max_pos ~pos cont = | |
249 | bump_text_pos state; | |
250 | cont state str ~max_pos ~pos:(pos + 1) | |
251 | ||
252 | let bump_line_cont state str ~max_pos ~pos cont = | |
253 | bump_text_line state; | |
254 | cont state str ~max_pos ~pos:(pos + 1) | |
255 | ||
256 | let add_bump bump state str ~max_pos ~pos c cont = | |
257 | Buffer.add_char state.pbuf c; | |
258 | bump state; | |
259 | cont state str ~max_pos ~pos:(pos + 1) | |
260 | ||
261 | let add_bump_pos state str ~max_pos ~pos c cont = | |
262 | add_bump bump_text_pos state str ~max_pos ~pos c cont | |
263 | ||
264 | let add_bump_line state str ~max_pos ~pos c cont = | |
265 | add_bump bump_text_line state str ~max_pos ~pos c cont | |
266 | ||
267 | let mk_parse_pos { parse_pos = parse_pos } buf_pos = | |
268 | parse_pos.buf_pos <- buf_pos; | |
269 | parse_pos | |
270 | ||
271 | let bump_found_atom bump state str ~max_pos ~pos cont = | |
272 | let pbuf = state.pbuf in | |
273 | let atom = Atom (Buffer.contents pbuf) in | |
274 | match state.pstack with | |
275 | | [] -> Done (atom, mk_parse_pos state pos) | |
276 | | rev_sexp_lst :: sexp_stack -> | |
277 | Buffer.clear pbuf; | |
278 | state.pstack <- (atom :: rev_sexp_lst) :: sexp_stack; | |
279 | bump state; | |
280 | cont state str ~max_pos ~pos:(pos + 1) | |
281 | ||
282 | let raise_parse_error state location err_msg = | |
283 | let parse_error = | |
284 | { | |
285 | location = location; | |
286 | err_msg = err_msg; | |
287 | parse_state = state; | |
288 | } | |
289 | in | |
290 | raise (ParseError parse_error) | |
291 | ||
292 | let raise_unexpected_char state ~loc pos c = | |
293 | let err_msg = sprintf "unexpected character: '%c'" c in | |
294 | let parse_pos = state.parse_pos in | |
295 | parse_pos.buf_pos <- pos; | |
296 | parse_pos.text_char <- parse_pos.text_char + 1; | |
297 | raise_parse_error state loc err_msg | |
298 | ||
299 | (* Macro for generating parsers *) | |
300 | #define MK_PARSER(TYPE, GET_LEN, PARSE, GET_CHAR) \ | |
301 | let check_str_bounds loc ~pos ~len (str : TYPE) = \ | |
302 | if pos < 0 then invalid_arg (loc ^ ": pos < 0"); \ | |
303 | if len < 0 then invalid_arg (loc ^ ": len < 0"); \ | |
304 | let str_len = GET_LEN str in \ | |
305 | let pos_len = pos + len in \ | |
306 | if pos_len > str_len then invalid_arg (loc ^ ": pos + len > str_len"); \ | |
307 | pos_len - 1 \ | |
308 | \ | |
309 | let mk_cont name cont state = \ | |
310 | let ws_only = state.pstack = [] && Buffer.length state.pbuf = 0 in \ | |
311 | let parse_fun ~pos ~len str = \ | |
312 | let max_pos = check_str_bounds name ~pos ~len str in \ | |
313 | cont state str ~max_pos ~pos \ | |
314 | in \ | |
315 | Cont (ws_only, parse_fun) \ | |
316 | \ | |
317 | let rec PARSE state str ~max_pos ~pos = \ | |
318 | if pos > max_pos then mk_cont "parse" PARSE state \ | |
319 | else \ | |
320 | match GET_CHAR with \ | |
321 | | '(' -> \ | |
322 | state.pstack <- [] :: state.pstack; \ | |
323 | bump_pos_cont state str ~max_pos ~pos PARSE \ | |
324 | | ')' as c -> \ | |
325 | (match state.pstack with \ | |
326 | | [] -> raise_unexpected_char state ~loc:"parse" pos c \ | |
327 | | rev_sexp_lst :: sexp_stack -> \ | |
328 | let sexp = List (List.rev rev_sexp_lst) in \ | |
329 | match sexp_stack with \ | |
330 | | [] -> Done (sexp, mk_parse_pos state (pos + 1)) \ | |
331 | | higher_rev_sexp_lst :: higher_sexp_stack -> \ | |
332 | state.pstack <- \ | |
333 | (sexp :: higher_rev_sexp_lst) :: higher_sexp_stack; \ | |
334 | bump_pos_cont state str ~max_pos ~pos PARSE) \ | |
335 | | ' ' | '\009' | '\012' -> bump_pos_cont state str ~max_pos ~pos PARSE \ | |
336 | | '\010' -> bump_line_cont state str ~max_pos ~pos PARSE \ | |
337 | | '\013' -> bump_line_cont state str ~max_pos ~pos parse_nl \ | |
338 | | ';' -> bump_pos_cont state str ~max_pos ~pos parse_comment \ | |
339 | | '"' -> bump_pos_cont state str ~max_pos ~pos parse_quoted \ | |
340 | | c -> add_bump_pos state str ~max_pos ~pos c parse_atom \ | |
341 | \ | |
342 | and parse_nl state str ~max_pos ~pos = \ | |
343 | if pos > max_pos then mk_cont "parse_nl" parse_nl state \ | |
344 | else \ | |
345 | let pos = if GET_CHAR = '\010' then pos + 1 else pos in \ | |
346 | PARSE state str ~max_pos ~pos \ | |
347 | \ | |
348 | and parse_comment state str ~max_pos ~pos = \ | |
349 | if pos > max_pos then mk_cont "parse_comment" parse_comment state \ | |
350 | else \ | |
351 | match GET_CHAR with \ | |
352 | | '\010' -> bump_line_cont state str ~max_pos ~pos PARSE \ | |
353 | | '\013' -> bump_line_cont state str ~max_pos ~pos parse_nl \ | |
354 | | _ -> bump_pos_cont state str ~max_pos ~pos parse_comment \ | |
355 | \ | |
356 | and parse_atom state str ~max_pos ~pos = \ | |
357 | if pos > max_pos then mk_cont "parse_atom" parse_atom state \ | |
358 | else \ | |
359 | match GET_CHAR with \ | |
360 | | ' ' | '\009' | '\012' -> \ | |
361 | bump_found_atom bump_text_pos state str ~max_pos ~pos PARSE \ | |
362 | | '(' -> \ | |
363 | let pbuf = state.pbuf in \ | |
364 | let atom = Atom (Buffer.contents pbuf) in \ | |
365 | (match state.pstack with \ | |
366 | | [] -> Done (atom, mk_parse_pos state pos) \ | |
367 | | rev_sexp_lst :: sexp_stack -> \ | |
368 | Buffer.clear pbuf; \ | |
369 | state.pstack <- [] :: (atom :: rev_sexp_lst) :: sexp_stack; \ | |
370 | bump_pos_cont state str ~max_pos ~pos PARSE) \ | |
371 | | ')' -> \ | |
372 | let pbuf = state.pbuf in \ | |
373 | let atom = Atom (Buffer.contents pbuf) in \ | |
374 | (match state.pstack with \ | |
375 | | [] -> Done (atom, mk_parse_pos state pos) \ | |
376 | | rev_sexp_lst :: sexp_stack -> \ | |
377 | let sexp = List (List.rev_append rev_sexp_lst [atom]) in \ | |
378 | match sexp_stack with \ | |
379 | | [] -> Done (sexp, mk_parse_pos state (pos + 1)) \ | |
380 | | higher_rev_sexp_lst :: higher_sexp_stack -> \ | |
381 | Buffer.clear pbuf; \ | |
382 | state.pstack <- \ | |
383 | (sexp :: higher_rev_sexp_lst) :: higher_sexp_stack; \ | |
384 | bump_pos_cont state str ~max_pos ~pos PARSE) \ | |
385 | | '\010' -> bump_found_atom bump_text_line state str ~max_pos ~pos PARSE \ | |
386 | | '\013' -> \ | |
387 | bump_found_atom bump_text_line state str ~max_pos ~pos parse_nl \ | |
388 | | ';' -> \ | |
389 | bump_found_atom bump_text_pos state str ~max_pos ~pos parse_comment \ | |
390 | | '"' -> \ | |
391 | bump_found_atom bump_text_pos state str ~max_pos ~pos parse_quoted \ | |
392 | | c -> add_bump_pos state str ~max_pos ~pos c parse_atom \ | |
393 | \ | |
394 | and parse_quoted state str ~max_pos ~pos = \ | |
395 | if pos > max_pos then mk_cont "parse_quoted" parse_quoted state \ | |
396 | else \ | |
397 | match GET_CHAR with \ | |
398 | | '"' -> \ | |
399 | let pbuf = state.pbuf in \ | |
400 | let atom = Atom (Buffer.contents pbuf) in \ | |
401 | (match state.pstack with \ | |
402 | | [] -> Done (atom, mk_parse_pos state (pos + 1)) \ | |
403 | | rev_sexp_lst :: sexp_stack -> \ | |
404 | Buffer.clear pbuf; \ | |
405 | state.pstack <- (atom :: rev_sexp_lst) :: sexp_stack; \ | |
406 | bump_pos_cont state str ~max_pos ~pos PARSE) \ | |
407 | | '\\' -> bump_pos_cont state str ~max_pos ~pos parse_escaped \ | |
408 | | '\010' as c -> add_bump_line state str ~max_pos ~pos c parse_quoted \ | |
409 | | '\013' as c -> add_bump_line state str ~max_pos ~pos c parse_quoted_nl \ | |
410 | | c -> add_bump_pos state str ~max_pos ~pos c parse_quoted \ | |
411 | \ | |
412 | and parse_quoted_nl state str ~max_pos ~pos = \ | |
413 | if pos > max_pos then mk_cont "parse_quoted_nl" parse_quoted_nl state \ | |
414 | else \ | |
415 | let pos = \ | |
416 | let c = '\010' in \ | |
417 | if GET_CHAR = c then ( \ | |
418 | Buffer.add_char state.pbuf c; \ | |
419 | pos + 1 \ | |
420 | ) \ | |
421 | else pos \ | |
422 | in \ | |
423 | parse_quoted state str ~max_pos ~pos \ | |
424 | \ | |
425 | and parse_escaped state str ~max_pos ~pos = \ | |
426 | if pos > max_pos then mk_cont "parse_escaped" parse_escaped state \ | |
427 | else \ | |
428 | match GET_CHAR with \ | |
429 | | '\010' -> bump_line_cont state str ~max_pos ~pos parse_skip_ws \ | |
430 | | '\013' -> bump_line_cont state str ~max_pos ~pos parse_skip_ws_nl \ | |
431 | | '0' .. '9' as c -> \ | |
432 | bump_text_pos state; \ | |
433 | let d = Char.code c - 48 in \ | |
434 | parse_dec state str ~max_pos ~pos:(pos + 1) ~count:2 ~d \ | |
435 | | 'x' -> \ | |
436 | bump_text_pos state; \ | |
437 | parse_hex state str ~max_pos ~pos:(pos + 1) ~count:2 ~d:0 \ | |
438 | | ('\\' | '"' | '\'' ) as c -> \ | |
439 | add_bump_pos state str ~max_pos ~pos c parse_quoted \ | |
440 | | 'n' -> add_bump_pos state str ~max_pos ~pos '\n' parse_quoted \ | |
441 | | 't' -> add_bump_pos state str ~max_pos ~pos '\t' parse_quoted \ | |
442 | | 'b' -> add_bump_pos state str ~max_pos ~pos '\b' parse_quoted \ | |
443 | | 'r' -> add_bump_pos state str ~max_pos ~pos '\r' parse_quoted \ | |
444 | | c -> \ | |
445 | Buffer.add_char state.pbuf '\\'; \ | |
446 | add_bump_pos state str ~max_pos ~pos c parse_quoted \ | |
447 | \ | |
448 | and parse_skip_ws state str ~max_pos ~pos = \ | |
449 | if pos > max_pos then mk_cont "parse_skip_ws" parse_skip_ws state \ | |
450 | else \ | |
451 | match GET_CHAR with \ | |
452 | | ' ' | '\009' -> bump_pos_cont state str ~max_pos ~pos parse_skip_ws \ | |
453 | | _ -> parse_quoted state str ~max_pos ~pos \ | |
454 | \ | |
455 | and parse_skip_ws_nl state str ~max_pos ~pos = \ | |
456 | if pos > max_pos then mk_cont "parse_skip_ws_nl" parse_skip_ws_nl state \ | |
457 | else \ | |
458 | let pos = if GET_CHAR = '\010' then pos + 1 else pos in \ | |
459 | parse_skip_ws state str ~max_pos ~pos \ | |
460 | \ | |
461 | and parse_dec state str ~max_pos ~pos ~count ~d = \ | |
462 | if pos > max_pos then mk_cont "parse_dec" (parse_dec ~count ~d) state \ | |
463 | else \ | |
464 | match GET_CHAR with \ | |
465 | | '0' .. '9' as c -> \ | |
466 | let d = 10 * d + Char.code c - 48 in \ | |
467 | if count = 1 then \ | |
468 | if d > 255 then \ | |
469 | let err_msg = sprintf "illegal decimal escape: \\%d" d in \ | |
470 | raise_parse_error state "parse_dec" err_msg \ | |
471 | else \ | |
472 | add_bump_pos state str ~max_pos ~pos (Char.chr d) parse_quoted \ | |
473 | else ( \ | |
474 | bump_text_pos state; \ | |
475 | parse_dec state str ~max_pos ~pos:(pos + 1) ~count:(count - 1) ~d) \ | |
476 | | c -> raise_unexpected_char state ~loc:"parse_dec" pos c \ | |
477 | \ | |
478 | and parse_hex state str ~max_pos ~pos ~count ~d = \ | |
479 | if pos > max_pos then mk_cont "parse_hex" (parse_hex ~count ~d) state \ | |
480 | else \ | |
481 | match GET_CHAR with \ | |
482 | | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' as c -> \ | |
483 | let corr = \ | |
484 | if c >= 'a' then 87 \ | |
485 | else if c >= 'A' then 55 \ | |
486 | else 48 \ | |
487 | in \ | |
488 | let d = 16 * d + Char.code c - corr in \ | |
489 | if count = 1 then \ | |
490 | if d > 255 then \ | |
491 | let err_msg = sprintf "illegal hexadecimal escape: \\%x" d in \ | |
492 | raise_parse_error state "parse_hex" err_msg \ | |
493 | else \ | |
494 | add_bump_pos state str ~max_pos ~pos (Char.chr d) parse_quoted \ | |
495 | else ( \ | |
496 | bump_text_pos state; \ | |
497 | parse_hex state str ~max_pos ~pos:(pos + 1) ~count:(count - 1) ~d) \ | |
498 | | c -> raise_unexpected_char state ~loc:"parse_hex" pos c \ | |
499 | \ | |
500 | let PARSE ?(text_line = 1) ?(text_char = 1) ?(pos = 0) ?len str = \ | |
501 | let len = \ | |
502 | match len with \ | |
503 | | Some len -> len \ | |
504 | | None -> GET_LEN str - pos \ | |
505 | in \ | |
506 | let max_pos = check_str_bounds "parse" ~pos ~len str in \ | |
507 | let state = \ | |
508 | { \ | |
509 | parse_pos = \ | |
510 | { \ | |
511 | text_line = text_line; \ | |
512 | text_char = text_char; \ | |
513 | buf_pos = pos; \ | |
514 | }; \ | |
515 | pstack = []; \ | |
516 | pbuf = Buffer.create 128; \ | |
517 | } \ | |
518 | in \ | |
519 | PARSE state str ~max_pos ~pos | |
520 | ||
521 | MK_PARSER(string, String.length, parse_str, str.[pos]) | |
522 | ||
523 | let parse = parse_str | |
524 | ||
525 | let plain_parse ~pos ~len str = parse ~pos ~len str | |
526 | ||
527 | ||
528 | (* Partial parsing from bigstrings *) | |
529 | ||
530 | (* NOTE: this is really an awful duplication of the code for parsing | |
531 | strings, but since OCaml does not inline higher-order functions known | |
532 | at compile, other solutions would sacrifice a lot of efficiency. *) | |
533 | ||
534 | open Bigarray | |
535 | ||
536 | type bstr = (char, int8_unsigned_elt, c_layout) Array1.t | |
537 | ||
538 | MK_PARSER(bstr, Array1.dim, parse_bstr, str.{pos}) | |
539 | ||
540 | ||
541 | (* Input functions *) | |
542 | ||
543 | let reraise_parse_error pe global_pos = | |
544 | let ps = pe.parse_state in | |
545 | let ppos = ps.parse_pos in | |
546 | let new_ppos = { ppos with buf_pos = global_pos + ppos.buf_pos } in | |
547 | let new_ps = { ps with parse_pos = new_ppos } in | |
548 | let new_pe = { pe with parse_state = new_ps } in | |
549 | raise (ParseError new_pe) | |
550 | ||
551 | let input_sexp ?text_line ?text_char ?(buf_pos = 0) ic = | |
552 | let buf = String.create 1 in | |
553 | let rec loop this_parse = | |
554 | let c = input_char ic in | |
555 | buf.[0] <- c; | |
556 | let parse_res = | |
557 | try this_parse ~pos:0 ~len:1 buf | |
558 | with ParseError pe -> reraise_parse_error pe buf_pos | |
559 | in | |
560 | match parse_res with | |
561 | | Done (sexp, _) -> sexp | |
562 | | Cont (_, this_parse) -> loop this_parse | |
563 | in | |
564 | let this_parse ~pos ~len str = parse ?text_line ?text_char ~pos ~len str in | |
565 | loop this_parse | |
566 | ||
567 | let input_rev_sexps | |
568 | ?text_line ?text_char | |
569 | ?(buf_pos = 0) ?(buf = String.create 8192) ic = | |
570 | let rev_sexps_ref = ref [] in | |
571 | let buf_len = String.length buf in | |
572 | let is_incomplete_ref = ref false in | |
573 | let buf_pos_ref = ref buf_pos in | |
574 | let rec loop this_parse pos len = | |
575 | if len > 0 then | |
576 | let parse_res = | |
577 | try this_parse ~pos ~len buf | |
578 | with ParseError pe -> reraise_parse_error pe !buf_pos_ref | |
579 | in | |
580 | match parse_res with | |
581 | | Done (sexp, new_pos) -> | |
582 | rev_sexps_ref := sexp :: !rev_sexps_ref; | |
583 | let n_parsed = new_pos.buf_pos - pos in | |
584 | is_incomplete_ref := false; | |
585 | let text_line = new_pos.text_line in | |
586 | let text_char = new_pos.text_char in | |
587 | let this_parse ~pos ~len str = | |
588 | parse ~text_line ~text_char ~pos ~len str | |
589 | in | |
590 | if n_parsed = len then | |
591 | let new_len = input ic buf 0 buf_len in | |
592 | buf_pos_ref := !buf_pos_ref + new_pos.buf_pos; | |
593 | loop this_parse 0 new_len | |
594 | else loop this_parse new_pos.buf_pos (len - n_parsed) | |
595 | | Cont (ws_only, this_parse) -> | |
596 | is_incomplete_ref := not ws_only; | |
597 | buf_pos_ref := !buf_pos_ref + len + pos; | |
598 | loop this_parse 0 (input ic buf 0 buf_len) | |
599 | else if !is_incomplete_ref then raise End_of_file | |
600 | else !rev_sexps_ref | |
601 | in | |
602 | let this_parse ~pos ~len str = parse ?text_line ?text_char ~pos ~len str in | |
603 | loop this_parse 0 (input ic buf 0 buf_len) | |
604 | ||
605 | let input_sexps ?text_line ?text_char ?buf_pos ?buf ic = | |
606 | let rev_sexps = input_rev_sexps ?text_line ?text_char ?buf_pos ?buf ic in | |
607 | List.rev rev_sexps | |
608 | ||
609 | (* of_string and of_bstr *) | |
610 | ||
611 | let of_string_bstr loc this_parse ws_buf get_len get_sub str = | |
612 | match this_parse str with | |
613 | | Done (_, { buf_pos = buf_pos }) when buf_pos <> get_len str -> | |
614 | let prefix_len = min (get_len str - buf_pos) 20 in | |
615 | let prefix = get_sub str buf_pos prefix_len in | |
616 | let msg = | |
617 | sprintf "%s: S-expression followed by data at position %d: %S..." | |
618 | loc buf_pos prefix | |
619 | in | |
620 | failwith msg | |
621 | | Done (sexp, _) -> sexp | |
622 | | Cont (ws_only, this_parse) -> | |
623 | if ws_only then failwith (loc ^ ": whitespace only"); | |
624 | match this_parse ~pos:0 ~len:1 ws_buf with | |
625 | | Done (sexp, _) -> sexp | |
626 | | Cont _ -> failwith (loc ^ ": incomplete S-expression") | |
627 | ||
628 | let of_string str = | |
629 | of_string_bstr "Sexp.of_string" parse " " String.length String.sub str | |
630 | ||
631 | let get_bstr_sub_str bstr pos len = | |
632 | let str = String.create len in | |
633 | for i = 0 to len - 1 do str.[i] <- bstr.{pos + i} done; | |
634 | str | |
635 | ||
636 | let bstr_ws_buf = Array1.create char c_layout 1 | |
637 | let () = bstr_ws_buf.{0} <- ' ' | |
638 | ||
639 | let of_bstr bstr = | |
640 | of_string_bstr "Sexp.of_bstr" | |
641 | parse_bstr bstr_ws_buf Array1.dim get_bstr_sub_str bstr | |
642 | ||
643 | (* Loading *) | |
644 | ||
645 | let load_sexp ?(buf = String.create 8192) file = | |
646 | let buf_len = String.length buf in | |
647 | let ic = open_in file in | |
648 | let rec loop this_parse = | |
649 | let len = input ic buf 0 buf_len in | |
650 | if len = 0 then raise End_of_file | |
651 | else | |
652 | match this_parse ~pos:0 ~len buf with | |
653 | | Done (sexp, _) -> sexp | |
654 | | Cont (_, this_parse) -> loop this_parse | |
655 | in | |
656 | try | |
657 | let sexp = loop plain_parse in | |
658 | close_in ic; | |
659 | sexp | |
660 | with exc -> close_in_noerr ic; raise exc | |
661 | ||
662 | let load_rev_sexps ?buf file = | |
663 | let ic = open_in file in | |
664 | try | |
665 | let sexps = input_rev_sexps ?buf ic in | |
666 | close_in ic; | |
667 | sexps | |
668 | with exc -> close_in_noerr ic; raise exc | |
669 | ||
670 | let load_sexps ?buf file = | |
671 | let rev_sexps = load_rev_sexps ?buf file in | |
672 | List.rev rev_sexps | |
673 | ||
674 | ||
675 | (* Utilities for automated type conversions *) | |
676 | ||
677 | let unit = List [] | |
678 | ||
679 | external sexp_of_t : t -> t = "%identity" | |
680 | external t_of_sexp : t -> t = "%identity" |