Coccinelle release 1.0.0-rc12
[bpt/coccinelle.git] / bundles / sexplib / sexplib-7.0.5 / lib / pre_sexp.ml
CommitLineData
feec80c3
C
1(******************************************************************************
2 * Sexplib *
3 * *
4 * Copyright (C) 2005- Jane Street Holding, LLC *
5 * Contact: opensource@janestreet.com *
6 * WWW: http://www.janestreet.com/ocaml *
7 * Author: Markus Mottl *
8 * *
9 * This library is free software; you can redistribute it and/or *
10 * modify it under the terms of the GNU Lesser General Public *
11 * License as published by the Free Software Foundation; either *
12 * version 2 of the License, or (at your option) any later version. *
13 * *
14 * This library is distributed in the hope that it will be useful, *
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
17 * Lesser General Public License for more details. *
18 * *
19 * You should have received a copy of the GNU Lesser General Public *
20 * License along with this library; if not, write to the Free Software *
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
22 * *
23 ******************************************************************************)
24
25(* Sexp: Module for handling S-expressions (I/O, etc.) *)
26
27open Format
28open Bigarray
29
30include Type
31
32exception Of_sexp_error of exn * t
33
34type bigstring = (char, int8_unsigned_elt, c_layout) Array1.t
35
36
37(* Default indentation level for human-readable conversions *)
38
39let default_indent = ref 1
40
41
42(* Escaping of strings used as atoms in S-expressions *)
43
44let is_special_char c =
45 c <= ' ' || c = '"' || c = '(' || c = ')' || c = ';' || c = '\\'
46
47let must_escape str =
48 let len = String.length str in
49 len = 0 ||
50 let rec loop ix = is_special_char str.[ix] || ix > 0 && loop (ix - 1) in
51 loop (len - 1)
52
53let maybe_esc_str str =
54 if must_escape str then
55 let estr = String.escaped str in
56 let elen = String.length estr in
57 let res = String.create (elen + 2) in
58 String.blit estr 0 res 1 elen;
59 res.[0] <- '"';
60 res.[elen + 1] <- '"';
61 res
62 else str
63
64let pp_maybe_esc_str ppf str = pp_print_string ppf (maybe_esc_str str)
65
66
67(* Output of S-expressions to formatters *)
68
69let rec pp_hum_indent indent ppf = function
70 | Atom str -> pp_maybe_esc_str ppf str
71 | List (h :: t) ->
72 pp_open_box ppf indent;
73 pp_print_string ppf "(";
74 pp_hum_indent indent ppf h;
75 pp_hum_rest indent ppf t
76 | List [] -> pp_print_string ppf "()"
77
78and pp_hum_rest indent ppf = function
79 | h :: t ->
80 pp_print_space ppf ();
81 pp_hum_indent indent ppf h;
82 pp_hum_rest indent ppf t
83 | [] ->
84 pp_print_string ppf ")";
85 pp_close_box ppf ()
86
87let rec pp_mach_internal may_need_space ppf = function
88 | Atom str ->
89 let str' = maybe_esc_str str in
90 let new_may_need_space = str' == str in
91 if may_need_space && new_may_need_space then pp_print_string ppf " ";
92 pp_print_string ppf str';
93 new_may_need_space
94 | List (h :: t) ->
95 pp_print_string ppf "(";
96 let may_need_space = pp_mach_internal false ppf h in
97 pp_mach_rest may_need_space ppf t;
98 false
99 | List [] -> pp_print_string ppf "()"; false
100
101and pp_mach_rest may_need_space ppf = function
102 | h :: t ->
103 let may_need_space = pp_mach_internal may_need_space ppf h in
104 pp_mach_rest may_need_space ppf t
105 | [] -> pp_print_string ppf ")"
106
107let pp_hum ppf sexp = pp_hum_indent !default_indent ppf sexp
108
109let pp_mach ppf sexp = ignore (pp_mach_internal false ppf sexp)
110let pp = pp_mach
111
112
113(* Sexp size *)
114
115let rec size_loop (v, c as acc) = function
116 | Atom str -> v + 1, c + String.length str
117 | List lst -> List.fold_left size_loop acc lst
118
119let size sexp = size_loop (0, 0) sexp
120
121
122(* Buffer conversions *)
123
124let to_buffer_hum ~buf ?(indent = !default_indent) sexp =
125 Format.bprintf buf "%a@?" (pp_hum_indent indent) sexp
126
127let to_buffer_mach ~buf sexp =
128 let rec loop may_need_space = function
129 | Atom str ->
130 let str' = maybe_esc_str str in
131 let new_may_need_space = str' == str in
132 if may_need_space && new_may_need_space then Buffer.add_char buf ' ';
133 Buffer.add_string buf str';
134 new_may_need_space
135 | List (h :: t) ->
136 Buffer.add_char buf '(';
137 let may_need_space = loop false h in
138 loop_rest may_need_space t;
139 false
140 | List [] -> Buffer.add_string buf "()"; false
141 and loop_rest may_need_space = function
142 | h :: t ->
143 let may_need_space = loop may_need_space h in
144 loop_rest may_need_space t
145 | [] -> Buffer.add_char buf ')' in
146 ignore (loop false sexp)
147
148let to_buffer = to_buffer_mach
149
150
151(* Output of S-expressions to I/O-channels *)
152
153let buffer () = Buffer.create 4096
154
155let with_new_buffer oc f =
156 let buf = buffer () in
157 f buf;
158 Buffer.output_buffer oc buf
159
160let output_hum oc sexp =
161 with_new_buffer oc (fun buf -> to_buffer_hum sexp ~buf)
162
163let output_hum_indent indent oc sexp =
164 with_new_buffer oc (fun buf -> to_buffer_hum ~indent sexp ~buf)
165
166let output_mach oc sexp =
167 with_new_buffer oc (fun buf -> to_buffer_mach sexp ~buf)
168
169let output = output_mach
170
171
172(* Output of S-expressions to file *)
173
174let save_of_output ?perm output_function file sexp =
175 let tmp_name, oc = Filename.open_temp_file file "tmp" in
176 try
177 output_function oc sexp;
178 close_out oc;
179 let perm =
180 match perm with
181 | Some perm -> perm
182 | None ->
183 let umask = Unix.umask 0 in
184 ignore (Unix.umask umask);
185 umask lxor 0o666
186 in
187 if perm <> 0o600 then Unix.chmod tmp_name perm;
188 Sys.rename tmp_name file
189 with
190 e ->
191 close_out_noerr oc;
192 raise e
193
194let output_sexp_nl do_output oc sexp =
195 do_output oc sexp;
196 output_string oc "\n"
197
198let save_hum ?perm file sexp =
199 save_of_output ?perm (output_sexp_nl output_hum) file sexp
200
201let save_mach ?perm file sexp = save_of_output ?perm output_mach file sexp
202let save = save_mach
203
204let output_sexps_nl do_output oc sexps =
205 List.iter (output_sexp_nl do_output oc) sexps
206
207let save_sexps_hum ?perm file sexps =
208 save_of_output ?perm (output_sexps_nl output_hum) file sexps
209
210let save_sexps_mach ?perm file sexps =
211 save_of_output ?perm (output_sexps_nl output_mach) file sexps
212
213let save_sexps = save_sexps_mach
214
215
216(* String conversions *)
217
218let to_string_hum ?indent = function
219 | Atom str -> maybe_esc_str str
220 | sexp ->
221 let buf = buffer () in
222 to_buffer_hum ?indent sexp ~buf;
223 Buffer.contents buf
224
225let to_string_mach = function
226 | Atom str -> maybe_esc_str str
227 | sexp ->
228 let buf = buffer () in
229 to_buffer_mach sexp ~buf;
230 Buffer.contents buf
231
232let to_string = to_string_mach
233
234
235(* Scan functions *)
236
237let scan_sexp ?buf lexbuf = Parser.sexp (Lexer.main ?buf) lexbuf
238let scan_sexps ?buf lexbuf = Parser.sexps (Lexer.main ?buf) lexbuf
239
240let get_main_buf buf =
241 let buf =
242 match buf with
243 | None -> Buffer.create 64
244 | Some buf -> buf in
245 Lexer.main ~buf
246
247let scan_fold_sexps ?buf ~f ~init lexbuf =
248 let main = get_main_buf buf in
249 let rec loop acc =
250 match Parser.sexp_opt main lexbuf with
251 | None -> acc
252 | Some sexp -> loop (f acc sexp) in
253 loop init
254
255let scan_iter_sexps ?buf ~f lexbuf =
256 scan_fold_sexps ?buf lexbuf ~init:() ~f:(fun () sexp -> f sexp)
257
258let scan_sexps_conv ?buf ~f lexbuf =
259 let coll acc sexp = f sexp :: acc in
260 List.rev (scan_fold_sexps ?buf ~f:coll ~init:[] lexbuf)
261
262
263(* Partial parsing *)
264
265module Annot = struct
266 type pos = { line : int; col : int; offset : int }
267 type range = { start_pos : pos; end_pos : pos }
268 type t = Atom of range * Type.t | List of range * t list * Type.t
269 type 'a conv = [ `Result of 'a | `Error of exn * t ]
270
271 exception Conv_exn of string * exn
272
273 type stack = {
274 mutable positions : pos list;
275 mutable stack : t list list;
276 }
277
278 let get_sexp = function Atom (_, sexp) | List (_, _, sexp) -> sexp
279 let get_range = function Atom (range, _) | List (range, _, _) -> range
280
281 exception Annot_sexp of t
282
283 let find_sexp annot_sexp sexp =
284 let rec loop annot_sexp =
285 match annot_sexp with
286 | Atom (_, sub_sexp)
287 | List (_, _, sub_sexp) when sexp == sub_sexp ->
288 raise (Annot_sexp annot_sexp)
289 | List (_, annots, _) -> List.iter loop annots
290 | Atom _ -> ()
291 in
292 try loop annot_sexp; None
293 with Annot_sexp res -> Some res
294end
295
296module Parse_pos = struct
297 type t =
298 {
299 mutable text_line : int;
300 mutable text_char : int;
301 mutable global_offset : int;
302 mutable buf_pos : int;
303 }
304
305 let create
306 ?(text_line = 1) ?(text_char = 0)
307 ?(buf_pos = 0) ?(global_offset = 0) () =
308 let fail msg = failwith ("Sexplib.Sexp.Parse_pos.create: " ^ msg) in
309 if text_line < 1 then fail "text_line < 1"
310 else if text_char < 0 then fail "text_char < 0"
311 else if global_offset < 0 then fail "global_offset < 0"
312 else if buf_pos < 0 then fail "buf_pos < 0"
313 else { text_line = text_line; text_char = text_char; global_offset = global_offset; buf_pos = buf_pos }
314
315 let with_buf_pos t buf_pos = { t with buf_pos = buf_pos }
316end
317
318type ('a, 't) parse_result =
319 | Done of 't * Parse_pos.t
320 | Cont of bool * ('a, 't) parse_fun
321
322and ('a, 't) parse_fun = pos : int -> len : int -> 'a -> ('a, 't) parse_result
323
324type 't parse_state =
325 {
326 parse_pos : Parse_pos.t;
327 mutable pstack : 't;
328 pbuf : Buffer.t;
329 }
330
331type parse_error =
332 {
333 location : string;
334 err_msg : string;
335 parse_state :
336 [
337 | `Sexp of t list list parse_state
338 | `Annot of Annot.stack parse_state
339 ]
340 }
341
342exception Parse_error of parse_error
343
344let bump_text_line { parse_pos = parse_pos } =
345 parse_pos.Parse_pos.text_line <- parse_pos.Parse_pos.text_line + 1;
346 parse_pos.Parse_pos.text_char <- 0
347
348let bump_text_pos { parse_pos = parse_pos } =
349 parse_pos.Parse_pos.text_char <- parse_pos.Parse_pos.text_char + 1
350
351let bump_pos_cont state str ~max_pos ~pos cont =
352 bump_text_pos state;
353 cont state str ~max_pos ~pos:(pos + 1)
354
355let bump_line_cont state str ~max_pos ~pos cont =
356 bump_text_line state;
357 cont state str ~max_pos ~pos:(pos + 1)
358
359let add_bump bump state str ~max_pos ~pos c cont =
360 Buffer.add_char state.pbuf c;
361 bump state;
362 cont state str ~max_pos ~pos:(pos + 1)
363
364let add_bump_pos state str ~max_pos ~pos c cont =
365 add_bump bump_text_pos state str ~max_pos ~pos c cont
366
367let add_bump_line state str ~max_pos ~pos c cont =
368 add_bump bump_text_line state str ~max_pos ~pos c cont
369
370let set_parse_pos parse_pos buf_pos =
371 let len = buf_pos - parse_pos.Parse_pos.buf_pos in
372 parse_pos.Parse_pos.buf_pos <- buf_pos;
373 parse_pos.Parse_pos.global_offset <- parse_pos.Parse_pos.global_offset + len
374
375let mk_parse_pos { parse_pos = parse_pos } buf_pos =
376 set_parse_pos parse_pos buf_pos;
377 parse_pos
378
379let raise_parse_error parse_state location buf_pos err_msg =
380 begin
381 match parse_state with
382 | `Sexp { parse_pos = parse_pos } | `Annot { parse_pos = parse_pos } ->
383 set_parse_pos parse_pos buf_pos;
384 parse_pos.Parse_pos.text_char <- parse_pos.Parse_pos.text_char + 1;
385 end;
386 let parse_error = { location = location; err_msg = err_msg; parse_state = parse_state } in
387 raise (Parse_error parse_error)
388
389let raise_unexpected_char parse_state location buf_pos c =
390 let err_msg = sprintf "unexpected character: '%c'" c in
391 raise_parse_error parse_state location buf_pos err_msg
392
393(* The code below is derived from lexer.mll in the OCaml distribution,
394 which also uses ASCII codes instead of escape sequences to denote
395 special characters. *)
396
397(* Macro for generating parsers *)
398#define MK_PARSER( \
399 TYPE, GET_LEN, PARSE, GET_CHAR, \
400 GET_PSTACK, SET_PSTACK, \
401 REGISTER_POS, REGISTER_POS1, \
402 MK_ATOM, MK_LIST, INIT_PSTACK, MK_PARSE_STATE) \
403 let bump_found_atom bump state str ~max_pos ~pos cont = \
404 let pbuf = state.pbuf in \
405 let pbuf_str = Buffer.contents pbuf in \
406 let atom = MK_ATOM in \
407 match GET_PSTACK with \
408 | [] -> Done (atom, mk_parse_pos state pos) \
409 | rev_sexp_lst :: sexp_stack -> \
410 Buffer.clear pbuf; \
411 let pstack = (atom :: rev_sexp_lst) :: sexp_stack in \
412 SET_PSTACK; \
413 bump state; \
414 cont state str ~max_pos ~pos:(pos + 1) \
415 \
416 let check_str_bounds loc ~pos ~len (str : TYPE) = \
417 if pos < 0 then invalid_arg (loc ^ ": pos < 0"); \
418 if len < 0 then invalid_arg (loc ^ ": len < 0"); \
419 let str_len = GET_LEN str in \
420 let pos_len = pos + len in \
421 if pos_len > str_len then invalid_arg (loc ^ ": pos + len > str_len"); \
422 pos_len - 1 \
423 \
424 let mk_cont name cont state = \
425 let ws_only = GET_PSTACK = [] && Buffer.length state.pbuf = 0 in \
426 let parse_fun = \
427 let used_ref = ref false in \
428 fun ~pos ~len str -> \
429 if !used_ref then \
430 failwith "Sexplib.Sexp: parser continuation called twice" \
431 else begin \
432 used_ref := true; \
433 let max_pos = check_str_bounds name ~pos ~len str in \
434 cont state str ~max_pos ~pos \
435 end \
436 in \
437 Cont (ws_only, parse_fun) \
438 \
439 let rec PARSE state str ~max_pos ~pos = \
440 if pos > max_pos then mk_cont "parse" PARSE state \
441 else \
442 match GET_CHAR with \
443 | '(' -> \
444 REGISTER_POS \
445 let pstack = [] :: GET_PSTACK in \
446 SET_PSTACK; \
447 bump_pos_cont state str ~max_pos ~pos PARSE \
448 | ')' as c -> \
449 (match GET_PSTACK with \
450 | [] -> raise_unexpected_char (MK_PARSE_STATE state) "parse" pos c \
451 | rev_sexp_lst :: sexp_stack -> \
452 let sexp_lst = List.rev rev_sexp_lst in \
453 let sexp = MK_LIST in \
454 match sexp_stack with \
455 | [] -> Done (sexp, mk_parse_pos state (pos + 1)) \
456 | higher_rev_sexp_lst :: higher_sexp_stack -> \
457 let pstack = \
458 (sexp :: higher_rev_sexp_lst) :: higher_sexp_stack \
459 in \
460 SET_PSTACK; \
461 bump_pos_cont state str ~max_pos ~pos PARSE) \
462 | ' ' | '\009' | '\012' -> bump_pos_cont state str ~max_pos ~pos PARSE \
463 | '\010' -> bump_line_cont state str ~max_pos ~pos PARSE \
464 | '\013' -> bump_line_cont state str ~max_pos ~pos parse_nl \
465 | ';' -> bump_pos_cont state str ~max_pos ~pos parse_comment \
466 | '"' -> \
467 REGISTER_POS1 \
468 bump_pos_cont state str ~max_pos ~pos parse_quoted \
469 | c -> \
470 REGISTER_POS \
471 add_bump_pos state str ~max_pos ~pos c parse_atom \
472 \
473 and parse_nl state str ~max_pos ~pos = \
474 if pos > max_pos then mk_cont "parse_nl" parse_nl state \
475 else \
476 let pos = if GET_CHAR = '\010' then pos + 1 else pos in \
477 PARSE state str ~max_pos ~pos \
478 \
479 and parse_comment state str ~max_pos ~pos = \
480 if pos > max_pos then mk_cont "parse_comment" parse_comment state \
481 else \
482 match GET_CHAR with \
483 | '\010' -> bump_line_cont state str ~max_pos ~pos PARSE \
484 | '\013' -> bump_line_cont state str ~max_pos ~pos parse_nl \
485 | _ -> bump_pos_cont state str ~max_pos ~pos parse_comment \
486 \
487 and parse_atom state str ~max_pos ~pos = \
488 if pos > max_pos then mk_cont "parse_atom" parse_atom state \
489 else \
490 match GET_CHAR with \
491 | ' ' | '\009' | '\012' -> \
492 bump_found_atom bump_text_pos state str ~max_pos ~pos PARSE \
493 | '(' -> \
494 let pbuf = state.pbuf in \
495 let pbuf_str = Buffer.contents pbuf in \
496 let atom = MK_ATOM in \
497 (match GET_PSTACK with \
498 | [] -> Done (atom, mk_parse_pos state pos) \
499 | rev_sexp_lst :: sexp_stack -> \
500 REGISTER_POS \
501 Buffer.clear pbuf; \
502 let pstack = [] :: (atom :: rev_sexp_lst) :: sexp_stack in \
503 SET_PSTACK; \
504 bump_pos_cont state str ~max_pos ~pos PARSE) \
505 | ')' -> \
506 let pbuf = state.pbuf in \
507 let pbuf_str = Buffer.contents pbuf in \
508 let atom = MK_ATOM in \
509 (match GET_PSTACK with \
510 | [] -> Done (atom, mk_parse_pos state pos) \
511 | rev_sexp_lst :: sexp_stack -> \
512 let sexp_lst = List.rev_append rev_sexp_lst [atom] in \
513 let sexp = MK_LIST in \
514 match sexp_stack with \
515 | [] -> Done (sexp, mk_parse_pos state (pos + 1)) \
516 | higher_rev_sexp_lst :: higher_sexp_stack -> \
517 Buffer.clear pbuf; \
518 let pstack = \
519 (sexp :: higher_rev_sexp_lst) :: higher_sexp_stack \
520 in \
521 SET_PSTACK; \
522 bump_pos_cont state str ~max_pos ~pos PARSE) \
523 | '\010' -> bump_found_atom bump_text_line state str ~max_pos ~pos PARSE \
524 | '\013' -> \
525 bump_found_atom bump_text_line state str ~max_pos ~pos parse_nl \
526 | ';' -> \
527 bump_found_atom bump_text_pos state str ~max_pos ~pos parse_comment \
528 | '"' -> \
529 bump_found_atom \
530 bump_text_pos state str ~max_pos ~pos reg_parse_quoted \
531 | c -> add_bump_pos state str ~max_pos ~pos c parse_atom \
532 \
533 and reg_parse_quoted state str ~max_pos ~pos = \
534 REGISTER_POS \
535 parse_quoted state str ~max_pos ~pos \
536 and parse_quoted state str ~max_pos ~pos = \
537 if pos > max_pos then mk_cont "parse_quoted" parse_quoted state \
538 else \
539 match GET_CHAR with \
540 | '"' -> \
541 let pbuf = state.pbuf in \
542 let pbuf_str = Buffer.contents pbuf in \
543 let atom = MK_ATOM in \
544 (match GET_PSTACK with \
545 | [] -> Done (atom, mk_parse_pos state (pos + 1)) \
546 | rev_sexp_lst :: sexp_stack -> \
547 Buffer.clear pbuf; \
548 let pstack = (atom :: rev_sexp_lst) :: sexp_stack in \
549 SET_PSTACK; \
550 bump_pos_cont state str ~max_pos ~pos PARSE) \
551 | '\\' -> bump_pos_cont state str ~max_pos ~pos parse_escaped \
552 | '\010' as c -> add_bump_line state str ~max_pos ~pos c parse_quoted \
553 | '\013' as c -> add_bump_line state str ~max_pos ~pos c parse_quoted_nl \
554 | c -> add_bump_pos state str ~max_pos ~pos c parse_quoted \
555 \
556 and parse_quoted_nl state str ~max_pos ~pos = \
557 if pos > max_pos then mk_cont "parse_quoted_nl" parse_quoted_nl state \
558 else \
559 let pos = \
560 let c = '\010' in \
561 if GET_CHAR = c then ( \
562 Buffer.add_char state.pbuf c; \
563 pos + 1 \
564 ) \
565 else pos \
566 in \
567 parse_quoted state str ~max_pos ~pos \
568 \
569 and parse_escaped state str ~max_pos ~pos = \
570 if pos > max_pos then mk_cont "parse_escaped" parse_escaped state \
571 else \
572 match GET_CHAR with \
573 | '\010' -> bump_line_cont state str ~max_pos ~pos parse_skip_ws \
574 | '\013' -> bump_line_cont state str ~max_pos ~pos parse_skip_ws_nl \
575 | '0' .. '9' as c -> \
576 bump_text_pos state; \
577 let d = Char.code c - 48 in \
578 parse_dec state str ~max_pos ~pos:(pos + 1) ~count:2 ~d \
579 | 'x' -> \
580 bump_text_pos state; \
581 parse_hex state str ~max_pos ~pos:(pos + 1) ~count:2 ~d:0 \
582 | ('\\' | '"' | '\'' ) as c -> \
583 add_bump_pos state str ~max_pos ~pos c parse_quoted \
584 | 'n' -> add_bump_pos state str ~max_pos ~pos '\n' parse_quoted \
585 | 't' -> add_bump_pos state str ~max_pos ~pos '\t' parse_quoted \
586 | 'b' -> add_bump_pos state str ~max_pos ~pos '\b' parse_quoted \
587 | 'r' -> add_bump_pos state str ~max_pos ~pos '\r' parse_quoted \
588 | c -> \
589 Buffer.add_char state.pbuf '\\'; \
590 add_bump_pos state str ~max_pos ~pos c parse_quoted \
591 \
592 and parse_skip_ws state str ~max_pos ~pos = \
593 if pos > max_pos then mk_cont "parse_skip_ws" parse_skip_ws state \
594 else \
595 match GET_CHAR with \
596 | ' ' | '\009' -> bump_pos_cont state str ~max_pos ~pos parse_skip_ws \
597 | _ -> parse_quoted state str ~max_pos ~pos \
598 \
599 and parse_skip_ws_nl state str ~max_pos ~pos = \
600 if pos > max_pos then mk_cont "parse_skip_ws_nl" parse_skip_ws_nl state \
601 else \
602 let pos = if GET_CHAR = '\010' then pos + 1 else pos in \
603 parse_skip_ws state str ~max_pos ~pos \
604 \
605 and parse_dec state str ~max_pos ~pos ~count ~d = \
606 if pos > max_pos then mk_cont "parse_dec" (parse_dec ~count ~d) state \
607 else \
608 match GET_CHAR with \
609 | '0' .. '9' as c -> \
610 let d = 10 * d + Char.code c - 48 in \
611 if count = 1 then \
612 if d > 255 then \
613 let err_msg = sprintf "illegal decimal escape: \\%d" d in \
614 raise_parse_error (MK_PARSE_STATE state) "parse_dec" pos err_msg \
615 else \
616 add_bump_pos state str ~max_pos ~pos (Char.chr d) parse_quoted \
617 else ( \
618 bump_text_pos state; \
619 parse_dec state str ~max_pos ~pos:(pos + 1) ~count:(count - 1) ~d) \
620 | c -> raise_unexpected_char (MK_PARSE_STATE state) "parse_dec" pos c \
621 \
622 and parse_hex state str ~max_pos ~pos ~count ~d = \
623 if pos > max_pos then mk_cont "parse_hex" (parse_hex ~count ~d) state \
624 else \
625 match GET_CHAR with \
626 | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' as c -> \
627 let corr = \
628 if c >= 'a' then 87 \
629 else if c >= 'A' then 55 \
630 else 48 \
631 in \
632 let d = 16 * d + Char.code c - corr in \
633 if count = 1 then \
634 if d > 255 then \
635 let err_msg = sprintf "illegal hexadecimal escape: \\%x" d in \
636 raise_parse_error (MK_PARSE_STATE state) "parse_hex" pos err_msg \
637 else \
638 add_bump_pos state str ~max_pos ~pos (Char.chr d) parse_quoted \
639 else ( \
640 bump_text_pos state; \
641 parse_hex state str ~max_pos ~pos:(pos + 1) ~count:(count - 1) ~d) \
642 | c -> raise_unexpected_char (MK_PARSE_STATE state) "parse_hex" pos c \
643 \
644 let PARSE ?(parse_pos = Parse_pos.create ()) ?len str = \
645 let pos = parse_pos.Parse_pos.buf_pos in \
646 let len = \
647 match len with \
648 | Some len -> len \
649 | None -> GET_LEN str - pos \
650 in \
651 let max_pos = check_str_bounds "parse" ~pos ~len str in \
652 let state = \
653 { \
654 parse_pos = parse_pos; \
655 pstack = INIT_PSTACK; \
656 pbuf = Buffer.create 128; \
657 } \
658 in \
659 PARSE state str ~max_pos ~pos
660
661MK_PARSER(
662 string, String.length, parse_str, str.[pos],
663 state.pstack, state.pstack <- pstack,
664 ,,
665 Atom pbuf_str, List sexp_lst, [], `Sexp
666)
667
668let parse = parse_str
669
670
671(* Annot parsers *)
672
673let get_glob_ofs parse_pos pos =
674 parse_pos.Parse_pos.global_offset + pos - parse_pos.Parse_pos.buf_pos
675
676let mk_annot_pos
677 ({ Parse_pos.text_line = line; text_char = col } as parse_pos) pos =
678 { Annot.line = line; col = col; offset = get_glob_ofs parse_pos pos }
679
680let mk_annot_pos1
681 ({ Parse_pos.text_line = line; text_char = col } as parse_pos) pos =
682 { Annot.line = line; col = col + 1; offset = get_glob_ofs parse_pos pos }
683
684let add_annot_pos { parse_pos = parse_pos; pstack = pstack } pos =
685 pstack.Annot.positions <- mk_annot_pos parse_pos pos :: pstack.Annot.positions
686
687let add_annot_pos1 { parse_pos = parse_pos; pstack = pstack } pos =
688 pstack.Annot.positions <-
689 mk_annot_pos1 parse_pos pos :: pstack.Annot.positions
690
691let get_annot_range { parse_pos = parse_pos; pstack = pstack } pos =
692 let start_pos =
693 match pstack.Annot.positions with
694 | [] -> assert false (* impossible *)
695 | h :: t -> pstack.Annot.positions <- t; h
696 in
697 let end_pos =
698 {
699 Annot.
700 line = parse_pos.Parse_pos.text_line;
701 col = parse_pos.Parse_pos.text_char;
702 offset = get_glob_ofs parse_pos pos;
703 }
704 in
705 { Annot.start_pos = start_pos; end_pos = end_pos }
706
707let mk_annot_atom parse_state str pos =
708 Annot.Atom (get_annot_range parse_state pos, Atom str)
709
710let mk_annot_list parse_state annot_lst pos =
711 let range = get_annot_range parse_state pos in
712 let sexp = List (List.rev (List.rev_map Annot.get_sexp annot_lst)) in
713 Annot.List (range, annot_lst, sexp)
714
715let init_annot_pstate () = { Annot.positions = []; stack = [] }
716
717MK_PARSER(
718 string, String.length, parse_str_annot, str.[pos],
719 state.pstack.Annot.stack, state.pstack.Annot.stack <- pstack,
720 add_annot_pos state pos;,add_annot_pos1 state pos;,
721 mk_annot_atom state pbuf_str pos, mk_annot_list state sexp_lst pos,
722 init_annot_pstate (), `Annot
723)
724
725
726(* Partial parsing from bigstrings *)
727
728(* NOTE: this is really an awful duplication of the code for parsing
729 strings, but since OCaml does not inline higher-order functions known
730 at compile, other solutions would sacrifice a lot of efficiency. *)
731
732MK_PARSER(
733 bigstring, Array1.dim, parse_bigstring, str.{pos},
734 state.pstack, state.pstack <- pstack,
735 ,,
736 Atom pbuf_str, List sexp_lst, [], `Sexp
737)
738
739MK_PARSER(
740 bigstring, Array1.dim, parse_bigstring_annot, str.{pos},
741 state.pstack.Annot.stack, state.pstack.Annot.stack <- pstack,
742 add_annot_pos state pos;,add_annot_pos1 state pos;,
743 mk_annot_atom state pbuf_str pos, mk_annot_list state sexp_lst pos,
744 init_annot_pstate (), `Annot
745)
746
747
748(* Input functions *)
749
750let mk_this_parse ?parse_pos my_parse = (); fun ~pos ~len str ->
751 let parse_pos =
752 match parse_pos with
753 | None -> Parse_pos.create ~buf_pos:pos ()
754 | Some parse_pos -> parse_pos.Parse_pos.buf_pos <- pos; parse_pos
755 in
756 my_parse ?parse_pos:(Some parse_pos) ?len:(Some len) str
757
758let gen_input_sexp my_parse ?parse_pos ic =
759 let buf = String.create 1 in
760 let rec loop this_parse =
761 let c = input_char ic in
762 buf.[0] <- c;
763 match this_parse ~pos:0 ~len:1 buf with
764 | Done (sexp, _) -> sexp
765 | Cont (_, this_parse) -> loop this_parse
766 in
767 loop (mk_this_parse ?parse_pos my_parse)
768
769let input_sexp ?parse_pos ic = gen_input_sexp parse ?parse_pos ic
770
771let gen_input_rev_sexps my_parse ?parse_pos ?(buf = String.create 8192) ic =
772 let rev_sexps_ref = ref [] in
773 let buf_len = String.length buf in
774 let rec loop this_parse ~pos ~len ~is_incomplete =
775 if len > 0 then
776 match this_parse ~pos ~len buf with
777 | Done (sexp, ({ Parse_pos.buf_pos = buf_pos } as parse_pos)) ->
778 rev_sexps_ref := sexp :: !rev_sexps_ref;
779 let n_parsed = buf_pos - pos in
780 let this_parse = mk_this_parse ~parse_pos my_parse in
781 if n_parsed = len then
782 let new_len = input ic buf 0 buf_len in
783 loop this_parse ~pos:0 ~len:new_len ~is_incomplete:false
784 else
785 loop this_parse
786 ~pos:buf_pos ~len:(len - n_parsed) ~is_incomplete:false
787 | Cont (ws_only, this_parse) ->
788 loop this_parse
789 ~pos:0 ~len:(input ic buf 0 buf_len) ~is_incomplete:(not ws_only)
790 else if is_incomplete then
791 failwith
792 "Sexplib.Sexp.input_rev_sexps: reached EOF with incomplete S-expression"
793 else !rev_sexps_ref
794 in
795 let len = input ic buf 0 buf_len in
796 let this_parse = mk_this_parse ?parse_pos my_parse in
797 loop this_parse ~pos:0 ~len ~is_incomplete:false
798
799let input_rev_sexps ?parse_pos ?buf ic =
800 gen_input_rev_sexps parse ?parse_pos ?buf ic
801
802let input_sexps ?parse_pos ?buf ic =
803 List.rev (input_rev_sexps ?parse_pos ?buf ic)
804
805
806(* of_string and of_bigstring *)
807
808let of_string_bigstring loc this_parse ws_buf get_len get_sub str =
809 match this_parse str with
810 | Done (_, { Parse_pos.buf_pos = buf_pos }) when buf_pos <> get_len str ->
811 let prefix_len = min (get_len str - buf_pos) 20 in
812 let prefix = get_sub str buf_pos prefix_len in
813 let msg =
814 sprintf
815 "Sexplib.Sexp.%s: S-expression followed by data at position %d: %S..."
816 loc buf_pos prefix
817 in
818 failwith msg
819 | Done (sexp, _) -> sexp
820 | Cont (ws_only, this_parse) ->
821 if ws_only then failwith (sprintf "Sexplib.Sexp.%s: whitespace only" loc);
822 (* When parsing atoms, the incremental parser cannot tell whether
823 it is at the end until it hits whitespace. We therefore feed
824 it one space to determine whether it is finished. *)
825 match this_parse ~pos:0 ~len:1 ws_buf with
826 | Done (sexp, _) -> sexp
827 | Cont _ ->
828 failwith (
829 sprintf "Sexplib.Sexp.%s: got incomplete S-expression: %s"
830 loc (get_sub str 0 (get_len str)))
831
832let of_string str =
833 of_string_bigstring "of_string" parse " " String.length String.sub str
834
835let get_bstr_sub_str bstr pos len =
836 let str = String.create len in
837 for i = 0 to len - 1 do str.[i] <- bstr.{pos + i} done;
838 str
839
840let bstr_ws_buf = Array1.create char c_layout 1
841let () = bstr_ws_buf.{0} <- ' '
842
843let of_bigstring bstr =
844 of_string_bigstring
845 "of_bigstring" parse_bigstring bstr_ws_buf Array1.dim get_bstr_sub_str bstr
846
847
848(* Loading *)
849
850let gen_load_rev_sexps input_rev_sexps ?buf file =
851 let ic = open_in file in
852 try
853 let sexps = input_rev_sexps ?parse_pos:None ?buf ic in
854 close_in ic;
855 sexps
856 with exc -> close_in_noerr ic; raise exc
857
858let load_rev_sexps ?buf file = gen_load_rev_sexps input_rev_sexps ?buf file
859
860let load_sexps ?buf file = List.rev (load_rev_sexps ?buf file)
861
862let gen_load_sexp my_parse ?(strict = true) ?(buf = String.create 8192) file =
863 let buf_len = String.length buf in
864 let ic = open_in file in
865 let rec loop this_parse =
866 let len = input ic buf 0 buf_len in
867 if len = 0 then
868 failwith (sprintf "Sexplib.Sexp.gen_load_sexp: end of file: %s" file)
869 else
870 match this_parse ~pos:0 ~len buf with
871 | Done (sexp, ({ Parse_pos.buf_pos = buf_pos } as parse_pos))
872 when strict ->
873 let rec strict_loop this_parse ~pos ~len =
874 match this_parse ~pos ~len buf with
875 | Done _ | Cont (false, _) ->
876 failwith (
877 sprintf
878 "Sexplib.Sexp.gen_load_sexp: more than one S-expression: %s"
879 file)
880 | Cont (true, this_parse) ->
881 let len = input ic buf 0 buf_len in
882 if len = 0 then sexp
883 else strict_loop this_parse ~pos:0 ~len
884 in
885 let this_parse = mk_this_parse ~parse_pos my_parse in
886 strict_loop this_parse ~pos:buf_pos ~len:(len - buf_pos)
887 | Done (sexp, _) -> sexp
888 | Cont (_, this_parse) -> loop this_parse
889 in
890 try
891 let sexp = loop (mk_this_parse my_parse) in
892 close_in ic;
893 sexp
894 with exc -> close_in_noerr ic; raise exc
895
896let load_sexp ?strict ?buf file = gen_load_sexp parse ?strict ?buf file
897
898module Annotated = struct
899 include Annot
900
901 let parse = parse_str_annot
902 let parse_bigstring = parse_bigstring_annot
903
904 let input_rev_sexps ?parse_pos ?buf ic =
905 gen_input_rev_sexps parse ?parse_pos ?buf ic
906
907 let input_sexp ?parse_pos ic = gen_input_sexp parse ?parse_pos ic
908
909 let input_sexps ?parse_pos ?buf ic =
910 List.rev (input_rev_sexps ?parse_pos ?buf ic)
911
912 let of_string str =
913 of_string_bigstring
914 "Annotated.of_string" parse " " String.length String.sub str
915
916 let of_bigstring bstr =
917 of_string_bigstring
918 "Annotated.of_bigstring"
919 parse_bigstring bstr_ws_buf Array1.dim get_bstr_sub_str bstr
920
921 let load_rev_sexps ?buf file = gen_load_rev_sexps input_rev_sexps ?buf file
922 let load_sexps ?buf file = List.rev (load_rev_sexps ?buf file)
923 let load_sexp ?strict ?buf file = gen_load_sexp parse ?strict ?buf file
924
925 let conv f annot_sexp =
926 let sexp = get_sexp annot_sexp in
927 try `Result (f sexp)
928 with Of_sexp_error (exc, bad_sexp) as e ->
929 match find_sexp annot_sexp bad_sexp with
930 | None -> raise e
931 | Some bad_annot_sexp -> `Error (exc, bad_annot_sexp)
932
933 let get_conv_exn ~file ~exc annot_sexp =
934 let range = get_range annot_sexp in
935 let { start_pos = { line = line; col = col } } = range in
936 let loc = sprintf "%s:%d:%d" file line col in
937 Of_sexp_error (Annot.Conv_exn (loc, exc), get_sexp annot_sexp)
938end
939
940let load_sexp_conv ?(strict = true) ?(buf = String.create 8192) file f =
941 let sexp = load_sexp ~strict ~buf file in
942 try `Result (f sexp)
943 with Of_sexp_error _ ->
944 Annotated.conv f (Annotated.load_sexp ~strict ~buf file)
945
946let raise_conv_exn ~file = function
947 | `Result res -> res
948 | `Error (exc, annot_sexp) ->
949 raise (Annotated.get_conv_exn ~file ~exc annot_sexp)
950
951let load_sexp_conv_exn ?strict ?buf file f =
952 raise_conv_exn ~file (load_sexp_conv ?strict ?buf file f)
953
954let load_sexps_conv ?(buf = String.create 8192) file f =
955 let rev_sexps = load_rev_sexps ~buf file in
956 try List.rev_map (fun sexp -> `Result (f sexp)) rev_sexps
957 with Of_sexp_error _ as e ->
958 match Annotated.load_rev_sexps ~buf file with
959 | [] ->
960 (* File is now empty - perhaps it was a temporary file handle? *)
961 raise e
962 | rev_annot_sexps ->
963 List.rev_map (fun annot_sexp -> Annotated.conv f annot_sexp)
964 rev_annot_sexps
965
966let load_sexps_conv_exn ?(buf = String.create 8192) file f =
967 let rev_sexps = load_rev_sexps ~buf file in
968 try List.rev_map f rev_sexps
969 with Of_sexp_error _ as e ->
970 match Annotated.load_rev_sexps ~buf file with
971 | [] ->
972 (* File is now empty - perhaps it was a temporary file handle? *)
973 raise e
974 | rev_annot_sexps ->
975 List.rev_map
976 (fun annot_sexp -> raise_conv_exn ~file (Annotated.conv f annot_sexp))
977 rev_annot_sexps
978
979let gen_of_string_conv of_string annot_of_string str f =
980 let sexp = of_string str in
981 try `Result (f sexp)
982 with Of_sexp_error _ -> Annotated.conv f (annot_of_string str)
983
984let of_string_conv str f =
985 gen_of_string_conv of_string Annotated.of_string str f
986
987let of_bigstring_conv bstr f =
988 gen_of_string_conv of_bigstring Annotated.of_bigstring bstr f
989
990module Of_string_conv_exn = struct
991 type t = { exc : exn; sexp : Type.t; sub_sexp : Type.t }
992
993 exception E of t
994end
995
996let gen_of_string_conv_exn of_string str f =
997 let sexp = of_string str in
998 try f sexp
999 with Of_sexp_error (exc, sub_sexp) ->
1000 raise (Of_string_conv_exn.E { Of_string_conv_exn.exc = exc; sexp = sexp; sub_sexp = sub_sexp })
1001
1002let of_string_conv_exn str f = gen_of_string_conv_exn of_string str f
1003let of_bigstring_conv_exn bstr f = gen_of_string_conv_exn of_bigstring bstr f
1004
1005
1006(* Utilities for automated type conversions *)
1007
1008let unit = List []
1009
1010external sexp_of_t : t -> t = "%identity"
1011external t_of_sexp : t -> t = "%identity"
1012
1013
1014(* Utilities for conversion error handling *)
1015
1016type found = [ `Found | `Pos of int * found ]
1017type search_result = [ `Not_found | found ]
1018
1019let rec search_physical sexp ~contained =
1020 if sexp == contained then `Found
1021 else
1022 match sexp with
1023 | Atom _ -> `Not_found
1024 | List lst ->
1025 let rec loop i = function
1026 | [] -> `Not_found
1027 | h :: t ->
1028 let res = search_physical h ~contained in
1029 match res with
1030 | `Not_found -> loop (i + 1) t
1031 | #found as found -> `Pos (i, found)
1032 in
1033 loop 0 lst
1034
1035let rec subst_found sexp ~subst = function
1036 | `Found -> subst
1037 | `Pos (pos, found) ->
1038 match sexp with
1039 | Atom _ ->
1040 failwith
1041 "Sexplib.Sexp.subst_search_result: atom when position requested"
1042 | List lst ->
1043 let rec loop acc pos = function
1044 | [] ->
1045 failwith
1046 "Sexplib.Sexp.subst_search_result: \
1047 short list when position requested"
1048 | h :: t when pos <> 0 -> loop (h :: acc) (pos - 1) t
1049 | h :: t ->
1050 List (List.rev_append acc (subst_found h ~subst found :: t))
1051 in
1052 loop [] pos lst