1 (******************************************************************************
4 * Copyright (C) 2005- Jane Street Holding, LLC *
5 * Contact: opensource@janestreet.com *
6 * WWW: http://www.janestreet.com/ocaml *
7 * Author: Markus Mottl *
9 * This library is free software; you can redistribute it and/or *
10 * modify it under the terms of the GNU Lesser General Public *
11 * License as published by the Free Software Foundation; either *
12 * version 2 of the License, or (at your option) any later version. *
14 * This library is distributed in the hope that it will be useful, *
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
17 * Lesser General Public License for more details. *
19 * You should have received a copy of the GNU Lesser General Public *
20 * License along with this library; if not, write to the Free Software *
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
23 ******************************************************************************)
25 (* Sexp: Module for handling S-expressions (I/O, etc.) *)
32 exception Of_sexp_error
of exn
* t
34 type bigstring
= (char
, int8_unsigned_elt
, c_layout
) Array1.t
37 (* Default indentation level for human-readable conversions *)
39 let default_indent = ref 1
42 (* Escaping of strings used as atoms in S-expressions *)
44 let is_special_char c
=
45 c
<= ' '
|| c
= '
"' || c = '(' || c = ')' || c = ';' || c = '\\'
48 let len = String.length str in
50 let rec loop ix = is_special_char str.[ix] || ix > 0 && loop (ix - 1) in
53 let maybe_esc_str str =
54 if must_escape str then
55 let estr = String.escaped str in
56 let elen = String.length estr in
57 let res = String.create (elen + 2) in
58 String.blit estr 0 res 1 elen;
60 res.[elen + 1] <- '
"';
64 let pp_maybe_esc_str ppf str = pp_print_string ppf (maybe_esc_str str)
67 (* Output of S-expressions to formatters *)
69 let rec pp_hum_indent indent ppf = function
70 | Atom str -> pp_maybe_esc_str ppf str
72 pp_open_box ppf indent;
73 pp_print_string ppf "(";
74 pp_hum_indent indent ppf h;
75 pp_hum_rest indent ppf t
76 | List [] -> pp_print_string ppf "()"
78 and pp_hum_rest indent ppf = function
80 pp_print_space ppf ();
81 pp_hum_indent indent ppf h;
82 pp_hum_rest indent ppf t
84 pp_print_string ppf ")";
87 let rec pp_mach_internal may_need_space ppf = function
89 let str' = maybe_esc_str str in
90 let new_may_need_space = str' == str in
91 if may_need_space && new_may_need_space then pp_print_string ppf " ";
92 pp_print_string ppf str';
95 pp_print_string ppf "(";
96 let may_need_space = pp_mach_internal false ppf h in
97 pp_mach_rest may_need_space ppf t;
99 | List [] -> pp_print_string ppf "()"; false
101 and pp_mach_rest may_need_space ppf = function
103 let may_need_space = pp_mach_internal may_need_space ppf h in
104 pp_mach_rest may_need_space ppf t
105 | [] -> pp_print_string ppf ")"
107 let pp_hum ppf sexp = pp_hum_indent !default_indent ppf sexp
109 let pp_mach ppf sexp = ignore (pp_mach_internal false ppf sexp)
115 let rec size_loop (v, c as acc) = function
116 | Atom str -> v + 1, c + String.length str
117 | List lst -> List.fold_left size_loop acc lst
119 let size sexp = size_loop (0, 0) sexp
122 (* Buffer conversions *)
124 let to_buffer_hum ~buf ?(indent = !default_indent) sexp =
125 Format.bprintf buf "%a
@?
" (pp_hum_indent indent) sexp
127 let to_buffer_mach ~buf sexp =
128 let rec loop may_need_space = function
130 let str' = maybe_esc_str str in
131 let new_may_need_space = str' == str in
132 if may_need_space && new_may_need_space then Buffer.add_char buf ' ';
133 Buffer.add_string buf str';
136 Buffer.add_char buf '(';
137 let may_need_space = loop false h in
138 loop_rest may_need_space t;
140 | List [] -> Buffer.add_string buf "()"; false
141 and loop_rest may_need_space = function
143 let may_need_space = loop may_need_space h in
144 loop_rest may_need_space t
145 | [] -> Buffer.add_char buf ')' in
146 ignore (loop false sexp)
148 let to_buffer = to_buffer_mach
151 (* Output of S-expressions to I/O-channels *)
153 let buffer () = Buffer.create 4096
155 let with_new_buffer oc f =
156 let buf = buffer () in
158 Buffer.output_buffer oc buf
160 let output_hum oc sexp =
161 with_new_buffer oc (fun buf -> to_buffer_hum sexp ~buf)
163 let output_hum_indent indent oc sexp =
164 with_new_buffer oc (fun buf -> to_buffer_hum ~indent sexp ~buf)
166 let output_mach oc sexp =
167 with_new_buffer oc (fun buf -> to_buffer_mach sexp ~buf)
169 let output = output_mach
172 (* Output of S-expressions to file *)
174 let save_of_output ?perm output_function file sexp =
175 let tmp_name, oc = Filename.open_temp_file file "tmp
" in
177 output_function oc sexp;
183 let umask = Unix.umask 0 in
184 ignore (Unix.umask umask);
187 if perm <> 0o600 then Unix.chmod tmp_name perm;
188 Sys.rename tmp_name file
194 let output_sexp_nl do_output oc sexp =
196 output_string oc "\n"
198 let save_hum ?perm file sexp =
199 save_of_output ?perm (output_sexp_nl output_hum) file sexp
201 let save_mach ?perm file sexp = save_of_output ?perm output_mach file sexp
204 let output_sexps_nl do_output oc sexps =
205 List.iter (output_sexp_nl do_output oc) sexps
207 let save_sexps_hum ?perm file sexps =
208 save_of_output ?perm (output_sexps_nl output_hum) file sexps
210 let save_sexps_mach ?perm file sexps =
211 save_of_output ?perm (output_sexps_nl output_mach) file sexps
213 let save_sexps = save_sexps_mach
216 (* String conversions *)
218 let to_string_hum ?indent = function
219 | Atom str -> maybe_esc_str str
221 let buf = buffer () in
222 to_buffer_hum ?indent sexp ~buf;
225 let to_string_mach = function
226 | Atom str -> maybe_esc_str str
228 let buf = buffer () in
229 to_buffer_mach sexp ~buf;
232 let to_string = to_string_mach
237 let scan_sexp ?buf lexbuf = Parser.sexp (Lexer.main ?buf) lexbuf
238 let scan_sexps ?buf lexbuf = Parser.sexps (Lexer.main ?buf) lexbuf
240 let get_main_buf buf =
243 | None -> Buffer.create 64
247 let scan_fold_sexps ?buf ~f ~init lexbuf =
248 let main = get_main_buf buf in
250 match Parser.sexp_opt main lexbuf with
252 | Some sexp -> loop (f acc sexp) in
255 let scan_iter_sexps ?buf ~f lexbuf =
256 scan_fold_sexps ?buf lexbuf ~init:() ~f:(fun () sexp -> f sexp)
258 let scan_sexps_conv ?buf ~f lexbuf =
259 let coll acc sexp = f sexp :: acc in
260 List.rev (scan_fold_sexps ?buf ~f:coll ~init:[] lexbuf)
263 (* Partial parsing *)
265 module Annot = struct
266 type pos = { line : int; col : int; offset : int }
267 type range = { start_pos : pos; end_pos : pos }
268 type t = Atom of range * Type.t | List of range * t list * Type.t
269 type 'a conv = [ `Result of 'a | `Error of exn * t ]
271 exception Conv_exn of string * exn
274 mutable positions : pos list;
275 mutable stack : t list list;
278 let get_sexp = function Atom (_, sexp) | List (_, _, sexp) -> sexp
279 let get_range = function Atom (range, _) | List (range, _, _) -> range
281 exception Annot_sexp of t
283 let find_sexp annot_sexp sexp =
284 let rec loop annot_sexp =
285 match annot_sexp with
287 | List (_, _, sub_sexp) when sexp == sub_sexp ->
288 raise (Annot_sexp annot_sexp)
289 | List (_, annots, _) -> List.iter loop annots
292 try loop annot_sexp; None
293 with Annot_sexp res -> Some res
296 module Parse_pos = struct
299 mutable text_line : int;
300 mutable text_char : int;
301 mutable global_offset : int;
302 mutable buf_pos : int;
306 ?(text_line = 1) ?(text_char = 0)
307 ?(buf_pos = 0) ?(global_offset = 0) () =
308 let fail msg = failwith ("Sexplib.Sexp.Parse_pos.create: " ^ msg) in
309 if text_line < 1 then fail "text_line
< 1"
310 else if text_char < 0 then fail "text_char
< 0"
311 else if global_offset < 0 then fail "global_offset
< 0"
312 else if buf_pos < 0 then fail "buf_pos
< 0"
313 else { text_line = text_line; text_char = text_char; global_offset = global_offset; buf_pos = buf_pos }
315 let with_buf_pos t buf_pos = { t with buf_pos = buf_pos }
318 type ('a, 't) parse_result =
319 | Done of 't * Parse_pos.t
320 | Cont of bool * ('a, 't) parse_fun
322 and ('a, 't) parse_fun = pos : int -> len : int -> 'a -> ('a, 't) parse_result
324 type 't parse_state =
326 parse_pos : Parse_pos.t;
337 | `Sexp of t list list parse_state
338 | `Annot of Annot.stack parse_state
342 exception Parse_error of parse_error
344 let bump_text_line { parse_pos = parse_pos } =
345 parse_pos.Parse_pos.text_line <- parse_pos.Parse_pos.text_line + 1;
346 parse_pos.Parse_pos.text_char <- 0
348 let bump_text_pos { parse_pos = parse_pos } =
349 parse_pos.Parse_pos.text_char <- parse_pos.Parse_pos.text_char + 1
351 let bump_pos_cont state str ~max_pos ~pos cont =
353 cont state str ~max_pos ~pos:(pos + 1)
355 let bump_line_cont state str ~max_pos ~pos cont =
356 bump_text_line state;
357 cont state str ~max_pos ~pos:(pos + 1)
359 let add_bump bump state str ~max_pos ~pos c cont =
360 Buffer.add_char state.pbuf c;
362 cont state str ~max_pos ~pos:(pos + 1)
364 let add_bump_pos state str ~max_pos ~pos c cont =
365 add_bump bump_text_pos state str ~max_pos ~pos c cont
367 let add_bump_line state str ~max_pos ~pos c cont =
368 add_bump bump_text_line state str ~max_pos ~pos c cont
370 let set_parse_pos parse_pos buf_pos =
371 let len = buf_pos - parse_pos.Parse_pos.buf_pos in
372 parse_pos.Parse_pos.buf_pos <- buf_pos;
373 parse_pos.Parse_pos.global_offset <- parse_pos.Parse_pos.global_offset + len
375 let mk_parse_pos { parse_pos = parse_pos } buf_pos =
376 set_parse_pos parse_pos buf_pos;
379 let raise_parse_error parse_state location buf_pos err_msg =
381 match parse_state with
382 | `Sexp { parse_pos = parse_pos } | `Annot { parse_pos = parse_pos } ->
383 set_parse_pos parse_pos buf_pos;
384 parse_pos.Parse_pos.text_char <- parse_pos.Parse_pos.text_char + 1;
386 let parse_error = { location = location; err_msg = err_msg; parse_state = parse_state } in
387 raise (Parse_error parse_error)
389 let raise_unexpected_char parse_state location buf_pos c =
390 let err_msg = sprintf "unexpected character
: '%c'
" c in
391 raise_parse_error parse_state location buf_pos err_msg
393 (* The code below is derived from lexer.mll in the OCaml distribution,
394 which also uses ASCII codes instead of escape sequences to denote
395 special characters. *)
397 (* Macro for generating parsers *)
399 TYPE, GET_LEN, PARSE, GET_CHAR, \
400 GET_PSTACK, SET_PSTACK, \
401 REGISTER_POS, REGISTER_POS1, \
402 MK_ATOM, MK_LIST, INIT_PSTACK, MK_PARSE_STATE) \
403 let bump_found_atom bump state str ~max_pos ~pos cont = \
404 let pbuf = state.pbuf in \
405 let pbuf_str = Buffer.contents pbuf in \
406 let atom = MK_ATOM in \
407 match GET_PSTACK with \
408 | [] -> Done (atom, mk_parse_pos state pos) \
409 | rev_sexp_lst :: sexp_stack -> \
411 let pstack = (atom :: rev_sexp_lst) :: sexp_stack in \
414 cont state str ~max_pos ~pos:(pos + 1) \
416 let check_str_bounds loc ~pos ~len (str : TYPE) = \
417 if pos < 0 then invalid_arg (loc ^ ": pos
< 0"); \
418 if len < 0 then invalid_arg (loc ^ ": len < 0"); \
419 let str_len = GET_LEN str in \
420 let pos_len = pos + len in \
421 if pos_len > str_len then invalid_arg (loc ^ ": pos
+ len > str_len"); \
424 let mk_cont name cont state = \
425 let ws_only = GET_PSTACK = [] && Buffer.length state.pbuf = 0 in \
427 let used_ref = ref false in \
428 fun ~pos ~len str -> \
430 failwith "Sexplib.Sexp
: parser continuation called twice
" \
433 let max_pos = check_str_bounds name ~pos ~len str in \
434 cont state str ~max_pos ~pos \
437 Cont (ws_only, parse_fun) \
439 let rec PARSE state str ~max_pos ~pos = \
440 if pos > max_pos then mk_cont "parse
" PARSE state \
442 match GET_CHAR with \
445 let pstack = [] :: GET_PSTACK in \
447 bump_pos_cont state str ~max_pos ~pos PARSE \
449 (match GET_PSTACK with \
450 | [] -> raise_unexpected_char (MK_PARSE_STATE state) "parse
" pos c \
451 | rev_sexp_lst :: sexp_stack -> \
452 let sexp_lst = List.rev rev_sexp_lst in \
453 let sexp = MK_LIST in \
454 match sexp_stack with \
455 | [] -> Done (sexp, mk_parse_pos state (pos + 1)) \
456 | higher_rev_sexp_lst :: higher_sexp_stack -> \
458 (sexp :: higher_rev_sexp_lst) :: higher_sexp_stack \
461 bump_pos_cont state str ~max_pos ~pos PARSE) \
462 | ' ' | '\009' | '\012' -> bump_pos_cont state str ~max_pos ~pos PARSE \
463 | '\010' -> bump_line_cont state str ~max_pos ~pos PARSE \
464 | '\013' -> bump_line_cont state str ~max_pos ~pos parse_nl \
465 | ';' -> bump_pos_cont state str ~max_pos ~pos parse_comment \
468 bump_pos_cont state
str ~
max_pos ~pos parse_quoted \
471 add_bump_pos state
str ~
max_pos ~pos c parse_atom \
473 and parse_nl state
str ~
max_pos ~pos
= \
474 if pos
> max_pos then mk_cont "parse_nl" parse_nl state \
476 let pos = if GET_CHAR
= '
\010'
then pos + 1 else pos in \
477 PARSE state
str ~
max_pos ~
pos \
479 and parse_comment state
str ~
max_pos ~
pos = \
480 if pos > max_pos then mk_cont "parse_comment" parse_comment state \
482 match GET_CHAR
with \
483 | '
\010'
-> bump_line_cont state
str ~
max_pos ~
pos PARSE \
484 | '
\013'
-> bump_line_cont state
str ~
max_pos ~
pos parse_nl \
485 | _
-> bump_pos_cont state
str ~
max_pos ~
pos parse_comment \
487 and parse_atom state
str ~
max_pos ~
pos = \
488 if pos > max_pos then mk_cont "parse_atom" parse_atom state \
490 match GET_CHAR
with \
491 | ' '
| '
\009'
| '
\012'
-> \
492 bump_found_atom bump_text_pos state
str ~
max_pos ~
pos PARSE \
494 let pbuf = state
.pbuf in \
495 let pbuf_str = Buffer.contents
pbuf in \
496 let atom = MK_ATOM
in \
497 (match GET_PSTACK
with \
498 | [] -> Done
(atom, mk_parse_pos state
pos) \
499 | rev_sexp_lst
:: sexp_stack
-> \
502 let pstack = [] :: (atom :: rev_sexp_lst
) :: sexp_stack
in \
504 bump_pos_cont state
str ~
max_pos ~
pos PARSE) \
506 let pbuf = state
.pbuf in \
507 let pbuf_str = Buffer.contents
pbuf in \
508 let atom = MK_ATOM
in \
509 (match GET_PSTACK
with \
510 | [] -> Done
(atom, mk_parse_pos state
pos) \
511 | rev_sexp_lst
:: sexp_stack
-> \
512 let sexp_lst = List.rev_append rev_sexp_lst
[atom] in \
513 let sexp = MK_LIST
in \
514 match sexp_stack
with \
515 | [] -> Done
(sexp, mk_parse_pos state
(pos + 1)) \
516 | higher_rev_sexp_lst
:: higher_sexp_stack
-> \
519 (sexp :: higher_rev_sexp_lst
) :: higher_sexp_stack \
522 bump_pos_cont state
str ~
max_pos ~
pos PARSE) \
523 | '
\010'
-> bump_found_atom bump_text_line state
str ~
max_pos ~
pos PARSE \
525 bump_found_atom bump_text_line state
str ~
max_pos ~
pos parse_nl \
527 bump_found_atom bump_text_pos state
str ~
max_pos ~
pos parse_comment \
530 bump_text_pos state str ~max_pos ~pos reg_parse_quoted \
531 | c -> add_bump_pos state str ~max_pos ~pos c parse_atom \
533 and reg_parse_quoted state str ~max_pos ~pos = \
535 parse_quoted state str ~max_pos ~pos \
536 and parse_quoted state str ~max_pos ~pos = \
537 if pos > max_pos then mk_cont "parse_quoted
" parse_quoted state \
539 match GET_CHAR with \
541 let pbuf = state
.pbuf in \
542 let pbuf_str = Buffer.contents
pbuf in \
543 let atom = MK_ATOM
in \
544 (match GET_PSTACK
with \
545 | [] -> Done
(atom, mk_parse_pos state
(pos + 1)) \
546 | rev_sexp_lst
:: sexp_stack
-> \
548 let pstack = (atom :: rev_sexp_lst
) :: sexp_stack
in \
550 bump_pos_cont state
str ~
max_pos ~
pos PARSE) \
551 | '
\\'
-> bump_pos_cont state
str ~
max_pos ~
pos parse_escaped \
552 | '
\010'
as c
-> add_bump_line state
str ~
max_pos ~
pos c parse_quoted \
553 | '
\013'
as c
-> add_bump_line state
str ~
max_pos ~
pos c parse_quoted_nl \
554 | c
-> add_bump_pos state
str ~
max_pos ~
pos c parse_quoted \
556 and parse_quoted_nl state
str ~
max_pos ~
pos = \
557 if pos > max_pos then mk_cont "parse_quoted_nl" parse_quoted_nl state \
561 if GET_CHAR
= c then ( \
562 Buffer.add_char state
.pbuf c; \
567 parse_quoted state
str ~
max_pos ~
pos \
569 and parse_escaped state
str ~
max_pos ~
pos = \
570 if pos > max_pos then mk_cont "parse_escaped" parse_escaped state \
572 match GET_CHAR
with \
573 | '
\010'
-> bump_line_cont state
str ~
max_pos ~
pos parse_skip_ws \
574 | '
\013'
-> bump_line_cont state
str ~
max_pos ~
pos parse_skip_ws_nl \
575 | '
0'
.. '
9'
as c -> \
576 bump_text_pos state
; \
577 let d = Char.code
c - 48 in \
578 parse_dec state
str ~
max_pos ~
pos:(pos + 1) ~count
:2 ~
d \
580 bump_text_pos state
; \
581 parse_hex state
str ~
max_pos ~
pos:(pos + 1) ~count
:2 ~
d:0 \
582 | ('
\\'
| '
"' | '\'' ) as c -> \
583 add_bump_pos state str ~max_pos ~pos c parse_quoted \
584 | 'n' -> add_bump_pos state str ~max_pos ~pos '\n' parse_quoted \
585 | 't' -> add_bump_pos state str ~max_pos ~pos '\t' parse_quoted \
586 | 'b' -> add_bump_pos state str ~max_pos ~pos '\b' parse_quoted \
587 | 'r' -> add_bump_pos state str ~max_pos ~pos '\r' parse_quoted \
589 Buffer.add_char state.pbuf '\\'; \
590 add_bump_pos state str ~max_pos ~pos c parse_quoted \
592 and parse_skip_ws state str ~max_pos ~pos = \
593 if pos > max_pos then mk_cont "parse_skip_ws
" parse_skip_ws state \
595 match GET_CHAR with \
596 | ' ' | '\009' -> bump_pos_cont state str ~max_pos ~pos parse_skip_ws \
597 | _ -> parse_quoted state str ~max_pos ~pos \
599 and parse_skip_ws_nl state str ~max_pos ~pos = \
600 if pos > max_pos then mk_cont "parse_skip_ws_nl
" parse_skip_ws_nl state \
602 let pos = if GET_CHAR = '\010' then pos + 1 else pos in \
603 parse_skip_ws state str ~max_pos ~pos \
605 and parse_dec state str ~max_pos ~pos ~count ~d = \
606 if pos > max_pos then mk_cont "parse_dec
" (parse_dec ~count ~d) state \
608 match GET_CHAR with \
609 | '0' .. '9' as c -> \
610 let d = 10 * d + Char.code c - 48 in \
613 let err_msg = sprintf "illegal decimal escape
: \\%d
" d in \
614 raise_parse_error (MK_PARSE_STATE state) "parse_dec
" pos err_msg \
616 add_bump_pos state str ~max_pos ~pos (Char.chr d) parse_quoted \
618 bump_text_pos state; \
619 parse_dec state str ~max_pos ~pos:(pos + 1) ~count:(count - 1) ~d) \
620 | c -> raise_unexpected_char (MK_PARSE_STATE state) "parse_dec
" pos c \
622 and parse_hex state str ~max_pos ~pos ~count ~d = \
623 if pos > max_pos then mk_cont "parse_hex
" (parse_hex ~count ~d) state \
625 match GET_CHAR with \
626 | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' as c -> \
628 if c >= 'a' then 87 \
629 else if c >= 'A' then 55 \
632 let d = 16 * d + Char.code c - corr in \
635 let err_msg = sprintf "illegal hexadecimal escape
: \\%x
" d in \
636 raise_parse_error (MK_PARSE_STATE state) "parse_hex
" pos err_msg \
638 add_bump_pos state str ~max_pos ~pos (Char.chr d) parse_quoted \
640 bump_text_pos state; \
641 parse_hex state str ~max_pos ~pos:(pos + 1) ~count:(count - 1) ~d) \
642 | c -> raise_unexpected_char (MK_PARSE_STATE state) "parse_hex
" pos c \
644 let PARSE ?(parse_pos = Parse_pos.create ()) ?len str = \
645 let pos = parse_pos.Parse_pos.buf_pos in \
649 | None -> GET_LEN str - pos \
651 let max_pos = check_str_bounds "parse
" ~pos ~len str in \
654 parse_pos = parse_pos; \
655 pstack = INIT_PSTACK; \
656 pbuf = Buffer.create 128; \
659 PARSE state str ~max_pos ~pos
662 string, String.length, parse_str, str.[pos],
663 state.pstack, state.pstack <- pstack,
665 Atom pbuf_str, List sexp_lst, [], `Sexp
668 let parse = parse_str
673 let get_glob_ofs parse_pos pos =
674 parse_pos.Parse_pos.global_offset + pos - parse_pos.Parse_pos.buf_pos
677 ({ Parse_pos.text_line = line; text_char = col } as parse_pos) pos =
678 { Annot.line = line; col = col; offset = get_glob_ofs parse_pos pos }
681 ({ Parse_pos.text_line = line; text_char = col } as parse_pos) pos =
682 { Annot.line = line; col = col + 1; offset = get_glob_ofs parse_pos pos }
684 let add_annot_pos { parse_pos = parse_pos; pstack = pstack } pos =
685 pstack.Annot.positions <- mk_annot_pos parse_pos pos :: pstack.Annot.positions
687 let add_annot_pos1 { parse_pos = parse_pos; pstack = pstack } pos =
688 pstack.Annot.positions <-
689 mk_annot_pos1 parse_pos pos :: pstack.Annot.positions
691 let get_annot_range { parse_pos = parse_pos; pstack = pstack } pos =
693 match pstack.Annot.positions with
694 | [] -> assert false (* impossible *)
695 | h :: t -> pstack.Annot.positions <- t; h
700 line = parse_pos.Parse_pos.text_line;
701 col = parse_pos.Parse_pos.text_char;
702 offset = get_glob_ofs parse_pos pos;
705 { Annot.start_pos = start_pos; end_pos = end_pos }
707 let mk_annot_atom parse_state str pos =
708 Annot.Atom (get_annot_range parse_state pos, Atom str)
710 let mk_annot_list parse_state annot_lst pos =
711 let range = get_annot_range parse_state pos in
712 let sexp = List (List.rev (List.rev_map Annot.get_sexp annot_lst)) in
713 Annot.List (range, annot_lst, sexp)
715 let init_annot_pstate () = { Annot.positions = []; stack = [] }
718 string, String.length, parse_str_annot, str.[pos],
719 state.pstack.Annot.stack, state.pstack.Annot.stack <- pstack,
720 add_annot_pos state pos;,add_annot_pos1 state pos;,
721 mk_annot_atom state pbuf_str pos, mk_annot_list state sexp_lst pos,
722 init_annot_pstate (), `Annot
726 (* Partial parsing from bigstrings *)
728 (* NOTE: this is really an awful duplication of the code for parsing
729 strings, but since OCaml does not inline higher-order functions known
730 at compile, other solutions would sacrifice a lot of efficiency. *)
733 bigstring, Array1.dim, parse_bigstring, str.{pos},
734 state.pstack, state.pstack <- pstack,
736 Atom pbuf_str, List sexp_lst, [], `Sexp
740 bigstring, Array1.dim, parse_bigstring_annot, str.{pos},
741 state.pstack.Annot.stack, state.pstack.Annot.stack <- pstack,
742 add_annot_pos state pos;,add_annot_pos1 state pos;,
743 mk_annot_atom state pbuf_str pos, mk_annot_list state sexp_lst pos,
744 init_annot_pstate (), `Annot
748 (* Input functions *)
750 let mk_this_parse ?parse_pos my_parse = (); fun ~pos ~len str ->
753 | None -> Parse_pos.create ~buf_pos:pos ()
754 | Some parse_pos -> parse_pos.Parse_pos.buf_pos <- pos; parse_pos
756 my_parse ?parse_pos:(Some parse_pos) ?len:(Some len) str
758 let gen_input_sexp my_parse ?parse_pos ic =
759 let buf = String.create 1 in
760 let rec loop this_parse =
761 let c = input_char ic in
763 match this_parse ~pos:0 ~len:1 buf with
764 | Done (sexp, _) -> sexp
765 | Cont (_, this_parse) -> loop this_parse
767 loop (mk_this_parse ?parse_pos my_parse)
769 let input_sexp ?parse_pos ic = gen_input_sexp parse ?parse_pos ic
771 let gen_input_rev_sexps my_parse ?parse_pos ?(buf = String.create 8192) ic =
772 let rev_sexps_ref = ref [] in
773 let buf_len = String.length buf in
774 let rec loop this_parse ~pos ~len ~is_incomplete =
776 match this_parse ~pos ~len buf with
777 | Done (sexp, ({ Parse_pos.buf_pos = buf_pos } as parse_pos)) ->
778 rev_sexps_ref := sexp :: !rev_sexps_ref;
779 let n_parsed = buf_pos - pos in
780 let this_parse = mk_this_parse ~parse_pos my_parse in
781 if n_parsed = len then
782 let new_len = input ic buf 0 buf_len in
783 loop this_parse ~pos:0 ~len:new_len ~is_incomplete:false
786 ~pos:buf_pos ~len:(len - n_parsed) ~is_incomplete:false
787 | Cont (ws_only, this_parse) ->
789 ~pos:0 ~len:(input ic buf 0 buf_len) ~is_incomplete:(not ws_only)
790 else if is_incomplete then
792 "Sexplib.Sexp.input_rev_sexps
: reached EOF
with incomplete S
-expression
"
795 let len = input ic buf 0 buf_len in
796 let this_parse = mk_this_parse ?parse_pos my_parse in
797 loop this_parse ~pos:0 ~len ~is_incomplete:false
799 let input_rev_sexps ?parse_pos ?buf ic =
800 gen_input_rev_sexps parse ?parse_pos ?buf ic
802 let input_sexps ?parse_pos ?buf ic =
803 List.rev (input_rev_sexps ?parse_pos ?buf ic)
806 (* of_string and of_bigstring *)
808 let of_string_bigstring loc this_parse ws_buf get_len get_sub str =
809 match this_parse str with
810 | Done (_, { Parse_pos.buf_pos = buf_pos }) when buf_pos <> get_len str ->
811 let prefix_len = min (get_len str - buf_pos) 20 in
812 let prefix = get_sub str buf_pos prefix_len in
815 "Sexplib.Sexp.%s
: S
-expression followed by data at position %d
: %S
..."
819 | Done (sexp, _) -> sexp
820 | Cont (ws_only, this_parse) ->
821 if ws_only then failwith (sprintf "Sexplib.Sexp.%s
: whitespace only
" loc);
822 (* When parsing atoms, the incremental parser cannot tell whether
823 it is at the end until it hits whitespace. We therefore feed
824 it one space to determine whether it is finished. *)
825 match this_parse ~pos:0 ~len:1 ws_buf with
826 | Done (sexp, _) -> sexp
829 sprintf "Sexplib.Sexp.%s
: got incomplete S
-expression
: %s
"
830 loc (get_sub str 0 (get_len str)))
833 of_string_bigstring "of_string" parse " " String.length String.sub str
835 let get_bstr_sub_str bstr pos len =
836 let str = String.create len in
837 for i = 0 to len - 1 do str.[i] <- bstr.{pos + i} done;
840 let bstr_ws_buf = Array1.create char c_layout 1
841 let () = bstr_ws_buf.{0} <- ' '
843 let of_bigstring bstr =
845 "of_bigstring" parse_bigstring bstr_ws_buf Array1.dim get_bstr_sub_str bstr
850 let gen_load_rev_sexps input_rev_sexps ?buf file =
851 let ic = open_in file in
853 let sexps = input_rev_sexps ?parse_pos:None ?buf ic in
856 with exc -> close_in_noerr ic; raise exc
858 let load_rev_sexps ?buf file = gen_load_rev_sexps input_rev_sexps ?buf file
860 let load_sexps ?buf file = List.rev (load_rev_sexps ?buf file)
862 let gen_load_sexp my_parse ?(strict = true) ?(buf = String.create 8192) file =
863 let buf_len = String.length buf in
864 let ic = open_in file in
865 let rec loop this_parse =
866 let len = input ic buf 0 buf_len in
868 failwith (sprintf "Sexplib.Sexp.gen_load_sexp: end of file
: %s
" file)
870 match this_parse ~pos:0 ~len buf with
871 | Done (sexp, ({ Parse_pos.buf_pos = buf_pos } as parse_pos))
873 let rec strict_loop this_parse ~pos ~len =
874 match this_parse ~pos ~len buf with
875 | Done _ | Cont (false, _) ->
878 "Sexplib.Sexp.gen_load_sexp: more than one S
-expression
: %s
"
880 | Cont (true, this_parse) ->
881 let len = input ic buf 0 buf_len in
883 else strict_loop this_parse ~pos:0 ~len
885 let this_parse = mk_this_parse ~parse_pos my_parse in
886 strict_loop this_parse ~pos:buf_pos ~len:(len - buf_pos)
887 | Done (sexp, _) -> sexp
888 | Cont (_, this_parse) -> loop this_parse
891 let sexp = loop (mk_this_parse my_parse) in
894 with exc -> close_in_noerr ic; raise exc
896 let load_sexp ?strict ?buf file = gen_load_sexp parse ?strict ?buf file
898 module Annotated = struct
901 let parse = parse_str_annot
902 let parse_bigstring = parse_bigstring_annot
904 let input_rev_sexps ?parse_pos ?buf ic =
905 gen_input_rev_sexps parse ?parse_pos ?buf ic
907 let input_sexp ?parse_pos ic = gen_input_sexp parse ?parse_pos ic
909 let input_sexps ?parse_pos ?buf ic =
910 List.rev (input_rev_sexps ?parse_pos ?buf ic)
914 "Annotated.of_string" parse " " String.length String.sub str
916 let of_bigstring bstr =
918 "Annotated.of_bigstring"
919 parse_bigstring bstr_ws_buf Array1.dim get_bstr_sub_str bstr
921 let load_rev_sexps ?buf file = gen_load_rev_sexps input_rev_sexps ?buf file
922 let load_sexps ?buf file = List.rev (load_rev_sexps ?buf file)
923 let load_sexp ?strict ?buf file = gen_load_sexp parse ?strict ?buf file
925 let conv f annot_sexp =
926 let sexp = get_sexp annot_sexp in
928 with Of_sexp_error (exc, bad_sexp) as e ->
929 match find_sexp annot_sexp bad_sexp with
931 | Some bad_annot_sexp -> `Error (exc, bad_annot_sexp)
933 let get_conv_exn ~file ~exc annot_sexp =
934 let range = get_range annot_sexp in
935 let { start_pos = { line = line; col = col } } = range in
936 let loc = sprintf "%s
:%d
:%d
" file line col in
937 Of_sexp_error (Annot.Conv_exn (loc, exc), get_sexp annot_sexp)
940 let load_sexp_conv ?(strict = true) ?(buf = String.create 8192) file f =
941 let sexp = load_sexp ~strict ~buf file in
943 with Of_sexp_error _ ->
944 Annotated.conv f (Annotated.load_sexp ~strict ~buf file)
946 let raise_conv_exn ~file = function
948 | `Error (exc, annot_sexp) ->
949 raise (Annotated.get_conv_exn ~file ~exc annot_sexp)
951 let load_sexp_conv_exn ?strict ?buf file f =
952 raise_conv_exn ~file (load_sexp_conv ?strict ?buf file f)
954 let load_sexps_conv ?(buf = String.create 8192) file f =
955 let rev_sexps = load_rev_sexps ~buf file in
956 try List.rev_map (fun sexp -> `Result (f sexp)) rev_sexps
957 with Of_sexp_error _ as e ->
958 match Annotated.load_rev_sexps ~buf file with
960 (* File is now empty - perhaps it was a temporary file handle? *)
963 List.rev_map (fun annot_sexp -> Annotated.conv f annot_sexp)
966 let load_sexps_conv_exn ?(buf = String.create 8192) file f =
967 let rev_sexps = load_rev_sexps ~buf file in
968 try List.rev_map f rev_sexps
969 with Of_sexp_error _ as e ->
970 match Annotated.load_rev_sexps ~buf file with
972 (* File is now empty - perhaps it was a temporary file handle? *)
976 (fun annot_sexp -> raise_conv_exn ~file (Annotated.conv f annot_sexp))
979 let gen_of_string_conv of_string annot_of_string str f =
980 let sexp = of_string str in
982 with Of_sexp_error _ -> Annotated.conv f (annot_of_string str)
984 let of_string_conv str f =
985 gen_of_string_conv of_string Annotated.of_string str f
987 let of_bigstring_conv bstr f =
988 gen_of_string_conv of_bigstring Annotated.of_bigstring bstr f
990 module Of_string_conv_exn = struct
991 type t = { exc : exn; sexp : Type.t; sub_sexp : Type.t }
996 let gen_of_string_conv_exn of_string str f =
997 let sexp = of_string str in
999 with Of_sexp_error (exc, sub_sexp) ->
1000 raise (Of_string_conv_exn.E { Of_string_conv_exn.exc = exc; sexp = sexp; sub_sexp = sub_sexp })
1002 let of_string_conv_exn str f = gen_of_string_conv_exn of_string str f
1003 let of_bigstring_conv_exn bstr f = gen_of_string_conv_exn of_bigstring bstr f
1006 (* Utilities for automated type conversions *)
1010 external sexp_of_t : t -> t = "%identity
"
1011 external t_of_sexp : t -> t = "%identity
"
1014 (* Utilities for conversion error handling *)
1016 type found = [ `Found | `Pos of int * found ]
1017 type search_result = [ `Not_found | found ]
1019 let rec search_physical sexp ~contained =
1020 if sexp == contained then `Found
1023 | Atom _ -> `Not_found
1025 let rec loop i = function
1028 let res = search_physical h ~contained in
1030 | `Not_found -> loop (i + 1) t
1031 | #found as found -> `Pos (i, found)
1035 let rec subst_found sexp ~subst = function
1037 | `Pos (pos, found) ->
1041 "Sexplib.Sexp.subst_search_result
: atom when position requested
"
1043 let rec loop acc pos = function
1046 "Sexplib.Sexp.subst_search_result
: \
1047 short list
when position requested
"
1048 | h :: t when pos <> 0 -> loop (h :: acc) (pos - 1) t
1050 List (List.rev_append acc (subst_found h ~subst found :: t))