Commit | Line | Data |
---|---|---|
b1b2de81 C |
1 | (* File: sexp_intf.ml |
2 | ||
3 | Copyright (C) 2005- | |
4 | ||
5 | Jane Street Holding, LLC | |
6 | Author: Markus Mottl | |
7 | email: mmottl\@janestcapital.com | |
8 | WWW: http://www.janestcapital.com/ocaml | |
9 | ||
10 | This library is free software; you can redistribute it and/or | |
11 | modify it under the terms of the GNU Lesser General Public | |
12 | License as published by the Free Software Foundation; either | |
13 | version 2 of the License, or (at your option) any later version. | |
14 | ||
15 | This library is distributed in the hope that it will be useful, | |
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | Lesser General Public License for more details. | |
19 | ||
20 | You should have received a copy of the GNU Lesser General Public | |
21 | License along with this library; if not, write to the Free Software | |
22 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
23 | *) | |
24 | ||
25 | (** Sexp_intf: interface specification for handling S-expressions (I/O, etc.) *) | |
26 | ||
27 | open Format | |
28 | open Lexing | |
29 | ||
30 | module type S = sig | |
31 | ||
32 | (** Type of S-expressions: an [Atom] holding a string, or a [List] of S-expressions *) | |
33 | type t = Type.t = Atom of string | List of t list | |
34 | ||
35 | ||
36 | (** {6 Defaults} *) | |
37 | ||
38 | val default_indent : int ref | |
39 | (** [default_indent] reference to default indentation level for | |
40 | human-readable conversions. Initialisation value: 2. *) | |
41 | ||
42 | ||
43 | (** {6 S-expression size} *) | |
44 | ||
45 | val size : t -> int * int | |
46 | (** [size sexp] @return [(n_atoms, n_chars)], where [n_atoms] is | |
47 | the number of atoms in S-expression [sexp], and [n_chars] is the | |
48 | number of characters in the atoms of the S-expression. *) | |
49 | ||
50 | ||
51 | (** {6 Scan functions} *) | |
52 | ||
53 | val scan_sexp : ?buf : Buffer.t -> lexbuf -> t | |
54 | (** [scan_sexp ?buf lexbuf] scans an S-expression from lex buffer | |
55 | [lexbuf] using the optional string buffer [buf] for storing | |
56 | intermediate strings. *) | |
57 | ||
58 | val scan_sexps : ?buf : Buffer.t -> lexbuf -> t list | |
59 | (** [scan_sexps ?buf lexbuf] reads a list of whitespace separated | |
60 | S-expressions from lex buffer [lexbuf] using the optional string | |
61 | buffer [buf] for storing intermediate strings. *) | |
62 | ||
63 | val scan_iter_sexps : ?buf : Buffer.t -> f : (t -> unit) -> lexbuf -> unit | |
64 | (** [scan_iter_sexps ?buf ~f lexbuf] iterates over all whitespace | |
65 | separated S-expressions scanned from lex buffer [lexbuf] using | |
66 | function [f], and the optional string buffer [buf] for storing | |
67 | intermediate strings. *) | |
68 | ||
69 | val scan_fold_sexps : | |
70 | ?buf : Buffer.t -> f : (t -> 'a -> 'a) -> init : 'a -> lexbuf -> 'a | |
71 | (** [scan_fold_sexps ?buf ~f ~init lexbuf] folds over all whitespace | |
72 | separated S-expressions scanned from lex buffer [lexbuf] using | |
73 | function [f], initial state [init], and the optional string buffer | |
74 | [buf] for storing intermediate strings. *) | |
75 | ||
76 | val scan_cnv_sexps : ?buf : Buffer.t -> f : (t -> 'a) -> lexbuf -> 'a list | |
77 | (** [scan_cnv_sexps ?buf ~f lexbuf] maps all whitespace separated | |
78 | S-expressions scanned from lex buffer [lexbuf] to some list using | |
79 | function [f], and the optional string buffer [buf] for storing | |
80 | intermediate strings. *) | |
81 | ||
82 | ||
83 | (** {6 (Partial) parsing} *) | |
84 | ||
85 | (** Position information after complete parse *) | |
86 | type parse_pos = Pre_sexp.parse_pos = | |
87 | private | |
88 | { | |
89 | mutable text_line : int; (** Line position in parsed text *) | |
90 | mutable text_char : int; (** Character position in parsed text *) | |
91 | mutable buf_pos : int; (** Reading position in text buffer *) | |
92 | } | |
93 | ||
94 | (** Type of result from calling {!Sexp.parse} or [parse_bstr]. *) | |
95 | type 'a parse_result = 'a Pre_sexp.parse_result = | |
96 | | Done of t * parse_pos (** [Done (sexp, parse_pos)] finished | |
97 | parsing an S-expression. Current parse | |
98 | position is [parse_pos]. *) | |
99 | | Cont of bool * 'a parse_fun (** [Cont (ws_only, parse_fun)] met the | |
100 | end of input before completely | |
101 | parsing an S-expression. The user | |
102 | has to call [parse_fun] to continue | |
103 | parsing the S-expression in another | |
104 | buffer. If [ws_only] is true, only | |
105 | whitespace has been parsed so far (or | |
106 | comments!). NOTE: the continuation | |
107 | may only be called once! *) | |
108 | ||
109 | and 'a parse_fun = pos : int -> len : int -> 'a -> 'a parse_result | |
110 | (** Type of parsing functions with given offsets and lengths. *) | |
111 | ||
112 | (** Type of state maintained during parsing *) | |
113 | type parse_state = Pre_sexp.parse_state = | |
114 | private | |
115 | { | |
116 | parse_pos : parse_pos; (** Current parse position *) | |
117 | mutable pstack : t list list; (** Stack of found S-expression lists *) | |
118 | pbuf : Buffer.t; (** Current atom buffer *) | |
119 | } | |
120 | ||
121 | (** Type of parse errors *) | |
122 | type parse_error = Pre_sexp.parse_error = | |
123 | { | |
124 | location : string; (** Function in which the parse failed *) | |
125 | err_msg : string; (** Reason why parsing failed *) | |
126 | parse_state : parse_state; (** State of parser *) | |
127 | } | |
128 | ||
129 | (** Exception raised during partial parsing *) | |
130 | exception ParseError of parse_error | |
131 | ||
132 | val parse : | |
133 | ?text_line : int -> ?text_char : int -> ?pos : int -> ?len : int -> string | |
134 | -> string parse_result | |
135 | (** [parse ?text_line ?text_char ?pos ?len str] (partially) parses an | |
136 | S-expression in string buffer [str] starting at position [pos] | |
137 | and reading at most [len] characters. The text position can be | |
138 | initialized with [text_line] and [text_char]. To parse a single | |
139 | atom that is not delimited by whitespace it is necessary to call | |
140 | this function a second time with the returned continuation, and a | |
141 | dummy buffer that contains whitespace. | |
142 | ||
143 | @param text_line default = [1] | |
144 | @param text_char default = [1] | |
145 | @param pos default = [0] | |
146 | @param len default = [String.length str - pos] | |
147 | *) | |
148 | ||
149 | open Bigarray | |
150 | ||
151 | type bstr = (char, int8_unsigned_elt, c_layout) Array1.t (** Bigstring: one-dimensional [Bigarray] of characters ([int8_unsigned_elt]) in C layout. *) | |
152 | ||
153 | val parse_bstr : | |
154 | ?text_line : int -> ?text_char : int -> ?pos : int -> ?len : int -> bstr | |
155 | -> bstr parse_result | |
156 | (** [parse_bstr ?text_line ?text_char ?pos ?len bstr] same as [parse], | |
157 | but operates on bigstring [bstr]. *) | |
158 | ||
159 | val input_sexp : | |
160 | ?text_line : int -> ?text_char : int -> ?buf_pos : int -> in_channel -> t | |
161 | (** [input_sexp ?text_line ?text_char ?buf_pos ic] parses an S-expression | |
162 | from input channel [ic] using initial position information | |
163 | [text_line], [text_char], and [buf_pos]. NOTE: this function is not | |
164 | as fast on files as {!Sexp.load_sexp}, and is also slightly slower | |
165 | than the scan-functions. But it is guaranteed that [input_sexp] | |
166 | is only going to read data parseable as an S-expression. Thus, | |
167 | subsequent input functions will see the data immediately following it. | |
168 | ||
169 | @param text_line default = [1] | |
170 | @param text_char default = [1] | |
171 | @param buf_pos default = [0] | |
172 | *) | |
173 | ||
174 | val input_sexps : | |
175 | ?text_line : int -> ?text_char : int -> ?buf_pos : int -> | |
176 | ?buf : string -> in_channel -> t list | |
177 | (** [input_sexps ?text_line ?text_char ?buf_pos ?buf ic] parses | |
178 | whitespace separated S-expressions from input channel [ic] until | |
179 | EOF is reached. Faster than the scan-functions. NOTE: [buf_pos] | |
180 | is the initial global buffer position used for locating errors and | |
181 | does not refer to [buf]. | |
182 | ||
183 | @param text_line default = [1] | |
184 | @param text_char default = [1] | |
185 | @param buf_pos default = [0] | |
186 | *) | |
187 | ||
188 | val input_rev_sexps : | |
189 | ?text_line : int -> ?text_char : int -> ?buf_pos : int -> | |
190 | ?buf : string -> in_channel -> t list | |
191 | (** [input_rev_sexps ?text_line ?text_char ?buf_pos ?buf ic] same as {!Sexp.input_sexps}, | |
192 | but returns a reversed list of S-expressions, which is slightly more efficient. *) | |
193 | ||
194 | ||
195 | (** {6 Loading} *) | |
196 | ||
197 | val load_sexp : ?buf : string -> string -> t | |
198 | (** [load_sexp ?buf file] reads one S-expression from file [file] using | |
199 | buffer [buf] for storing intermediate data. Ignores any trailing | |
200 | data. Faster than the scan-functions. | |
201 | ||
202 | @raise ParseError if the S-expression is unparseable. | |
203 | @raise End_of_file if no S-expression could be read. | |
204 | *) | |
205 | ||
206 | val load_sexps : ?buf : string -> string -> t list | |
207 | (** [load_sexps ?buf file] reads a list of whitespace separated S-expressions | |
208 | from file [file] using buffer [buf] for storing intermediate data. | |
209 | Faster than the scan-functions. | |
210 | ||
211 | @raise ParseError if there is unparseable data in the file. | |
212 | @raise End_of_file if the last S-expression is incomplete. | |
213 | *) | |
214 | ||
215 | val load_rev_sexps : ?buf : string -> string -> t list | |
216 | (** [load_rev_sexps ?buf file] same as {!Sexp.load_sexps}, but returns a | |
217 | reversed list of S-expressions, which is slightly more efficient. *) | |
218 | ||
219 | ||
220 | (** {6 Output of S-expressions to I/O-channels} *) | |
221 | ||
222 | val output_hum : out_channel -> t -> unit | |
223 | (** [output_hum oc sexp] outputs S-expression [sexp] to output channel | |
224 | [oc] in human readable form. *) | |
225 | ||
226 | val output_hum_indent : int -> out_channel -> t -> unit | |
227 | (** [output_hum_indent indent oc sexp] outputs S-expression [sexp] | |
228 | to output channel [oc] in human readable form using indentation level | |
229 | [indent]. | |
230 | *) | |
231 | ||
232 | val output_mach : out_channel -> t -> unit | |
233 | (** [output_mach oc sexp] outputs S-expression [sexp] to output channel | |
234 | [oc] in machine readable (i.e. most compact) form. *) | |
235 | ||
236 | val output : out_channel -> t -> unit | |
237 | (** [output oc sexp] same as [output_mach]. *) | |
238 | ||
239 | ||
240 | (** {6 Output of S-expressions to formatters} *) | |
241 | ||
242 | val pp_hum : formatter -> t -> unit | |
243 | (** [pp_hum ppf sexp] outputs S-expression [sexp] to formatter [ppf] | |
244 | in human readable form. *) | |
245 | ||
246 | val pp_hum_indent : int -> formatter -> t -> unit | |
247 | (** [pp_hum_indent n ppf sexp] outputs S-expression [sexp] to formatter | |
248 | [ppf] in human readable form and indentation level [n]. *) | |
249 | ||
250 | val pp_mach : formatter -> t -> unit | |
251 | (** [pp_mach ppf sexp] outputs S-expression [sexp] to formatter [ppf] | |
252 | in machine readable (i.e. most compact) form. *) | |
253 | ||
254 | val pp : formatter -> t -> unit | |
255 | (** [pp ppf sexp] same as [pp_mach]. *) | |
256 | ||
257 | ||
258 | (** {6 String and bigstring conversions} *) | |
259 | ||
260 | val of_string : string -> t | |
261 | (** [of_string str] converts string [str] to an S-expression. *) | |
262 | ||
263 | val of_bstr : bstr -> t | |
264 | (** [of_bstr bstr] converts bigstring [bstr] to an S-expression. *) | |
265 | ||
266 | val to_string_hum : ?indent : int -> t -> string | |
267 | (** [to_string_hum ?indent sexp] converts S-expression [sexp] to a | |
268 | string in human readable form with indentation level [indent]. | |
269 | ||
270 | @param indent default = [!default_indent] | |
271 | *) | |
272 | ||
273 | val to_string_mach : t -> string | |
274 | (** [to_string_mach sexp] converts S-expression [sexp] to a string in | |
275 | machine readable (i.e. most compact) form. *) | |
276 | ||
277 | val to_string : t -> string | |
278 | (** [to_string sexp] same as [to_string_mach]. *) | |
279 | ||
280 | ||
281 | (** {6 Buffer conversions} *) | |
282 | ||
283 | val to_buffer_hum : buf : Buffer.t -> ?indent : int -> t -> unit | |
284 | (** [to_buffer_hum ~buf ?indent sexp] outputs the S-expression [sexp] | |
285 | converted to a string in human readable form to buffer [buf]. | |
286 | ||
287 | @param indent default = [!default_indent] | |
288 | *) | |
289 | ||
290 | val to_buffer_mach : buf : Buffer.t -> t -> unit | |
291 | (** [to_buffer_mach ~buf sexp] outputs the S-expression [sexp] converted | |
292 | to a string in machine readable (i.e. most compact) form to buffer [buf]. | |
293 | *) | |
294 | ||
295 | val to_buffer : buf : Buffer.t -> t -> unit | |
296 | (** [to_buffer ~buf sexp] same as {!to_buffer_mach}. *) | |
297 | ||
298 | ||
299 | (** {6 Utilities for automated type conversions} *) | |
300 | ||
301 | val unit : t | |
302 | (** [unit] the unit-value as expressed by an S-expression. *) | |
303 | ||
304 | external sexp_of_t : t -> t = "%identity" | |
305 | (** [sexp_of_t sexp] maps S-expressions which are part of a type with | |
306 | automated S-expression conversion to themselves. *) | |
307 | ||
308 | external t_of_sexp : t -> t = "%identity" | |
309 | (** [t_of_sexp sexp] maps S-expressions which are part of a type with | |
310 | automated S-expression conversion to themselves. *) | |
311 | ||
312 | end |