6 Jane Street Holding, LLC
8 email: mmottl@janestcapital.com
9 WWW: http://www.janestcapital.com/ocaml
11 This library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public
13 License as published by the Free Software Foundation; either
14 version 2 of the License, or (at your option) any later version.
16 This library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public
22 License along with this library; if not, write to the Free Software
23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 (** Lexer: Lexer Specification for S-expressions *)
(* Translates the character that follows a backslash in a single-character
   escape. scan_string applies it to the character matched by
   backslash_escapes and appends the result to the string buffer.
   NOTE(review): the match cases are not visible in this listing; presumably
   n, t, b and r map to their control characters and every other character
   maps to itself. Confirm against the full source. *)
32 let char_for_backslash = function
(* Two-character string: code 13 (carriage return) followed by code 10
   (line feed). scan_string appends it to the string buffer. *)
39 let double_nl = "\013\010"
(* [dec_code c1 c2 c3] is the integer written by the three decimal digit
   characters [c1], [c2] and [c3], most significant digit first.
   The arguments are not validated and the result is not range checked. *)
let dec_code c1 c2 c3 =
  let digit c = Char.code c - Char.code '0' in
  (* Horner form of 100 * d1 + 10 * d2 + d3. *)
  (((digit c1 * 10) + digit c2) * 10) + digit c3
(* NOTE(review): fragment of a hexadecimal digit decoder, presumably the
   hex_code function that scan_string calls as hex_code c1 c2. Its header
   and the remaining branches are not visible in this listing. *)
(* Character code of the first digit. *)
45 let d1 = Char.code c1 in
(* Codes from 97 (lowercase a) upward are lowered by 87, so a yields 10. *)
47 if d1 >= 97 then d1 - 87
(* Codes from 65 (uppercase A) upward are lowered by 55, so A yields 10. *)
48 else if d1 >= 65 then d1 - 55
(* Character code of the second digit, decoded the same way. *)
50 let d2 = Char.code c2 in
52 if d2 >= 97 then d2 - 87
53 else if d2 >= 65 then d2 - 55
(* [found_newline lexbuf diff] accounts for a line break in the current
   position of [lexbuf]: the line number grows by one and the beginning of
   line offset becomes the current character offset minus [diff].
   NOTE(review): the record update that stores these two fields is only
   partly visible in this listing. *)
57 let found_newline lexbuf diff =
58 let curr_p = lexbuf.lex_curr_p in
(* One more line has been seen. *)
62 pos_lnum = curr_p.pos_lnum + 1;
(* Offset at which the new line starts, clamped so it is never below 1. *)
63 pos_bol = max 1 (curr_p.pos_cnum - diff);
(* [get_lexeme_len lexbuf] is the length of the current lexeme of [lexbuf],
   computed as the distance between the start offset and the current offset
   in the lexer buffer. *)
let get_lexeme_len lexbuf =
  let first = lexbuf.lex_start_pos and past_last = lexbuf.lex_curr_pos in
  past_last - first
(* A line break: code 10 (LF), code 13 (CR), or the CR LF pair. *)
69 let newline = ('\010' | '\013' | "\013\010")
(* Blanks that stay on the same line: space, code 9 (tab), code 12 (form
   feed). *)
70 let space = [' ' '\009' '\012']
(* Every blank character: the members of space plus LF and CR. *)
71 let whitespace = [' ' '\010' '\013' '\009' '\012']
(* Characters accepted after a backslash as a single-character escape:
   backslash, double quote, single quote, n, t, b and r. *)
72 let backslash_escapes = ['\\' '"' '\'' 'n' 't' 'b' 'r']
(* NOTE(review): cases of the main lexing rule. The rule header and some of
   its cases are not visible in this listing. *)
(* Line break: update the position, then continue lexing.
   NOTE(review): newline also matches the two-character CR LF pair, yet the
   diff passed here is always 1; scan_string passes 2 in the case that
   appends double_nl. Confirm which offset is intended. *)
75 | newline { found_newline lexbuf 1; main buf lexbuf }
(* Skip a run of blanks. *)
76 | space+ { main buf lexbuf }
(* Skip a comment: a semicolon and the rest of the line.
   NOTE(review): the + demands at least one character after the semicolon,
   so a semicolon directly followed by a line break is not matched by this
   case, and the atom case below excludes semicolons. Check whether empty
   comments are handled by a case not visible here. *)
77 | ';' [^ '\n' '\r']+ { main buf lexbuf }
(* NOTE(review): presumably the action of the string literal case, whose
   pattern is not visible here. scan_string fills buf and the accumulated
   text is then read back from it. *)
82 scan_string buf lexbuf;
83 let str = Buffer.contents buf in
(* Unquoted atom: one or more characters that are neither blanks nor a
   semicolon, parenthesis or double quote. *)
87 | ([^ ';' '(' ')' '"'] # whitespace)+ as str { STRING str }
(* Lexing rule for the inside of a string literal. Decoded characters are
   appended to [buf], and every visible case except end of input continues
   by calling scan_string again.
   Raises Failure with an unterminated string message at end of input.
   NOTE(review): braces, conditions and some case patterns are missing from
   this listing, so comments on those parts are hedged. *)
90 and scan_string buf = parse
(* Backslash, a single LF or CR, then optional spaces and tabs. Nothing is
   appended to the buffer. *)
92 | '\\' ['\010' '\013'] [' ' '\009']*
94 let len = get_lexeme_len lexbuf in
(* len - 2 is the number of blanks matched after the line break. *)
95 found_newline lexbuf (len - 2);
96 scan_string buf lexbuf
(* Same as above for the two-character CR LF line break. *)
98 | '\\' "\013\010" [' ' '\009']*
100 let len = get_lexeme_len lexbuf in
(* len - 3 is the number of blanks matched after the CR LF pair. *)
101 found_newline lexbuf (len - 3);
102 scan_string buf lexbuf
(* Single-character escape: append its translation. *)
104 | '\\' (backslash_escapes as c)
106 Buffer.add_char buf (char_for_backslash c);
107 scan_string buf lexbuf
(* Decimal escape: backslash followed by exactly three digits. *)
109 | '\\' (['0'-'9'] as c1) (['0'-'9'] as c2) (['0'-'9'] as c3)
111 let v = dec_code c1 c2 c3 in
(* NOTE(review): the condition guarding the error below is not visible;
   presumably it rejects values that do not fit in a character. *)
113 let pos = lexbuf.lex_curr_p in
(* Error text reporting the line, a column and the three digits. *)
116 "Sexplib.Lexer.scan_string: \
117 illegal escape at line %d char %d: `\\%c%c%c'"
(* Column: offset within the current line, moved back by 3. *)
118 pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
(* Valid value: append the character with that code. *)
121 Buffer.add_char buf (Char.chr v);
122 scan_string buf lexbuf
(* Hexadecimal escape: backslash, x, then exactly two hex digits. *)
124 | '\\' 'x' (['0'-'9' 'a'-'f' 'A'-'F'] as c1) (['0'-'9' 'a'-'f' 'A'-'F'] as c2)
126 let v = hex_code c1 c2 in
(* NOTE(review): the condition guarding the error below is not visible. *)
128 let pos = lexbuf.lex_curr_p in
(* Error text reporting the line, a column and the two hex digits. *)
131 "Sexplib.Lexer.scan_string: \
132 illegal escape at line %d char %d: `\\x%c%c'"
(* Column: offset within the current line, moved back by 3. *)
133 pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
136 Buffer.add_char buf (Char.chr v);
137 scan_string buf lexbuf
(* NOTE(review): action of a case whose pattern is not visible, presumably
   a backslash followed by any other character c. Both the backslash and c
   are appended unchanged. *)
141 Buffer.add_char buf '\\';
142 Buffer.add_char buf c;
143 scan_string buf lexbuf
(* A bare LF or CR inside the literal: count the line and keep the
   character. *)
145 | ['\010' '\013'] as c
147 found_newline lexbuf 1;
148 Buffer.add_char buf c;
149 scan_string buf lexbuf
(* NOTE(review): action of a case whose pattern is not visible, presumably
   a bare CR LF pair. The line is counted and the pair is appended. *)
153 found_newline lexbuf 2;
154 Buffer.add_string buf double_nl;
155 scan_string buf lexbuf
(* NOTE(review): action of a case whose pattern is not visible, presumably
   a run of ordinary characters. The current lexeme is copied from the
   lexer buffer into buf. *)
159 let ofs = lexbuf.lex_start_pos in
160 let len = lexbuf.lex_curr_pos - ofs in
161 Buffer.add_substring buf lexbuf.lex_buffer ofs len;
162 scan_string buf lexbuf
(* End of input reached while still inside the literal. *)
164 | eof { failwith "Sexplib.Lexer.scan_string: unterminated string" }
170 | None -> Buffer.create 64
171 | Some buf -> Buffer.clear buf; buf