Coccinelle release 1.0.0-rc12
[bpt/coccinelle.git] / bundles / sexplib / sexplib-7.0.5 / lib / lexer.mll
CommitLineData
b1b2de81 1{
feec80c3
C
2 (*****************************************************************************
3 * Sexplib *
4 * *
5 * Copyright (C) 2005- Jane Street Holding, LLC *
6 * Contact: opensource@janestreet.com *
7 * WWW: http://www.janestreet.com/ocaml *
8 * Author: Markus Mottl *
9 * *
10 * This library is free software; you can redistribute it and/or *
11 * modify it under the terms of the GNU Lesser General Public *
12 * License as published by the Free Software Foundation; either *
13 * version 2 of the License, or (at your option) any later version. *
14 * *
15 * This library is distributed in the hope that it will be useful, *
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
18 * Lesser General Public License for more details. *
19 * *
20 * You should have received a copy of the GNU Lesser General Public *
21 * License along with this library; if not, write to the Free Software *
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
23 * *
24 *****************************************************************************)
b1b2de81
C
25
26 (** Lexer: Lexer Specification for S-expressions *)
27
28 open Printf
29 open Lexing
30 open Parser
31
32 let char_for_backslash = function
33 | 'n' -> '\n'
34 | 't' -> '\t'
35 | 'b' -> '\b'
36 | 'r' -> '\r'
37 | c -> c
38
39 let double_nl = "\013\010"
40
41 let dec_code c1 c2 c3 =
42 100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48)
43
44 let hex_code c1 c2 =
45 let d1 = Char.code c1 in
46 let val1 =
47 if d1 >= 97 then d1 - 87
48 else if d1 >= 65 then d1 - 55
49 else d1 - 48 in
50 let d2 = Char.code c2 in
51 let val2 =
52 if d2 >= 97 then d2 - 87
53 else if d2 >= 65 then d2 - 55
54 else d2 - 48 in
55 val1 * 16 + val2
56
57 let found_newline lexbuf diff =
58 let curr_p = lexbuf.lex_curr_p in
59 lexbuf.lex_curr_p <-
60 {
61 curr_p with
62 pos_lnum = curr_p.pos_lnum + 1;
63 pos_bol = max 1 (curr_p.pos_cnum - diff);
64 }
65
66 let get_lexeme_len lexbuf = lexbuf.lex_curr_pos - lexbuf.lex_start_pos
67}
68
69let newline = ('\010' | '\013' | "\013\010")
70let space = [' ' '\009' '\012']
71let whitespace = [' ' '\010' '\013' '\009' '\012']
72let backslash_escapes = ['\\' '"' '\'' 'n' 't' 'b' 'r']
73
74rule main buf = parse
75 | newline { found_newline lexbuf 1; main buf lexbuf }
feec80c3 76 | space+ | ';' [^ '\n' '\r']* { main buf lexbuf }
b1b2de81
C
77 | '(' { LPAREN }
78 | ')' { RPAREN }
79 | '"'
80 {
81 scan_string buf lexbuf;
82 let str = Buffer.contents buf in
83 Buffer.clear buf;
84 STRING str
85 }
86 | ([^ ';' '(' ')' '"'] # whitespace)+ as str { STRING str }
87 | eof { EOF }
88
89and scan_string buf = parse
90 | '"' { () }
91 | '\\' ['\010' '\013'] [' ' '\009']*
92 {
93 let len = get_lexeme_len lexbuf in
94 found_newline lexbuf (len - 2);
95 scan_string buf lexbuf
96 }
97 | '\\' "\013\010" [' ' '\009']*
98 {
99 let len = get_lexeme_len lexbuf in
100 found_newline lexbuf (len - 3);
101 scan_string buf lexbuf
102 }
103 | '\\' (backslash_escapes as c)
104 {
105 Buffer.add_char buf (char_for_backslash c);
106 scan_string buf lexbuf
107 }
108 | '\\' (['0'-'9'] as c1) (['0'-'9'] as c2) (['0'-'9'] as c3)
109 {
110 let v = dec_code c1 c2 c3 in
111 if v > 255 then (
112 let pos = lexbuf.lex_curr_p in
113 let msg =
114 sprintf
115 "Sexplib.Lexer.scan_string: \
116 illegal escape at line %d char %d: `\\%c%c%c'"
117 pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
118 c1 c2 c3 in
119 failwith msg);
120 Buffer.add_char buf (Char.chr v);
121 scan_string buf lexbuf
122 }
123 | '\\' 'x' (['0'-'9' 'a'-'f' 'A'-'F'] as c1) (['0'-'9' 'a'-'f' 'A'-'F'] as c2)
124 {
125 let v = hex_code c1 c2 in
126 if v > 255 then (
127 let pos = lexbuf.lex_curr_p in
128 let msg =
129 sprintf
130 "Sexplib.Lexer.scan_string: \
131 illegal escape at line %d char %d: `\\x%c%c'"
132 pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
133 c1 c2 in
134 failwith msg);
135 Buffer.add_char buf (Char.chr v);
136 scan_string buf lexbuf
137 }
138 | '\\' (_ as c)
139 {
140 Buffer.add_char buf '\\';
141 Buffer.add_char buf c;
142 scan_string buf lexbuf
143 }
144 | ['\010' '\013'] as c
145 {
146 found_newline lexbuf 1;
147 Buffer.add_char buf c;
148 scan_string buf lexbuf
149 }
150 | "\013\010"
151 {
152 found_newline lexbuf 2;
153 Buffer.add_string buf double_nl;
154 scan_string buf lexbuf
155 }
156 | [^ '\\' '"']+
157 {
158 let ofs = lexbuf.lex_start_pos in
159 let len = lexbuf.lex_curr_pos - ofs in
160 Buffer.add_substring buf lexbuf.lex_buffer ofs len;
161 scan_string buf lexbuf
162 }
163 | eof { failwith "Sexplib.Lexer.scan_string: unterminated string" }
164
165{
166 let main ?buf =
167 let buf =
168 match buf with
169 | None -> Buffer.create 64
170 | Some buf -> Buffer.clear buf; buf
171 in
172 main buf
173}