938a5d2cf46626573e181d3e2ad281c60a669197
[bpt/guile.git] / libguile / c-tokenize.lex
/* Scanner options: do not call yywrap() at end of input, do not
   generate unput(), and declare yytext as a pointer.  */
%option noyywrap
%option nounput
%pointer

/* Named sub-patterns used by the rules below.  */
EOL        \n
SPACE      [ \t\v\f]
WS         [ \t\v\n\f]
DIGIT      [0-9]
LETTER     [a-zA-Z_]
OCTDIGIT   [0-7]
HEXDIGIT   [a-fA-F0-9]
EXPONENT   [Ee][+-]?{DIGIT}+
FLOQUAL    (f|F|l|L)
/* Integer suffix: an unsigned marker optionally followed by a long
   marker, or a long marker optionally followed by an unsigned marker,
   so that constants such as 10UL or 0xffLU stay a single token.  The
   mixed-case spellings lL and Ll are kept because earlier versions of
   this definition accepted them.  */
INTQUAL    ([uU](l|L|ll|LL|lL|Ll)?|(l|L|ll|LL|lL|Ll)[uU]?)
15
%{

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Prevent compilation of static input() function in generated scanner
   code.  This function is never actually used, and GCC 4.3 will emit
   an error for that.  */
#define YY_NO_INPUT

/* Prototypes for the scanner entry point and the accessor functions,
   declared here so that they are visible before their definitions.  */
int yylex (void);

int yyget_lineno (void);
FILE *yyget_in (void);
FILE *yyget_out (void);
int yyget_leng (void);
char *yyget_text (void);
void yyset_lineno (int line_number);
void yyset_in (FILE * in_str);
void yyset_out (FILE * out_str);
int yyget_debug (void);
void yyset_debug (int bdebug);
int yylex_destroy (void);

/* Non-zero when main() was given --filter-snarfage; the brace and
   cookie rules only run their state machine in that mode.  */
int filter_snarfage = 0;

/* Non-zero while tokens are to be written to stdout.  */
int print = 1;

/* Position of the scanner relative to snarf cookies; driven by the
   cookie rule and the two brace rules.  */
enum t_state {
  SKIP,
  MULTILINE,
  MULTILINE_COOKIE,
  COOKIE
};

enum t_state state = SKIP;

/* Non-zero when the most recent token that updates this flag was a
   snarf cookie.  */
int cookie_was_last = 0;

/* Output helpers.  Each one is a no-op while `print' is zero.  They
   are wrapped in do { } while (0) so that each expands to exactly one
   statement and cannot capture an `else' that follows the call.

   NOTE(review): the token text is written between double quotes
   without any escaping -- confirm that consumers cope with text that
   itself contains a double quote or a backslash.  */

/* Print a (TYPE . "TEXT") pair.  */
#define OUT_RAW(type, text) \
  do { if (print) printf ("(%s . \"%s\")\n", #type, (text)); } while (0)

/* Print a (TYPE . "yytext") pair.  */
#define OUT_T(type) OUT_RAW (type, yytext)

/* Print the matched text on a line of its own.  */
#define OUT_S \
  do { if (print) printf ("%s\n", yytext); } while (0)

/* Print the bare token name TYPE on a line of its own.  */
#define OUT(type) \
  do { if (print) printf ("%s\n", #type); } while (0)

/* Record whether the token just matched was a snarf cookie.  */
#define IS_COOKIE (cookie_was_last = 1)
#define IS_NOT_COOKIE (cookie_was_last = 0)

%}
64
65 %%
66
67 \/\*(\n|[^*]|\*[^/])*\*\/ { OUT_T (comment); }
68
69 ({SPACE}*(\\\n)*{SPACE}*)+ ;
70
71 ({SPACE}*\n*{SPACE}*)+ { OUT(eol); }
72
73 #.*\n { OUT(hash); IS_NOT_COOKIE; }
74
75 {LETTER}({LETTER}|{DIGIT})* { OUT_T (id); IS_NOT_COOKIE; }
76
77 0[xX]{HEXDIGIT}+{INTQUAL}? { OUT_RAW (int_hex, yytext + 2); IS_NOT_COOKIE; }
78 0{OCTDIGIT}+{INTQUAL}? { OUT_RAW (int_oct, yytext + 1); IS_NOT_COOKIE; }
79 {DIGIT}+{INTQUAL}? { OUT_T (int_dec); IS_NOT_COOKIE; }
80
81 L?\'(\\.|[^\\\'])+\' { OUT_T (char); IS_NOT_COOKIE; }
82
83 {DIGIT}+{EXPONENT}{FLOQUAL}? { OUT_T (flo_dec); IS_NOT_COOKIE; }
84 {DIGIT}*"."{DIGIT}+({EXPONENT})?{FLOQUAL}? { OUT_T (flo_dec); IS_NOT_COOKIE; }
85 {DIGIT}+"."{DIGIT}*({EXPONENT})?{FLOQUAL}? { OUT_T (flo_dec); IS_NOT_COOKIE; }
86
87 L?\"(\\.|[^\\\"])*\" { OUT_S; IS_NOT_COOKIE; }
88
89 "..." { OUT (ellipsis); IS_NOT_COOKIE; }
90
91 ">>=" { OUT (shift_right_assign); IS_NOT_COOKIE; }
92 "<<=" { OUT (shift_left_assign); IS_NOT_COOKIE; }
93 "+=" { OUT (add_assign); IS_NOT_COOKIE; }
94 "-=" { OUT (sub_assign); IS_NOT_COOKIE; }
95 "*=" { OUT (mul-assign); IS_NOT_COOKIE; }
96 "/=" { OUT (div_assign); IS_NOT_COOKIE; }
97 "%=" { OUT (mod_assign); IS_NOT_COOKIE; }
98 "&=" { OUT (logand_assign); IS_NOT_COOKIE; }
99 "^=" { OUT (logxor_assign); IS_NOT_COOKIE; }
100 "|=" { OUT (logior_assign); IS_NOT_COOKIE; }
101 ">>" { OUT (right_shift); IS_NOT_COOKIE; }
102 "<<" { OUT (left_shift); IS_NOT_COOKIE; }
103 "++" { OUT (inc); IS_NOT_COOKIE; }
104 "--" { OUT (dec); IS_NOT_COOKIE; }
105 "->" { OUT (ptr); IS_NOT_COOKIE; }
106 "&&" { OUT (and); IS_NOT_COOKIE; }
107 "||" { OUT (or); IS_NOT_COOKIE; }
108 "<=" { OUT (le); IS_NOT_COOKIE; }
109 ">=" { OUT (ge); IS_NOT_COOKIE; }
110 "==" { OUT (eq); IS_NOT_COOKIE; }
111 "!=" { OUT (ne); IS_NOT_COOKIE; }
112 ";" { OUT (semicolon); IS_NOT_COOKIE; }
113
114 ("{"|"<%") {
115 OUT (brace_open);
116 if (filter_snarfage && cookie_was_last && state == COOKIE)
117 state = MULTILINE;
118 IS_NOT_COOKIE; }
119
120 ("}"|"%>") {
121 OUT (brace_close);
122 if (filter_snarfage && cookie_was_last && state == MULTILINE_COOKIE) {
123 state = SKIP;
124 print = 0;
125 }
126 IS_NOT_COOKIE; }
127
128 "," { OUT (comma); IS_NOT_COOKIE; }
129 ":" { OUT (colon); IS_NOT_COOKIE; }
130 "=" { OUT (assign); IS_NOT_COOKIE; }
131 "(" { OUT (paren_open); IS_NOT_COOKIE; }
132 ")" { OUT (paren_close); IS_NOT_COOKIE; }
133 ("["|"<:") { OUT (bracket_open); IS_NOT_COOKIE; }
134 ("]"|":>") { OUT (bracket_close); IS_NOT_COOKIE; }
135 "." { OUT (dot); IS_NOT_COOKIE; }
136 "&" { OUT (amp); IS_NOT_COOKIE; }
137 "!" { OUT (bang); IS_NOT_COOKIE; }
138 "~" { OUT (tilde); IS_NOT_COOKIE; }
139 "-" { OUT (minus); IS_NOT_COOKIE; }
140 "+" { OUT (plus); IS_NOT_COOKIE; }
141 "*" { OUT (star); IS_NOT_COOKIE; }
142 "/" { OUT (slash); IS_NOT_COOKIE; }
143 "%" { OUT (percent); IS_NOT_COOKIE; }
144 "<" { OUT (lt); IS_NOT_COOKIE; }
145 ">" { OUT (gt); IS_NOT_COOKIE; }
146
147 \^{WS}*\^ {
148 if (filter_snarfage)
149 switch (state) {
150 case SKIP:
151 state = COOKIE;
152 print = 1;
153 OUT (snarf_cookie);
154 break;
155 case MULTILINE:
156 case MULTILINE_COOKIE:
157 state = MULTILINE_COOKIE;
158 OUT (snarf_cookie);
159 break;
160 case COOKIE:
161 state = SKIP;
162 OUT (snarf_cookie);
163 print = 0;
164 break;
165 default:
166 /* whoops */
167 abort ();
168 break;
169 }
170 else
171 OUT (snarf_cookie);
172
173 IS_COOKIE; }
174
175 "^" { OUT (caret); IS_NOT_COOKIE; }
176 "|" { OUT (pipe); IS_NOT_COOKIE; }
177 "?" { OUT (question); IS_NOT_COOKIE; }
178
179 . { fprintf (stderr, "*%s", yytext); fflush (stderr); IS_NOT_COOKIE; }
180
181 %%
182
/* Program entry point: run the scanner once and write the resulting
   tokens to stdout.

   If the first argument is exactly "--filter-snarfage", filtering is
   enabled and printing starts switched off, so nothing is written
   until the snarf cookie rule turns printing on.  Any other arguments
   are ignored.

   Returns EXIT_SUCCESS, or EXIT_FAILURE when the token stream could
   not be written to stdout.  */
int
main (int argc, char *argv[])
{
  if (argc > 1 && strcmp (argv[1], "--filter-snarfage") == 0)
    {
      filter_snarfage = 1;
      print = 0;
    }

  yylex ();

  /* Flush explicitly and check the stream: a failed write (full disk,
     closed pipe) would otherwise leave a truncated token stream
     behind while still reporting success.  */
  if (fflush (stdout) != 0 || ferror (stdout))
    {
      fprintf (stderr, "%s: error writing to standard output\n",
               argc > 0 ? argv[0] : "c-tokenize");
      return EXIT_FAILURE;
    }

  return EXIT_SUCCESS;
}
195
196 /*
197 Local Variables:
198 c-file-style: "gnu"
199 End:
200 */