(scan_lisp_file): Don't add newline at end of string.
[bpt/emacs.git] / lib-src / make-docfile.c
1 /* Generate doc-string file for GNU Emacs from source files.
2 Copyright (C) 1985, 1986, 1992 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20 /* The arguments given to this program are all the C and Lisp source files
21 of GNU Emacs. .elc and .el and .c files are allowed.
22 A .o file can also be specified; the .c file it was made from is used.
23 This helps the makefile pass the correct list of files.
24
25 The results, which go to standard output or to a file
26 specified with -a or -o (-a to append, -o to start from nothing),
27 are entries containing function or variable names and their documentation.
28 Each entry starts with a ^_ character.
29 Then comes F for a function or V for a variable.
30 Then comes the function or variable name, terminated with a newline.
31 Then comes the documentation for that function or variable.
32 */
33
34 #include <stdio.h>
35
36 FILE *outfile;
37
38 main (argc, argv)
39 int argc;
40 char **argv;
41 {
42 int i;
43 int err_count = 0;
44
45 outfile = stdout;
46
47 /* If first two args are -o FILE, output to FILE. */
48 i = 1;
49 if (argc > i + 1 && !strcmp (argv[i], "-o"))
50 {
51 outfile = fopen (argv[i + 1], "w");
52 i += 2;
53 }
54 if (argc > i + 1 && !strcmp (argv[i], "-a"))
55 {
56 outfile = fopen (argv[i + 1], "a");
57 i += 2;
58 }
59 if (argc > i + 1 && !strcmp (argv[i], "-d"))
60 {
61 chdir (argv[i + 1]);
62 i += 2;
63 }
64
65 for (; i < argc; i++)
66 err_count += scan_file (argv[i]); /* err_count seems to be {mis,un}used */
67 #ifndef VMS
68 exit (err_count); /* see below - shane */
69 #endif /* VMS */
70 }
71
72 /* Read file FILENAME and output its doc strings to outfile. */
73 /* Return 1 if file is not found, 0 if it is found. */
74
75 scan_file (filename)
76 char *filename;
77 {
78 int len = strlen (filename);
79 if (!strcmp (filename + len - 4, ".elc"))
80 return scan_lisp_file (filename);
81 else if (!strcmp (filename + len - 3, ".el"))
82 return scan_lisp_file (filename);
83 else
84 return scan_c_file (filename);
85 }
86 \f
87 char buf[128];
88
89 /* Skip a C string from INFILE,
90 and return the character that follows the closing ".
91 If printflag is positive, output string contents to outfile.
92 If it is negative, store contents in buf.
93 Convert escape sequences \n and \t to newline and tab;
94 discard \ followed by newline. */
95
96 read_c_string (infile, printflag)
97 FILE *infile;
98 int printflag;
99 {
100 register int c;
101 char *p = buf;
102
103 c = getc (infile);
104 while (c != EOF)
105 {
106 while (c != '"' && c != EOF)
107 {
108 if (c == '\\')
109 {
110 c = getc (infile);
111 if (c == '\n')
112 {
113 c = getc (infile);
114 continue;
115 }
116 if (c == 'n')
117 c = '\n';
118 if (c == 't')
119 c = '\t';
120 }
121 if (printflag > 0)
122 putc (c, outfile);
123 else if (printflag < 0)
124 *p++ = c;
125 c = getc (infile);
126 }
127 c = getc (infile);
128 if (c != '"')
129 break;
130 /* If we had a "", concatenate the two strings. */
131 c = getc (infile);
132 }
133
134 if (printflag < 0)
135 *p = 0;
136
137 return c;
138 }
139 \f
140 /* Write to file OUT the argument names of the function whose text is in BUF.
141 MINARGS and MAXARGS are the minimum and maximum number of arguments. */
142
143 write_c_args (out, buf, minargs, maxargs)
144 FILE *out;
145 char *buf;
146 int minargs, maxargs;
147 {
148 register char *p;
149 int in_ident = 0;
150 int just_spaced = 0;
151
152 fprintf (out, "arguments: ");
153
154 for (p = buf; *p; p++)
155 {
156 char c = *p;
157 int ident_start = 0;
158
159 /* Notice when we start printing a new identifier. */
160 if ((('A' <= c && c <= 'Z')
161 || ('a' <= c && c <= 'z')
162 || ('0' <= c && c <= '9')
163 || c == '_')
164 != in_ident)
165 {
166 if (!in_ident)
167 {
168 in_ident = 1;
169 ident_start = 1;
170
171 if (minargs == 0 && maxargs > 0)
172 fprintf (out, "&optional ");
173 just_spaced = 1;
174
175 minargs--;
176 maxargs--;
177 }
178 else
179 in_ident = 0;
180 }
181
182 /* Print the C argument list as it would appear in lisp:
183 print underscores as hyphens, and print commas as spaces.
184 Collapse adjacent spaces into one. */
185 if (c == '_') c = '-';
186 if (c == ',') c = ' ';
187
188 /* In C code, `default' is a reserved word, so we spell it
189 `defalt'; unmangle that here. */
190 if (ident_start
191 && strncmp (p, "defalt", 6) == 0
192 && ! (('A' <= p[6] && p[6] <= 'Z')
193 || ('a' <= p[6] && p[6] <= 'z')
194 || ('0' <= p[6] && p[6] <= '9')
195 || p[6] == '_'))
196 {
197 fprintf (out, "default");
198 p += 5;
199 in_ident = 0;
200 just_spaced = 0;
201 }
202 else if (c != ' ' || ! just_spaced)
203 putc (c, out);
204
205 just_spaced = (c == ' ');
206 }
207 }
208 \f
209 /* Read through a c file. If a .o file is named,
210 the corresponding .c file is read instead.
211 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
212 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
213
214 scan_c_file (filename)
215 char *filename;
216 {
217 FILE *infile;
218 register int c;
219 register int commas;
220 register int defunflag;
221 register int defvarperbufferflag;
222 register int defvarflag;
223 int minargs, maxargs;
224
225 if (filename[strlen (filename) - 1] == 'o')
226 filename[strlen (filename) - 1] = 'c';
227
228 infile = fopen (filename, "r");
229
230 /* No error if non-ex input file */
231 if (infile == NULL)
232 {
233 perror (filename);
234 return 0;
235 }
236
237 c = '\n';
238 while (!feof (infile))
239 {
240 if (c != '\n')
241 {
242 c = getc (infile);
243 continue;
244 }
245 c = getc (infile);
246 if (c == ' ')
247 {
248 while (c == ' ')
249 c = getc (infile);
250 if (c != 'D')
251 continue;
252 c = getc (infile);
253 if (c != 'E')
254 continue;
255 c = getc (infile);
256 if (c != 'F')
257 continue;
258 c = getc (infile);
259 if (c != 'V')
260 continue;
261 c = getc (infile);
262 if (c != 'A')
263 continue;
264 c = getc (infile);
265 if (c != 'R')
266 continue;
267 c = getc (infile);
268 if (c != '_')
269 continue;
270
271 defvarflag = 1;
272 defunflag = 0;
273
274 c = getc (infile);
275 defvarperbufferflag = (c == 'P');
276
277 c = getc (infile);
278 }
279 else if (c == 'D')
280 {
281 c = getc (infile);
282 if (c != 'E')
283 continue;
284 c = getc (infile);
285 if (c != 'F')
286 continue;
287 c = getc (infile);
288 defunflag = c == 'U';
289 defvarflag = 0;
290 }
291 else continue;
292
293 while (c != '(')
294 {
295 if (c < 0)
296 goto eof;
297 c = getc (infile);
298 }
299
300 c = getc (infile);
301 if (c != '"')
302 continue;
303 c = read_c_string (infile, -1);
304
305 if (defunflag)
306 commas = 5;
307 else if (defvarperbufferflag)
308 commas = 2;
309 else if (defvarflag)
310 commas = 1;
311 else /* For DEFSIMPLE and DEFPRED */
312 commas = 2;
313
314 while (commas)
315 {
316 if (c == ',')
317 {
318 commas--;
319 if (defunflag && (commas == 1 || commas == 2))
320 {
321 do
322 c = getc (infile);
323 while (c == ' ' || c == '\n' || c == '\t');
324 if (c < 0)
325 goto eof;
326 ungetc (c, infile);
327 if (commas == 2) /* pick up minargs */
328 fscanf (infile, "%d", &minargs);
329 else /* pick up maxargs */
330 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
331 maxargs = -1;
332 else
333 fscanf (infile, "%d", &maxargs);
334 }
335 }
336 if (c < 0)
337 goto eof;
338 c = getc (infile);
339 }
340 while (c == ' ' || c == '\n' || c == '\t')
341 c = getc (infile);
342 if (c == '"')
343 c = read_c_string (infile, 0);
344 while (c != ',')
345 c = getc (infile);
346 c = getc (infile);
347 while (c == ' ' || c == '\n' || c == '\t')
348 c = getc (infile);
349
350 if (c == '"')
351 {
352 putc (037, outfile);
353 putc (defvarflag ? 'V' : 'F', outfile);
354 fprintf (outfile, "%s\n", buf);
355 c = read_c_string (infile, 1);
356
357 /* If this is a defun, find the arguments and print them. If
358 this function takes MANY or UNEVALLED args, then the C source
359 won't give the names of the arguments, so we shouldn't bother
360 trying to find them. */
361 if (defunflag && maxargs != -1)
362 {
363 char argbuf[1024], *p = argbuf;
364 while (c != ')')
365 {
366 if (c < 0)
367 goto eof;
368 c = getc (infile);
369 }
370 /* Skip into arguments. */
371 while (c != '(')
372 {
373 if (c < 0)
374 goto eof;
375 c = getc (infile);
376 }
377 /* Copy arguments into ARGBUF. */
378 *p++ = c;
379 do
380 *p++ = c = getc (infile);
381 while (c != ')');
382 *p = '\0';
383 /* Output them. */
384 fprintf (outfile, "\n\n");
385 write_c_args (outfile, argbuf, minargs, maxargs);
386 }
387 }
388 }
389 eof:
390 fclose (infile);
391 return 0;
392 }
393 \f
394 /* Read a file of Lisp code, compiled or interpreted.
395 Looks for
396 (defun NAME ARGS DOCSTRING ...)
397 (defmacro NAME ARGS DOCSTRING ...)
398 (autoload (quote NAME) FILE DOCSTRING ...)
399 (defvar NAME VALUE DOCSTRING)
400 (defconst NAME VALUE DOCSTRING)
401 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
402 (fset (quote NAME) #[... DOCSTRING ...])
403 (defalias (quote NAME) #[... DOCSTRING ...])
404 starting in column zero.
405 (quote NAME) may appear as 'NAME as well.
406 For defun, defmacro, and autoload, we know how to skip over the arglist.
407 For defvar, defconst, and fset we skip to the docstring with a kludgy
408 formatting convention: all docstrings must appear on the same line as the
409 initial open-paren (the one in column zero) and must contain a backslash
410 and a double-quote immediately after the initial double-quote. No newlines
411 must appear between the beginning of the form and the first double-quote.
412 The only source file that must follow this convention is loaddefs.el; aside
413 from that, it is always the .elc file that we look at, and they are no
414 problem because byte-compiler output follows this convention.
415 The NAME and DOCSTRING are output.
416 NAME is preceded by `F' for a function or `V' for a variable.
417 An entry is output only if DOCSTRING has \ newline just after the opening "
418 */
419
420 void
421 skip_white (infile)
422 FILE *infile;
423 {
424 char c = ' ';
425 while (c == ' ' || c == '\t' || c == '\n')
426 c = getc (infile);
427 ungetc (c, infile);
428 }
429
430 void
431 read_lisp_symbol (infile, buffer)
432 FILE *infile;
433 char *buffer;
434 {
435 char c;
436 char *fillp = buffer;
437
438 skip_white (infile);
439 while (1)
440 {
441 c = getc (infile);
442 if (c == '\\')
443 *(++fillp) = getc (infile);
444 else if (c == ' ' || c == '\t' || c == '\n' || c == '(' || c == ')')
445 {
446 ungetc (c, infile);
447 *fillp = 0;
448 break;
449 }
450 else
451 *fillp++ = c;
452 }
453
454 if (! buffer[0])
455 fprintf (stderr, "## expected a symbol, got '%c'\n", c);
456
457 skip_white (infile);
458 }
459
460
461 scan_lisp_file (filename)
462 char *filename;
463 {
464 FILE *infile;
465 register int c;
466
467 infile = fopen (filename, "r");
468 if (infile == NULL)
469 {
470 perror (filename);
471 return 0; /* No error */
472 }
473
474 c = '\n';
475 while (!feof (infile))
476 {
477 char buffer [BUFSIZ];
478 char *fillp = buffer;
479 char type;
480
481 if (c != '\n')
482 {
483 c = getc (infile);
484 continue;
485 }
486 c = getc (infile);
487 if (c != '(')
488 continue;
489
490 read_lisp_symbol (infile, buffer);
491
492 if (! strcmp (buffer, "defun") ||
493 ! strcmp (buffer, "defmacro"))
494 {
495 type = 'F';
496 read_lisp_symbol (infile, buffer);
497
498 /* Skip the arguments: either "nil" or a list in parens */
499
500 c = getc (infile);
501 if (c == 'n') /* nil */
502 {
503 if ((c = getc (infile)) != 'i' ||
504 (c = getc (infile)) != 'l')
505 {
506 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
507 buffer, filename);
508 continue;
509 }
510 }
511 else if (c != '(')
512 {
513 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
514 buffer, filename);
515 continue;
516 }
517 else
518 while (c != ')')
519 c = getc (infile);
520 skip_white (infile);
521
522 /* If the next three characters aren't `dquote bslash newline'
523 then we're not reading a docstring.
524 */
525 if ((c = getc (infile)) != '"' ||
526 (c = getc (infile)) != '\\' ||
527 (c = getc (infile)) != '\n')
528 {
529 #ifdef DEBUG
530 fprintf (stderr, "## non-docstring in %s (%s)\n",
531 buffer, filename);
532 #endif
533 continue;
534 }
535 }
536
537 else if (! strcmp (buffer, "defvar") ||
538 ! strcmp (buffer, "defconst"))
539 {
540 char c1 = 0, c2 = 0;
541 type = 'V';
542 read_lisp_symbol (infile, buffer);
543
544 /* Skip until the first newline; remember the two previous chars. */
545 while (c != '\n' && c >= 0)
546 {
547 c2 = c1;
548 c1 = c;
549 c = getc (infile);
550 }
551
552 /* If two previous characters were " and \,
553 this is a doc string. Otherwise, there is none. */
554 if (c2 != '"' || c1 != '\\')
555 {
556 #ifdef DEBUG
557 fprintf (stderr, "## non-docstring in %s (%s)\n",
558 buffer, filename);
559 #endif
560 continue;
561 }
562 }
563
564 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
565 {
566 char c1 = 0, c2 = 0;
567 type = 'F';
568
569 c = getc (infile);
570 if (c == '\'')
571 read_lisp_symbol (infile, buffer);
572 else
573 {
574 if (c != '(')
575 {
576 fprintf (stderr, "## unparsable name in fset in %s\n",
577 filename);
578 continue;
579 }
580 read_lisp_symbol (infile, buffer);
581 if (strcmp (buffer, "quote"))
582 {
583 fprintf (stderr, "## unparsable name in fset in %s\n",
584 filename);
585 continue;
586 }
587 read_lisp_symbol (infile, buffer);
588 c = getc (infile);
589 if (c != ')')
590 {
591 fprintf (stderr,
592 "## unparsable quoted name in fset in %s\n",
593 filename);
594 continue;
595 }
596 }
597
598 /* Skip until the first newline; remember the two previous chars. */
599 while (c != '\n' && c >= 0)
600 {
601 c2 = c1;
602 c1 = c;
603 c = getc (infile);
604 }
605
606 /* If two previous characters were " and \,
607 this is a doc string. Otherwise, there is none. */
608 if (c2 != '"' || c1 != '\\')
609 {
610 #ifdef DEBUG
611 fprintf (stderr, "## non-docstring in %s (%s)\n",
612 buffer, filename);
613 #endif
614 continue;
615 }
616 }
617
618 else if (! strcmp (buffer, "autoload"))
619 {
620 type = 'F';
621 c = getc (infile);
622 if (c == '\'')
623 read_lisp_symbol (infile, buffer);
624 else
625 {
626 if (c != '(')
627 {
628 fprintf (stderr, "## unparsable name in autoload in %s\n",
629 filename);
630 continue;
631 }
632 read_lisp_symbol (infile, buffer);
633 if (strcmp (buffer, "quote"))
634 {
635 fprintf (stderr, "## unparsable name in autoload in %s\n",
636 filename);
637 continue;
638 }
639 read_lisp_symbol (infile, buffer);
640 c = getc (infile);
641 if (c != ')')
642 {
643 fprintf (stderr,
644 "## unparsable quoted name in autoload in %s\n",
645 filename);
646 continue;
647 }
648 }
649 skip_white (infile);
650 if ((c = getc (infile)) != '\"')
651 {
652 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
653 buffer, filename);
654 continue;
655 }
656 read_c_string (infile, 0);
657 skip_white (infile);
658
659 /* If the next three characters aren't `dquote bslash newline'
660 then we're not reading a docstring.
661 */
662 if ((c = getc (infile)) != '"' ||
663 (c = getc (infile)) != '\\' ||
664 (c = getc (infile)) != '\n')
665 {
666 #ifdef DEBUG
667 fprintf (stderr, "## non-docstring in %s (%s)\n",
668 buffer, filename);
669 #endif
670 continue;
671 }
672 }
673
674 #ifdef DEBUG
675 else if (! strcmp (buffer, "if") ||
676 ! strcmp (buffer, "byte-code"))
677 ;
678 #endif
679
680 else
681 {
682 #ifdef DEBUG
683 fprintf (stderr, "## unrecognised top-level form, %s (%s)\n",
684 buffer, filename);
685 #endif
686 continue;
687 }
688
689 /* At this point, there is a docstring that we should gobble.
690 The opening quote (and leading backslash-newline) have already
691 been read.
692 */
693 putc (037, outfile);
694 putc (type, outfile);
695 fprintf (outfile, "%s\n", buffer);
696 read_c_string (infile, 1);
697 }
698 fclose (infile);
699 return 0;
700 }