(scan_lisp_file): Handle dynamic doc strings.
[bpt/emacs.git] / lib-src / make-docfile.c
1 /* Generate doc-string file for GNU Emacs from source files.
2 Copyright (C) 1985, 1986, 1992, 1993, 1994 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20 /* The arguments given to this program are all the C and Lisp source files
21 of GNU Emacs. .elc and .el and .c files are allowed.
22 A .o file can also be specified; the .c file it was made from is used.
23 This helps the makefile pass the correct list of files.
24
25 The results, which go to standard output or to a file
26 specified with -a or -o (-a to append, -o to start from nothing),
27 are entries containing function or variable names and their documentation.
28 Each entry starts with a ^_ character.
29 Then comes F for a function or V for a variable.
30 Then comes the function or variable name, terminated with a newline.
31 Then comes the documentation for that function or variable.
32 */
33
34 #include <stdio.h>
35 #ifdef MSDOS
36 #include <fcntl.h>
37 #endif /* MSDOS */
38 #ifdef WINDOWSNT
39 #include <stdlib.h>
40 #include <fcntl.h>
41 #include <direct.h>
42 #endif /* WINDOWSNT */
43
44 #ifdef DOS_NT
45 #define READ_TEXT "rt"
46 #define READ_BINARY "rb"
47 #else /* not DOS_NT */
48 #define READ_TEXT "r"
49 #define READ_BINARY "r"
50 #endif /* not DOS_NT */
51
52 int scan_file ();
53 int scan_lisp_file ();
54 int scan_c_file ();
55
56 /* Stdio stream for output to the DOC file. */
57 FILE *outfile;
58
59 /* Name this program was invoked with. */
60 char *progname;
61
62 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
63
64 /* VARARGS1 */
65 void
66 error (s1, s2)
67 char *s1, *s2;
68 {
69 fprintf (stderr, "%s: ", progname);
70 fprintf (stderr, s1, s2);
71 fprintf (stderr, "\n");
72 }
73
74 /* Print error message and exit. */
75
76 /* VARARGS1 */
77 void
78 fatal (s1, s2)
79 char *s1, *s2;
80 {
81 error (s1, s2);
82 exit (1);
83 }
84
85 /* Like malloc but get fatal error if memory is exhausted. */
86
87 char *
88 xmalloc (size)
89 unsigned int size;
90 {
91 char *result = (char *) malloc (size);
92 if (result == NULL)
93 fatal ("virtual memory exhausted", 0);
94 return result;
95 }
96 \f
97 int
98 main (argc, argv)
99 int argc;
100 char **argv;
101 {
102 int i;
103 int err_count = 0;
104 int first_infile;
105
106 progname = argv[0];
107
108 /* Don't put CRs in the DOC file. */
109 #ifdef MSDOS
110 _fmode = O_BINARY;
111 (stdout)->_flag &= ~_IOTEXT;
112 _setmode (fileno (stdout), O_BINARY);
113 #endif /* MSDOS */
114 #ifdef WINDOWSNT
115 _fmode = O_BINARY;
116 _setmode (fileno (stdout), O_BINARY);
117 #endif /* WINDOWSNT */
118
119 outfile = stdout;
120
121 /* If first two args are -o FILE, output to FILE. */
122 i = 1;
123 if (argc > i + 1 && !strcmp (argv[i], "-o"))
124 {
125 outfile = fopen (argv[i + 1], "w");
126 i += 2;
127 }
128 if (argc > i + 1 && !strcmp (argv[i], "-a"))
129 {
130 outfile = fopen (argv[i + 1], "a");
131 i += 2;
132 }
133 if (argc > i + 1 && !strcmp (argv[i], "-d"))
134 {
135 chdir (argv[i + 1]);
136 i += 2;
137 }
138
139 first_infile = i;
140 for (; i < argc; i++)
141 {
142 int j;
143 /* Don't process one file twice. */
144 for (j = first_infile; j < i; j++)
145 if (! strcmp (argv[i], argv[j]))
146 break;
147 if (j == i)
148 err_count += scan_file (argv[i]);
149 }
150 #ifndef VMS
151 exit (err_count > 0);
152 #endif /* VMS */
153 return err_count > 0;
154 }
155
156 /* Read file FILENAME and output its doc strings to outfile. */
157 /* Return 1 if file is not found, 0 if it is found. */
158
159 int
160 scan_file (filename)
161 char *filename;
162 {
163 int len = strlen (filename);
164 if (!strcmp (filename + len - 4, ".elc"))
165 return scan_lisp_file (filename, READ_BINARY);
166 else if (!strcmp (filename + len - 3, ".el"))
167 return scan_lisp_file (filename, READ_TEXT);
168 else
169 return scan_c_file (filename, READ_TEXT);
170 }
171 \f
172 char buf[128];
173
174 /* Skip a C string from INFILE,
175 and return the character that follows the closing ".
176 If printflag is positive, output string contents to outfile.
177 If it is negative, store contents in buf.
178 Convert escape sequences \n and \t to newline and tab;
179 discard \ followed by newline. */
180
181 int
182 read_c_string (infile, printflag)
183 FILE *infile;
184 int printflag;
185 {
186 register int c;
187 char *p = buf;
188
189 c = getc (infile);
190 while (c != EOF)
191 {
192 while (c != '"' && c != EOF)
193 {
194 if (c == '\\')
195 {
196 c = getc (infile);
197 if (c == '\n')
198 {
199 c = getc (infile);
200 continue;
201 }
202 if (c == 'n')
203 c = '\n';
204 if (c == 't')
205 c = '\t';
206 }
207 if (printflag > 0)
208 putc (c, outfile);
209 else if (printflag < 0)
210 *p++ = c;
211 c = getc (infile);
212 }
213 c = getc (infile);
214 if (c != '"')
215 break;
216 /* If we had a "", concatenate the two strings. */
217 c = getc (infile);
218 }
219
220 if (printflag < 0)
221 *p = 0;
222
223 return c;
224 }
225 \f
226 /* Write to file OUT the argument names of function FUNC, whose text is in BUF.
227 MINARGS and MAXARGS are the minimum and maximum number of arguments. */
228
229 void
230 write_c_args (out, func, buf, minargs, maxargs)
231 FILE *out;
232 char *func, *buf;
233 int minargs, maxargs;
234 {
235 register char *p;
236 int in_ident = 0;
237 int just_spaced = 0;
238 int need_space = 1;
239
240 fprintf (out, "(%s", func);
241
242 if (*buf == '(')
243 ++buf;
244
245 for (p = buf; *p; p++)
246 {
247 char c = *p;
248 int ident_start = 0;
249
250 /* Notice when we start printing a new identifier. */
251 if ((('A' <= c && c <= 'Z')
252 || ('a' <= c && c <= 'z')
253 || ('0' <= c && c <= '9')
254 || c == '_')
255 != in_ident)
256 {
257 if (!in_ident)
258 {
259 in_ident = 1;
260 ident_start = 1;
261
262 if (need_space)
263 putc (' ', out);
264
265 if (minargs == 0 && maxargs > 0)
266 fprintf (out, "&optional ");
267 just_spaced = 1;
268
269 minargs--;
270 maxargs--;
271 }
272 else
273 in_ident = 0;
274 }
275
276 /* Print the C argument list as it would appear in lisp:
277 print underscores as hyphens, and print commas as spaces.
278 Collapse adjacent spaces into one. */
279 if (c == '_') c = '-';
280 if (c == ',') c = ' ';
281
282 /* In C code, `default' is a reserved word, so we spell it
283 `defalt'; unmangle that here. */
284 if (ident_start
285 && strncmp (p, "defalt", 6) == 0
286 && ! (('A' <= p[6] && p[6] <= 'Z')
287 || ('a' <= p[6] && p[6] <= 'z')
288 || ('0' <= p[6] && p[6] <= '9')
289 || p[6] == '_'))
290 {
291 fprintf (out, "DEFAULT");
292 p += 5;
293 in_ident = 0;
294 just_spaced = 0;
295 }
296 else if (c != ' ' || ! just_spaced)
297 {
298 if (c >= 'a' && c <= 'z')
299 /* Upcase the letter. */
300 c += 'A' - 'a';
301 putc (c, out);
302 }
303
304 just_spaced = (c == ' ');
305 need_space = 0;
306 }
307 }
308 \f
309 /* Read through a c file. If a .o file is named,
310 the corresponding .c file is read instead.
311 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
312 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
313
314 int
315 scan_c_file (filename, mode)
316 char *filename, *mode;
317 {
318 FILE *infile;
319 register int c;
320 register int commas;
321 register int defunflag;
322 register int defvarperbufferflag;
323 register int defvarflag;
324 int minargs, maxargs;
325
326 if (filename[strlen (filename) - 1] == 'o')
327 filename[strlen (filename) - 1] = 'c';
328
329 infile = fopen (filename, mode);
330
331 /* No error if non-ex input file */
332 if (infile == NULL)
333 {
334 perror (filename);
335 return 0;
336 }
337
338 c = '\n';
339 while (!feof (infile))
340 {
341 if (c != '\n')
342 {
343 c = getc (infile);
344 continue;
345 }
346 c = getc (infile);
347 if (c == ' ')
348 {
349 while (c == ' ')
350 c = getc (infile);
351 if (c != 'D')
352 continue;
353 c = getc (infile);
354 if (c != 'E')
355 continue;
356 c = getc (infile);
357 if (c != 'F')
358 continue;
359 c = getc (infile);
360 if (c != 'V')
361 continue;
362 c = getc (infile);
363 if (c != 'A')
364 continue;
365 c = getc (infile);
366 if (c != 'R')
367 continue;
368 c = getc (infile);
369 if (c != '_')
370 continue;
371
372 defvarflag = 1;
373 defunflag = 0;
374
375 c = getc (infile);
376 defvarperbufferflag = (c == 'P');
377
378 c = getc (infile);
379 }
380 else if (c == 'D')
381 {
382 c = getc (infile);
383 if (c != 'E')
384 continue;
385 c = getc (infile);
386 if (c != 'F')
387 continue;
388 c = getc (infile);
389 defunflag = c == 'U';
390 defvarflag = 0;
391 }
392 else continue;
393
394 while (c != '(')
395 {
396 if (c < 0)
397 goto eof;
398 c = getc (infile);
399 }
400
401 c = getc (infile);
402 if (c != '"')
403 continue;
404 c = read_c_string (infile, -1);
405
406 if (defunflag)
407 commas = 5;
408 else if (defvarperbufferflag)
409 commas = 2;
410 else if (defvarflag)
411 commas = 1;
412 else /* For DEFSIMPLE and DEFPRED */
413 commas = 2;
414
415 while (commas)
416 {
417 if (c == ',')
418 {
419 commas--;
420 if (defunflag && (commas == 1 || commas == 2))
421 {
422 do
423 c = getc (infile);
424 while (c == ' ' || c == '\n' || c == '\t');
425 if (c < 0)
426 goto eof;
427 ungetc (c, infile);
428 if (commas == 2) /* pick up minargs */
429 fscanf (infile, "%d", &minargs);
430 else /* pick up maxargs */
431 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
432 maxargs = -1;
433 else
434 fscanf (infile, "%d", &maxargs);
435 }
436 }
437 if (c < 0)
438 goto eof;
439 c = getc (infile);
440 }
441 while (c == ' ' || c == '\n' || c == '\t')
442 c = getc (infile);
443 if (c == '"')
444 c = read_c_string (infile, 0);
445 while (c != ',')
446 c = getc (infile);
447 c = getc (infile);
448 while (c == ' ' || c == '\n' || c == '\t')
449 c = getc (infile);
450
451 if (c == '"')
452 {
453 putc (037, outfile);
454 putc (defvarflag ? 'V' : 'F', outfile);
455 fprintf (outfile, "%s\n", buf);
456 c = read_c_string (infile, 1);
457
458 /* If this is a defun, find the arguments and print them. If
459 this function takes MANY or UNEVALLED args, then the C source
460 won't give the names of the arguments, so we shouldn't bother
461 trying to find them. */
462 if (defunflag && maxargs != -1)
463 {
464 char argbuf[1024], *p = argbuf;
465 while (c != ')')
466 {
467 if (c < 0)
468 goto eof;
469 c = getc (infile);
470 }
471 /* Skip into arguments. */
472 while (c != '(')
473 {
474 if (c < 0)
475 goto eof;
476 c = getc (infile);
477 }
478 /* Copy arguments into ARGBUF. */
479 *p++ = c;
480 do
481 *p++ = c = getc (infile);
482 while (c != ')');
483 *p = '\0';
484 /* Output them. */
485 fprintf (outfile, "\n\n");
486 write_c_args (outfile, buf, argbuf, minargs, maxargs);
487 }
488 }
489 }
490 eof:
491 fclose (infile);
492 return 0;
493 }
494 \f
495 /* Read a file of Lisp code, compiled or interpreted.
496 Looks for
497 (defun NAME ARGS DOCSTRING ...)
498 (defmacro NAME ARGS DOCSTRING ...)
499 (autoload (quote NAME) FILE DOCSTRING ...)
500 (defvar NAME VALUE DOCSTRING)
501 (defconst NAME VALUE DOCSTRING)
502 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
503 (fset (quote NAME) #[... DOCSTRING ...])
504 (defalias (quote NAME) #[... DOCSTRING ...])
505 starting in column zero.
506 (quote NAME) may appear as 'NAME as well.
507
508 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
509 When we find that, we save it for the following defining-form,
510 and we use that instead of reading a doc string within that defining-form.
511
512 For defun, defmacro, and autoload, we know how to skip over the arglist.
513 For defvar, defconst, and fset we skip to the docstring with a kludgy
514 formatting convention: all docstrings must appear on the same line as the
515 initial open-paren (the one in column zero) and must contain a backslash
516 and a double-quote immediately after the initial double-quote. No newlines
517 must appear between the beginning of the form and the first double-quote.
518 The only source file that must follow this convention is loaddefs.el; aside
519 from that, it is always the .elc file that we look at, and they are no
520 problem because byte-compiler output follows this convention.
521 The NAME and DOCSTRING are output.
522 NAME is preceded by `F' for a function or `V' for a variable.
523 An entry is output only if DOCSTRING has \ newline just after the opening "
524 */
525
526 void
527 skip_white (infile)
528 FILE *infile;
529 {
530 char c = ' ';
531 while (c == ' ' || c == '\t' || c == '\n')
532 c = getc (infile);
533 ungetc (c, infile);
534 }
535
536 void
537 read_lisp_symbol (infile, buffer)
538 FILE *infile;
539 char *buffer;
540 {
541 char c;
542 char *fillp = buffer;
543
544 skip_white (infile);
545 while (1)
546 {
547 c = getc (infile);
548 if (c == '\\')
549 *(++fillp) = getc (infile);
550 else if (c == ' ' || c == '\t' || c == '\n' || c == '(' || c == ')')
551 {
552 ungetc (c, infile);
553 *fillp = 0;
554 break;
555 }
556 else
557 *fillp++ = c;
558 }
559
560 if (! buffer[0])
561 fprintf (stderr, "## expected a symbol, got '%c'\n", c);
562
563 skip_white (infile);
564 }
565
566 int
567 scan_lisp_file (filename, mode)
568 char *filename, *mode;
569 {
570 FILE *infile;
571 register int c;
572 char *saved_string = 0;
573
574 infile = fopen (filename, mode);
575 if (infile == NULL)
576 {
577 perror (filename);
578 return 0; /* No error */
579 }
580
581 c = '\n';
582 while (!feof (infile))
583 {
584 char buffer[BUFSIZ];
585 char type;
586
587 if (c != '\n')
588 {
589 c = getc (infile);
590 continue;
591 }
592 c = getc (infile);
593 /* Detect a dynamic doc string and save it for the next expression. */
594 if (c == '#')
595 {
596 c = getc (infile);
597 if (c == '@')
598 {
599 int length = 0;
600 int i;
601
602 /* Read the length. */
603 while ((c = getc (infile),
604 c >= '0' && c <= '9'))
605 {
606 length *= 10;
607 length += c - '0';
608 }
609
610 /* The next character is a space that is counted in the length
611 but not part of the doc string.
612 We already read it, so just ignore it. */
613 length--;
614
615 /* Read in the contents. */
616 if (saved_string != 0)
617 free (saved_string);
618 saved_string = (char *) malloc (length);
619 for (i = 0; i < length; i++)
620 saved_string[i] = getc (infile);
621 /* The last character is a ^_.
622 That is needed in the .elc file
623 but it is redundant in DOC. So get rid of it here. */
624 saved_string[length - 1] = 0;
625 /* Skip the newline. */
626 c = getc (infile);
627 while (c != '\n')
628 c = getc (infile);
629 }
630 continue;
631 }
632
633 if (c != '(')
634 continue;
635
636 read_lisp_symbol (infile, buffer);
637
638 if (! strcmp (buffer, "defun") ||
639 ! strcmp (buffer, "defmacro"))
640 {
641 type = 'F';
642 read_lisp_symbol (infile, buffer);
643
644 /* Skip the arguments: either "nil" or a list in parens */
645
646 c = getc (infile);
647 if (c == 'n') /* nil */
648 {
649 if ((c = getc (infile)) != 'i' ||
650 (c = getc (infile)) != 'l')
651 {
652 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
653 buffer, filename);
654 continue;
655 }
656 }
657 else if (c != '(')
658 {
659 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
660 buffer, filename);
661 continue;
662 }
663 else
664 while (c != ')')
665 c = getc (infile);
666 skip_white (infile);
667
668 /* If the next three characters aren't `dquote bslash newline'
669 then we're not reading a docstring.
670 */
671 if ((c = getc (infile)) != '"' ||
672 (c = getc (infile)) != '\\' ||
673 (c = getc (infile)) != '\n')
674 {
675 #ifdef DEBUG
676 fprintf (stderr, "## non-docstring in %s (%s)\n",
677 buffer, filename);
678 #endif
679 continue;
680 }
681 }
682
683 else if (! strcmp (buffer, "defvar") ||
684 ! strcmp (buffer, "defconst"))
685 {
686 char c1 = 0, c2 = 0;
687 type = 'V';
688 read_lisp_symbol (infile, buffer);
689
690 if (saved_string == 0)
691 {
692
693 /* Skip until the first newline; remember the two previous chars. */
694 while (c != '\n' && c >= 0)
695 {
696 c2 = c1;
697 c1 = c;
698 c = getc (infile);
699 }
700
701 /* If two previous characters were " and \,
702 this is a doc string. Otherwise, there is none. */
703 if (c2 != '"' || c1 != '\\')
704 {
705 #ifdef DEBUG
706 fprintf (stderr, "## non-docstring in %s (%s)\n",
707 buffer, filename);
708 #endif
709 continue;
710 }
711 }
712 }
713
714 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
715 {
716 char c1 = 0, c2 = 0;
717 type = 'F';
718
719 c = getc (infile);
720 if (c == '\'')
721 read_lisp_symbol (infile, buffer);
722 else
723 {
724 if (c != '(')
725 {
726 fprintf (stderr, "## unparsable name in fset in %s\n",
727 filename);
728 continue;
729 }
730 read_lisp_symbol (infile, buffer);
731 if (strcmp (buffer, "quote"))
732 {
733 fprintf (stderr, "## unparsable name in fset in %s\n",
734 filename);
735 continue;
736 }
737 read_lisp_symbol (infile, buffer);
738 c = getc (infile);
739 if (c != ')')
740 {
741 fprintf (stderr,
742 "## unparsable quoted name in fset in %s\n",
743 filename);
744 continue;
745 }
746 }
747
748 if (saved_string == 0)
749 {
750 /* Skip until the first newline; remember the two previous chars. */
751 while (c != '\n' && c >= 0)
752 {
753 c2 = c1;
754 c1 = c;
755 c = getc (infile);
756 }
757
758 /* If two previous characters were " and \,
759 this is a doc string. Otherwise, there is none. */
760 if (c2 != '"' || c1 != '\\')
761 {
762 #ifdef DEBUG
763 fprintf (stderr, "## non-docstring in %s (%s)\n",
764 buffer, filename);
765 #endif
766 continue;
767 }
768 }
769 }
770
771 else if (! strcmp (buffer, "autoload"))
772 {
773 type = 'F';
774 c = getc (infile);
775 if (c == '\'')
776 read_lisp_symbol (infile, buffer);
777 else
778 {
779 if (c != '(')
780 {
781 fprintf (stderr, "## unparsable name in autoload in %s\n",
782 filename);
783 continue;
784 }
785 read_lisp_symbol (infile, buffer);
786 if (strcmp (buffer, "quote"))
787 {
788 fprintf (stderr, "## unparsable name in autoload in %s\n",
789 filename);
790 continue;
791 }
792 read_lisp_symbol (infile, buffer);
793 c = getc (infile);
794 if (c != ')')
795 {
796 fprintf (stderr,
797 "## unparsable quoted name in autoload in %s\n",
798 filename);
799 continue;
800 }
801 }
802 skip_white (infile);
803 if ((c = getc (infile)) != '\"')
804 {
805 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
806 buffer, filename);
807 continue;
808 }
809 read_c_string (infile, 0);
810 skip_white (infile);
811
812 if (saved_string == 0)
813 {
814 /* If the next three characters aren't `dquote bslash newline'
815 then we're not reading a docstring. */
816 if ((c = getc (infile)) != '"' ||
817 (c = getc (infile)) != '\\' ||
818 (c = getc (infile)) != '\n')
819 {
820 #ifdef DEBUG
821 fprintf (stderr, "## non-docstring in %s (%s)\n",
822 buffer, filename);
823 #endif
824 continue;
825 }
826 }
827 }
828
829 #ifdef DEBUG
830 else if (! strcmp (buffer, "if") ||
831 ! strcmp (buffer, "byte-code"))
832 ;
833 #endif
834
835 else
836 {
837 #ifdef DEBUG
838 fprintf (stderr, "## unrecognised top-level form, %s (%s)\n",
839 buffer, filename);
840 #endif
841 continue;
842 }
843
844 /* At this point, we should either use the previous
845 dynamic doc string in saved_string
846 or gobble a doc string from the input file.
847
848 In the latter case, the opening quote (and leading
849 backslash-newline) have already been read. */
850
851 putc (037, outfile);
852 putc (type, outfile);
853 fprintf (outfile, "%s\n", buffer);
854 if (saved_string)
855 {
856 fputs (saved_string, outfile);
857 /* Don't use one dynamic doc string twice. */
858 free (saved_string);
859 saved_string = 0;
860 }
861 else
862 read_c_string (infile, 1);
863 }
864 fclose (infile);
865 return 0;
866 }