(scan_file): Make sure it never looks at filename[-1].
[bpt/emacs.git] / lib-src / make-docfile.c
1 /* Generate doc-string file for GNU Emacs from source files.
2 Copyright (C) 1985, 1986, 1992, 1993, 1994 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20 /* The arguments given to this program are all the C and Lisp source files
21 of GNU Emacs. .elc and .el and .c files are allowed.
22 A .o file can also be specified; the .c file it was made from is used.
23 This helps the makefile pass the correct list of files.
24
25 The results, which go to standard output or to a file
26 specified with -a or -o (-a to append, -o to start from nothing),
27 are entries containing function or variable names and their documentation.
28 Each entry starts with a ^_ character.
29 Then comes F for a function or V for a variable.
30 Then comes the function or variable name, terminated with a newline.
31 Then comes the documentation for that function or variable.
32 */
33
34 #define NO_SHORTNAMES /* Tell config not to load remap.h */
35 #include <../src/config.h>
36
37 #include <stdio.h>
38 #ifdef MSDOS
39 #include <fcntl.h>
40 #endif /* MSDOS */
41 #ifdef WINDOWSNT
42 #include <stdlib.h>
43 #include <fcntl.h>
44 #include <direct.h>
45 #endif /* WINDOWSNT */
46
47 #ifdef DOS_NT
48 #define READ_TEXT "rt"
49 #define READ_BINARY "rb"
50 #else /* not DOS_NT */
51 #define READ_TEXT "r"
52 #define READ_BINARY "r"
53 #endif /* not DOS_NT */
54
55 int scan_file ();
56 int scan_lisp_file ();
57 int scan_c_file ();
58
59 #ifdef MSDOS
60 /* s/msdos.h defines this as sys_chdir, but we're not linking with the
61 file where that function is defined. */
62 #undef chdir
63 #endif
64
65 /* Stdio stream for output to the DOC file. */
66 FILE *outfile;
67
68 /* Name this program was invoked with. */
69 char *progname;
70
71 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
72
73 /* VARARGS1 */
74 void
75 error (s1, s2)
76 char *s1, *s2;
77 {
78 fprintf (stderr, "%s: ", progname);
79 fprintf (stderr, s1, s2);
80 fprintf (stderr, "\n");
81 }
82
/* Report a problem exactly as `error' does (S1 is the printf control
   string, S2 its argument), then terminate the program with exit
   status 1.  This function does not return.  */

/* VARARGS1 */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (1);
}
93
/* Like malloc, but never return a null pointer: if the allocation
   fails, report "virtual memory exhausted" through `fatal' and exit.
   SIZE is the number of bytes wanted.  */

long *
xmalloc (size)
     unsigned int size;
{
  /* A zero-byte request may legitimately yield a null pointer; ask for
     one byte instead so that case is not reported as exhaustion.  */
  long *result = (long *) malloc (size ? size : 1);

  if (result == NULL)
    /* Pass a real string for the unused argument: `fatal' is called
       without a prototype, so an int 0 would not be converted to the
       `char *' it expects.  */
    fatal ("virtual memory exhausted", "");
  return result;
}
105 \f
106 int
107 main (argc, argv)
108 int argc;
109 char **argv;
110 {
111 int i;
112 int err_count = 0;
113 int first_infile;
114
115 progname = argv[0];
116
117 outfile = stdout;
118
119 /* Don't put CRs in the DOC file. */
120 #ifdef MSDOS
121 _fmode = O_BINARY;
122 #if 0 /* Suspicion is that this causes hanging.
123 So instead we require people to use -o on MSDOS. */
124 (stdout)->_flag &= ~_IOTEXT;
125 _setmode (fileno (stdout), O_BINARY);
126 #endif
127 outfile = 0;
128 #endif /* MSDOS */
129 #ifdef WINDOWSNT
130 _fmode = O_BINARY;
131 _setmode (fileno (stdout), O_BINARY);
132 #endif /* WINDOWSNT */
133
134 /* If first two args are -o FILE, output to FILE. */
135 i = 1;
136 if (argc > i + 1 && !strcmp (argv[i], "-o"))
137 {
138 outfile = fopen (argv[i + 1], "w");
139 i += 2;
140 }
141 if (argc > i + 1 && !strcmp (argv[i], "-a"))
142 {
143 outfile = fopen (argv[i + 1], "a");
144 i += 2;
145 }
146 if (argc > i + 1 && !strcmp (argv[i], "-d"))
147 {
148 chdir (argv[i + 1]);
149 i += 2;
150 }
151
152 if (outfile == 0)
153 fatal ("No output file specified", "");
154
155 first_infile = i;
156 for (; i < argc; i++)
157 {
158 int j;
159 /* Don't process one file twice. */
160 for (j = first_infile; j < i; j++)
161 if (! strcmp (argv[i], argv[j]))
162 break;
163 if (j == i)
164 err_count += scan_file (argv[i]);
165 }
166 #ifndef VMS
167 exit (err_count > 0);
168 #endif /* VMS */
169 return err_count > 0;
170 }
171
172 /* Read file FILENAME and output its doc strings to outfile. */
173 /* Return 1 if file is not found, 0 if it is found. */
174
175 int
176 scan_file (filename)
177 char *filename;
178 {
179 int len = strlen (filename);
180 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
181 return scan_lisp_file (filename, READ_BINARY);
182 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
183 return scan_lisp_file (filename, READ_TEXT);
184 else
185 return scan_c_file (filename, READ_TEXT);
186 }
187 \f
188 char buf[128];
189
190 /* Skip a C string from INFILE,
191 and return the character that follows the closing ".
192 If printflag is positive, output string contents to outfile.
193 If it is negative, store contents in buf.
194 Convert escape sequences \n and \t to newline and tab;
195 discard \ followed by newline. */
196
197 int
198 read_c_string (infile, printflag)
199 FILE *infile;
200 int printflag;
201 {
202 register int c;
203 char *p = buf;
204
205 c = getc (infile);
206 while (c != EOF)
207 {
208 while (c != '"' && c != EOF)
209 {
210 if (c == '\\')
211 {
212 c = getc (infile);
213 if (c == '\n')
214 {
215 c = getc (infile);
216 continue;
217 }
218 if (c == 'n')
219 c = '\n';
220 if (c == 't')
221 c = '\t';
222 }
223 if (printflag > 0)
224 putc (c, outfile);
225 else if (printflag < 0)
226 *p++ = c;
227 c = getc (infile);
228 }
229 c = getc (infile);
230 if (c != '"')
231 break;
232 /* If we had a "", concatenate the two strings. */
233 c = getc (infile);
234 }
235
236 if (printflag < 0)
237 *p = 0;
238
239 return c;
240 }
241 \f
/* Return nonzero if CH can be part of a C identifier
   (an ASCII letter, an ASCII digit, or underscore).  */

static int
c_ident_char_p (ch)
     int ch;
{
  if ('A' <= ch && ch <= 'Z')
    return 1;
  if ('a' <= ch && ch <= 'z')
    return 1;
  if ('0' <= ch && ch <= '9')
    return 1;
  return ch == '_';
}

/* Write to OUT a Lisp-style usage line for function FUNC: an open
   paren and FUNC, followed by the C argument list text in BUF (whose
   leading open paren, if any, is dropped) rewritten as Lisp argument
   names.  Letters are upcased, underscores become hyphens, commas
   become spaces, and runs of spaces collapse into one.
   MINARGS and MAXARGS are the minimum and maximum number of arguments;
   `&optional ' is written before the first argument past MINARGS when
   more arguments are allowed.  */

void
write_c_args (out, func, buf, minargs, maxargs)
     FILE *out;
     char *func, *buf;
     int minargs, maxargs;
{
  char *scan;
  int inside_word = 0;          /* Scanning an identifier?  */
  int last_was_space = 0;       /* Was the previous character a space?  */
  int at_start = 1;             /* Still on the very first character?  */

  fprintf (out, "(%s", func);

  scan = (*buf == '(') ? buf + 1 : buf;

  while (*scan != '\0')
    {
      char ch = *scan;
      int wordish = c_ident_char_p (ch);
      int word_begins = 0;

      if (wordish && !inside_word)
        {
          /* A new argument name starts here.  */
          inside_word = 1;
          word_begins = 1;

          /* Separate the function name from an argument that starts
             the list immediately.  */
          if (at_start)
            putc (' ', out);

          if (minargs == 0 && maxargs > 0)
            fprintf (out, "&optional ");
          last_was_space = 1;

          minargs--;
          maxargs--;
        }
      else if (!wordish && inside_word)
        inside_word = 0;

      /* Spell the character the way Lisp would.  */
      if (ch == '_')
        ch = '-';
      else if (ch == ',')
        ch = ' ';

      if (word_begins
          && strncmp (scan, "defalt", 6) == 0
          && !c_ident_char_p (scan[6]))
        {
          /* `default' is reserved in C, so the sources spell the
             argument `defalt'; print the real name.  The loop's own
             increment steps over the sixth letter.  */
          fprintf (out, "DEFAULT");
          scan += 5;
          inside_word = 0;
          last_was_space = 0;
        }
      else if (ch != ' ' || !last_was_space)
        {
          if ('a' <= ch && ch <= 'z')
            ch += 'A' - 'a';
          putc (ch, out);
        }

      last_was_space = (ch == ' ');
      at_start = 0;
      scan++;
    }
}
324 \f
325 /* Read through a c file. If a .o file is named,
326 the corresponding .c file is read instead.
327 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
328 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
329
330 int
331 scan_c_file (filename, mode)
332 char *filename, *mode;
333 {
334 FILE *infile;
335 register int c;
336 register int commas;
337 register int defunflag;
338 register int defvarperbufferflag;
339 register int defvarflag;
340 int minargs, maxargs;
341 int extension = filename[strlen (filename) - 1];
342
343 if (extension == 'o')
344 filename[strlen (filename) - 1] = 'c';
345
346 infile = fopen (filename, mode);
347
348 /* No error if non-ex input file */
349 if (infile == NULL)
350 {
351 perror (filename);
352 return 0;
353 }
354
355 /* Reset extension to be able to detect duplicate files. */
356 filename[strlen (filename) - 1] = extension;
357
358 c = '\n';
359 while (!feof (infile))
360 {
361 if (c != '\n')
362 {
363 c = getc (infile);
364 continue;
365 }
366 c = getc (infile);
367 if (c == ' ')
368 {
369 while (c == ' ')
370 c = getc (infile);
371 if (c != 'D')
372 continue;
373 c = getc (infile);
374 if (c != 'E')
375 continue;
376 c = getc (infile);
377 if (c != 'F')
378 continue;
379 c = getc (infile);
380 if (c != 'V')
381 continue;
382 c = getc (infile);
383 if (c != 'A')
384 continue;
385 c = getc (infile);
386 if (c != 'R')
387 continue;
388 c = getc (infile);
389 if (c != '_')
390 continue;
391
392 defvarflag = 1;
393 defunflag = 0;
394
395 c = getc (infile);
396 defvarperbufferflag = (c == 'P');
397
398 c = getc (infile);
399 }
400 else if (c == 'D')
401 {
402 c = getc (infile);
403 if (c != 'E')
404 continue;
405 c = getc (infile);
406 if (c != 'F')
407 continue;
408 c = getc (infile);
409 defunflag = c == 'U';
410 defvarflag = 0;
411 }
412 else continue;
413
414 while (c != '(')
415 {
416 if (c < 0)
417 goto eof;
418 c = getc (infile);
419 }
420
421 c = getc (infile);
422 if (c != '"')
423 continue;
424 c = read_c_string (infile, -1);
425
426 if (defunflag)
427 commas = 5;
428 else if (defvarperbufferflag)
429 commas = 2;
430 else if (defvarflag)
431 commas = 1;
432 else /* For DEFSIMPLE and DEFPRED */
433 commas = 2;
434
435 while (commas)
436 {
437 if (c == ',')
438 {
439 commas--;
440 if (defunflag && (commas == 1 || commas == 2))
441 {
442 do
443 c = getc (infile);
444 while (c == ' ' || c == '\n' || c == '\t');
445 if (c < 0)
446 goto eof;
447 ungetc (c, infile);
448 if (commas == 2) /* pick up minargs */
449 fscanf (infile, "%d", &minargs);
450 else /* pick up maxargs */
451 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
452 maxargs = -1;
453 else
454 fscanf (infile, "%d", &maxargs);
455 }
456 }
457 if (c < 0)
458 goto eof;
459 c = getc (infile);
460 }
461 while (c == ' ' || c == '\n' || c == '\t')
462 c = getc (infile);
463 if (c == '"')
464 c = read_c_string (infile, 0);
465 while (c != ',')
466 c = getc (infile);
467 c = getc (infile);
468 while (c == ' ' || c == '\n' || c == '\t')
469 c = getc (infile);
470
471 if (c == '"')
472 {
473 putc (037, outfile);
474 putc (defvarflag ? 'V' : 'F', outfile);
475 fprintf (outfile, "%s\n", buf);
476 c = read_c_string (infile, 1);
477
478 /* If this is a defun, find the arguments and print them. If
479 this function takes MANY or UNEVALLED args, then the C source
480 won't give the names of the arguments, so we shouldn't bother
481 trying to find them. */
482 if (defunflag && maxargs != -1)
483 {
484 char argbuf[1024], *p = argbuf;
485 while (c != ')')
486 {
487 if (c < 0)
488 goto eof;
489 c = getc (infile);
490 }
491 /* Skip into arguments. */
492 while (c != '(')
493 {
494 if (c < 0)
495 goto eof;
496 c = getc (infile);
497 }
498 /* Copy arguments into ARGBUF. */
499 *p++ = c;
500 do
501 *p++ = c = getc (infile);
502 while (c != ')');
503 *p = '\0';
504 /* Output them. */
505 fprintf (outfile, "\n\n");
506 write_c_args (outfile, buf, argbuf, minargs, maxargs);
507 }
508 }
509 }
510 eof:
511 fclose (infile);
512 return 0;
513 }
514 \f
515 /* Read a file of Lisp code, compiled or interpreted.
516 Looks for
517 (defun NAME ARGS DOCSTRING ...)
518 (defmacro NAME ARGS DOCSTRING ...)
519 (autoload (quote NAME) FILE DOCSTRING ...)
520 (defvar NAME VALUE DOCSTRING)
521 (defconst NAME VALUE DOCSTRING)
522 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
523 (fset (quote NAME) #[... DOCSTRING ...])
524 (defalias (quote NAME) #[... DOCSTRING ...])
525 starting in column zero.
526 (quote NAME) may appear as 'NAME as well.
527
528 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
529 When we find that, we save it for the following defining-form,
530 and we use that instead of reading a doc string within that defining-form.
531
532 For defun, defmacro, and autoload, we know how to skip over the arglist.
533 For defvar, defconst, and fset we skip to the docstring with a kludgy
534 formatting convention: all docstrings must appear on the same line as the
535 initial open-paren (the one in column zero) and must contain a backslash
536 and a double-quote immediately after the initial double-quote. No newlines
537 must appear between the beginning of the form and the first double-quote.
538 The only source file that must follow this convention is loaddefs.el; aside
539 from that, it is always the .elc file that we look at, and they are no
540 problem because byte-compiler output follows this convention.
541 The NAME and DOCSTRING are output.
542 NAME is preceded by `F' for a function or `V' for a variable.
543 An entry is output only if DOCSTRING has \ newline just after the opening "
544 */
545
/* Advance INFILE past any run of spaces, tabs and newlines, leaving
   the stream positioned at the first other character.  At end of
   input nothing is pushed back.  */

void
skip_white (infile)
     FILE *infile;
{
  /* Must be an int: a char cannot tell EOF apart from the byte 0377.  */
  int c;

  do
    c = getc (infile);
  while (c == ' ' || c == '\t' || c == '\n');

  if (c != EOF)
    ungetc (c, infile);
}
555
/* Read a Lisp symbol name from INFILE into BUFFER, null-terminated.
   Whitespace before and after the symbol is skipped.  The symbol ends
   at a space, tab, newline, open or close paren (which is left unread)
   or at end of input.  A backslash quotes the character after it,
   which is stored as part of the name.
   If no symbol is found, a complaint is written to stderr and BUFFER
   is left empty.
   NOTE: BUFFER's size is not known here, so the caller must supply a
   buffer large enough for any symbol in the input.  */

void
read_lisp_symbol (infile, buffer)
     FILE *infile;
     char *buffer;
{
  /* Must be an int so that EOF is recognized; in a char it could pass
     for an ordinary symbol constituent and the loop would never end.  */
  int c;
  char *fillp = buffer;

  skip_white (infile);
  while (1)
    {
      c = getc (infile);
      if (c == '\\')
        {
          /* Store the quoted character in the next free slot.  */
          c = getc (infile);
          if (c == EOF)
            {
              *fillp = 0;
              break;
            }
          *fillp++ = c;
        }
      else if (c == EOF)
        {
          *fillp = 0;
          break;
        }
      else if (c == ' ' || c == '\t' || c == '\n' || c == '(' || c == ')')
        {
          ungetc (c, infile);
          *fillp = 0;
          break;
        }
      else
        *fillp++ = c;
    }

  if (! buffer[0])
    fprintf (stderr, "## expected a symbol, got '%c'\n", c);

  skip_white (infile);
}
585
586 int
587 scan_lisp_file (filename, mode)
588 char *filename, *mode;
589 {
590 FILE *infile;
591 register int c;
592 char *saved_string = 0;
593
594 infile = fopen (filename, mode);
595 if (infile == NULL)
596 {
597 perror (filename);
598 return 0; /* No error */
599 }
600
601 c = '\n';
602 while (!feof (infile))
603 {
604 char buffer[BUFSIZ];
605 char type;
606
607 if (c != '\n')
608 {
609 c = getc (infile);
610 continue;
611 }
612 c = getc (infile);
613 /* Detect a dynamic doc string and save it for the next expression. */
614 if (c == '#')
615 {
616 c = getc (infile);
617 if (c == '@')
618 {
619 int length = 0;
620 int i;
621
622 /* Read the length. */
623 while ((c = getc (infile),
624 c >= '0' && c <= '9'))
625 {
626 length *= 10;
627 length += c - '0';
628 }
629
630 /* The next character is a space that is counted in the length
631 but not part of the doc string.
632 We already read it, so just ignore it. */
633 length--;
634
635 /* Read in the contents. */
636 if (saved_string != 0)
637 free (saved_string);
638 saved_string = (char *) malloc (length);
639 for (i = 0; i < length; i++)
640 saved_string[i] = getc (infile);
641 /* The last character is a ^_.
642 That is needed in the .elc file
643 but it is redundant in DOC. So get rid of it here. */
644 saved_string[length - 1] = 0;
645 /* Skip the newline. */
646 c = getc (infile);
647 while (c != '\n')
648 c = getc (infile);
649 }
650 continue;
651 }
652
653 if (c != '(')
654 continue;
655
656 read_lisp_symbol (infile, buffer);
657
658 if (! strcmp (buffer, "defun") ||
659 ! strcmp (buffer, "defmacro"))
660 {
661 type = 'F';
662 read_lisp_symbol (infile, buffer);
663
664 /* Skip the arguments: either "nil" or a list in parens */
665
666 c = getc (infile);
667 if (c == 'n') /* nil */
668 {
669 if ((c = getc (infile)) != 'i' ||
670 (c = getc (infile)) != 'l')
671 {
672 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
673 buffer, filename);
674 continue;
675 }
676 }
677 else if (c != '(')
678 {
679 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
680 buffer, filename);
681 continue;
682 }
683 else
684 while (c != ')')
685 c = getc (infile);
686 skip_white (infile);
687
688 /* If the next three characters aren't `dquote bslash newline'
689 then we're not reading a docstring.
690 */
691 if ((c = getc (infile)) != '"' ||
692 (c = getc (infile)) != '\\' ||
693 (c = getc (infile)) != '\n')
694 {
695 #ifdef DEBUG
696 fprintf (stderr, "## non-docstring in %s (%s)\n",
697 buffer, filename);
698 #endif
699 continue;
700 }
701 }
702
703 else if (! strcmp (buffer, "defvar") ||
704 ! strcmp (buffer, "defconst"))
705 {
706 char c1 = 0, c2 = 0;
707 type = 'V';
708 read_lisp_symbol (infile, buffer);
709
710 if (saved_string == 0)
711 {
712
713 /* Skip until the first newline; remember the two previous chars. */
714 while (c != '\n' && c >= 0)
715 {
716 c2 = c1;
717 c1 = c;
718 c = getc (infile);
719 }
720
721 /* If two previous characters were " and \,
722 this is a doc string. Otherwise, there is none. */
723 if (c2 != '"' || c1 != '\\')
724 {
725 #ifdef DEBUG
726 fprintf (stderr, "## non-docstring in %s (%s)\n",
727 buffer, filename);
728 #endif
729 continue;
730 }
731 }
732 }
733
734 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
735 {
736 char c1 = 0, c2 = 0;
737 type = 'F';
738
739 c = getc (infile);
740 if (c == '\'')
741 read_lisp_symbol (infile, buffer);
742 else
743 {
744 if (c != '(')
745 {
746 fprintf (stderr, "## unparsable name in fset in %s\n",
747 filename);
748 continue;
749 }
750 read_lisp_symbol (infile, buffer);
751 if (strcmp (buffer, "quote"))
752 {
753 fprintf (stderr, "## unparsable name in fset in %s\n",
754 filename);
755 continue;
756 }
757 read_lisp_symbol (infile, buffer);
758 c = getc (infile);
759 if (c != ')')
760 {
761 fprintf (stderr,
762 "## unparsable quoted name in fset in %s\n",
763 filename);
764 continue;
765 }
766 }
767
768 if (saved_string == 0)
769 {
770 /* Skip until the first newline; remember the two previous chars. */
771 while (c != '\n' && c >= 0)
772 {
773 c2 = c1;
774 c1 = c;
775 c = getc (infile);
776 }
777
778 /* If two previous characters were " and \,
779 this is a doc string. Otherwise, there is none. */
780 if (c2 != '"' || c1 != '\\')
781 {
782 #ifdef DEBUG
783 fprintf (stderr, "## non-docstring in %s (%s)\n",
784 buffer, filename);
785 #endif
786 continue;
787 }
788 }
789 }
790
791 else if (! strcmp (buffer, "autoload"))
792 {
793 type = 'F';
794 c = getc (infile);
795 if (c == '\'')
796 read_lisp_symbol (infile, buffer);
797 else
798 {
799 if (c != '(')
800 {
801 fprintf (stderr, "## unparsable name in autoload in %s\n",
802 filename);
803 continue;
804 }
805 read_lisp_symbol (infile, buffer);
806 if (strcmp (buffer, "quote"))
807 {
808 fprintf (stderr, "## unparsable name in autoload in %s\n",
809 filename);
810 continue;
811 }
812 read_lisp_symbol (infile, buffer);
813 c = getc (infile);
814 if (c != ')')
815 {
816 fprintf (stderr,
817 "## unparsable quoted name in autoload in %s\n",
818 filename);
819 continue;
820 }
821 }
822 skip_white (infile);
823 if ((c = getc (infile)) != '\"')
824 {
825 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
826 buffer, filename);
827 continue;
828 }
829 read_c_string (infile, 0);
830 skip_white (infile);
831
832 if (saved_string == 0)
833 {
834 /* If the next three characters aren't `dquote bslash newline'
835 then we're not reading a docstring. */
836 if ((c = getc (infile)) != '"' ||
837 (c = getc (infile)) != '\\' ||
838 (c = getc (infile)) != '\n')
839 {
840 #ifdef DEBUG
841 fprintf (stderr, "## non-docstring in %s (%s)\n",
842 buffer, filename);
843 #endif
844 continue;
845 }
846 }
847 }
848
849 #ifdef DEBUG
850 else if (! strcmp (buffer, "if") ||
851 ! strcmp (buffer, "byte-code"))
852 ;
853 #endif
854
855 else
856 {
857 #ifdef DEBUG
858 fprintf (stderr, "## unrecognised top-level form, %s (%s)\n",
859 buffer, filename);
860 #endif
861 continue;
862 }
863
864 /* At this point, we should either use the previous
865 dynamic doc string in saved_string
866 or gobble a doc string from the input file.
867
868 In the latter case, the opening quote (and leading
869 backslash-newline) have already been read. */
870
871 putc (037, outfile);
872 putc (type, outfile);
873 fprintf (outfile, "%s\n", buffer);
874 if (saved_string)
875 {
876 fputs (saved_string, outfile);
877 /* Don't use one dynamic doc string twice. */
878 free (saved_string);
879 saved_string = 0;
880 }
881 else
882 read_c_string (infile, 1);
883 }
884 fclose (infile);
885 return 0;
886 }