* make-docfile.c (search_lisp_doc_at_eol): Unget last read
[bpt/emacs.git] / lib-src / make-docfile.c
1 /* Generate doc-string file for GNU Emacs from source files.
2 Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2012
3 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20
21 /* The arguments given to this program are all the C and Lisp source files
22 of GNU Emacs. .elc and .el and .c files are allowed.
23 A .o file can also be specified; the .c file it was made from is used.
24 This helps the makefile pass the correct list of files.
25 Option -d DIR means change to DIR before looking for files.
26
27 The results, which go to standard output or to a file
28 specified with -a or -o (-a to append, -o to start from nothing),
29 are entries containing function or variable names and their documentation.
30 Each entry starts with a ^_ character.
31 Then comes F for a function or V for a variable.
32 Then comes the function or variable name, terminated with a newline.
33 Then comes the documentation for that function or variable.
34 */
35
36 #include <config.h>
37
38 /* Defined to be emacs_main, sys_fopen, etc. in config.h. */
39 #undef main
40 #undef fopen
41 #undef chdir
42
43 #include <stdio.h>
44 #include <stdlib.h>
45 #ifdef MSDOS
46 #include <fcntl.h>
47 #endif /* MSDOS */
48 #ifdef WINDOWSNT
49 #include <fcntl.h>
50 #include <direct.h>
51 #endif /* WINDOWSNT */
52
53 #ifdef DOS_NT
54 #define READ_TEXT "rt"
55 #define READ_BINARY "rb"
56 #else /* not DOS_NT */
57 #define READ_TEXT "r"
58 #define READ_BINARY "r"
59 #endif /* not DOS_NT */
60
61 #ifndef DIRECTORY_SEP
62 #define DIRECTORY_SEP '/'
63 #endif
64
65 #ifndef IS_DIRECTORY_SEP
66 #define IS_DIRECTORY_SEP(_c_) ((_c_) == DIRECTORY_SEP)
67 #endif
68
69 /* Use this to suppress gcc's `...may be used before initialized' warnings. */
70 #ifdef lint
71 # define IF_LINT(Code) Code
72 #else
73 # define IF_LINT(Code) /* empty */
74 #endif
75
76 static int scan_file (char *filename);
77 static int scan_lisp_file (const char *filename, const char *mode);
78 static int scan_c_file (char *filename, const char *mode);
79 static void fatal (const char *s1, const char *s2) NO_RETURN;
80 static void start_globals (void);
81 static void write_globals (void);
82
83 #ifdef MSDOS
84 /* s/msdos.h defines this as sys_chdir, but we're not linking with the
85 file where that function is defined. */
86 #undef chdir
87 #endif
88
89 #include <unistd.h>
90
91 /* Stdio stream for output to the DOC file. */
92 FILE *outfile;
93
94 /* Name this program was invoked with. */
95 char *progname;
96
97 /* Nonzero if this invocation is generating globals.h. */
98 int generate_globals;
99
100 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
101
102 /* VARARGS1 */
103 static void
104 error (const char *s1, const char *s2)
105 {
106 fprintf (stderr, "%s: ", progname);
107 fprintf (stderr, s1, s2);
108 fprintf (stderr, "\n");
109 }
110
/* Report the problem described by S1 and S2 exactly as `error' does,
   then terminate the program with EXIT_FAILURE.  Never returns.  */

static void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
120
/* Allocate SIZE bytes with malloc.  If malloc yields a null pointer,
   the program is terminated through `fatal' instead of returning.  */

static void *
xmalloc (unsigned int size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted", 0);

  return block;
}
131
/* Resize the block ARG to SIZE bytes with realloc.  If realloc yields
   a null pointer, the program is terminated through `fatal' instead
   of returning.  */

static void *
xrealloc (void *arg, unsigned int size)
{
  void *block = realloc (arg, size);

  if (!block)
    fatal ("virtual memory exhausted", 0);

  return block;
}
142
143 \f
144 int
145 main (int argc, char **argv)
146 {
147 int i;
148 int err_count = 0;
149 int first_infile;
150
151 progname = argv[0];
152
153 outfile = stdout;
154
155 /* Don't put CRs in the DOC file. */
156 #ifdef MSDOS
157 _fmode = O_BINARY;
158 #if 0 /* Suspicion is that this causes hanging.
159 So instead we require people to use -o on MSDOS. */
160 (stdout)->_flag &= ~_IOTEXT;
161 _setmode (fileno (stdout), O_BINARY);
162 #endif
163 outfile = 0;
164 #endif /* MSDOS */
165 #ifdef WINDOWSNT
166 _fmode = O_BINARY;
167 _setmode (fileno (stdout), O_BINARY);
168 #endif /* WINDOWSNT */
169
170 /* If first two args are -o FILE, output to FILE. */
171 i = 1;
172 if (argc > i + 1 && !strcmp (argv[i], "-o"))
173 {
174 outfile = fopen (argv[i + 1], "w");
175 i += 2;
176 }
177 if (argc > i + 1 && !strcmp (argv[i], "-a"))
178 {
179 outfile = fopen (argv[i + 1], "a");
180 i += 2;
181 }
182 if (argc > i + 1 && !strcmp (argv[i], "-d"))
183 {
184 if (chdir (argv[i + 1]) != 0)
185 {
186 perror (argv[i + 1]);
187 return EXIT_FAILURE;
188 }
189 i += 2;
190 }
191 if (argc > i && !strcmp (argv[i], "-g"))
192 {
193 generate_globals = 1;
194 ++i;
195 }
196
197 if (outfile == 0)
198 fatal ("No output file specified", "");
199
200 if (generate_globals)
201 start_globals ();
202
203 first_infile = i;
204 for (; i < argc; i++)
205 {
206 int j;
207 /* Don't process one file twice. */
208 for (j = first_infile; j < i; j++)
209 if (! strcmp (argv[i], argv[j]))
210 break;
211 if (j == i)
212 err_count += scan_file (argv[i]);
213 }
214
215 if (err_count == 0 && generate_globals)
216 write_globals ();
217
218 return (err_count > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
219 }
220
221 /* Add a source file name boundary marker in the output file. */
222 static void
223 put_filename (char *filename)
224 {
225 char *tmp;
226
227 for (tmp = filename; *tmp; tmp++)
228 {
229 if (IS_DIRECTORY_SEP (*tmp))
230 filename = tmp + 1;
231 }
232
233 putc (037, outfile);
234 putc ('S', outfile);
235 fprintf (outfile, "%s\n", filename);
236 }
237
238 /* Read file FILENAME and output its doc strings to outfile. */
239 /* Return 1 if file is not found, 0 if it is found. */
240
241 static int
242 scan_file (char *filename)
243 {
244
245 size_t len = strlen (filename);
246
247 if (!generate_globals)
248 put_filename (filename);
249 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
250 return scan_lisp_file (filename, READ_BINARY);
251 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
252 return scan_lisp_file (filename, READ_TEXT);
253 else
254 return scan_c_file (filename, READ_TEXT);
255 }
256
257 static void
258 start_globals (void)
259 {
260 fprintf (outfile, "/* This file was auto-generated by make-docfile. */\n");
261 fprintf (outfile, "/* DO NOT EDIT. */\n");
262 fprintf (outfile, "struct emacs_globals {\n");
263 }
264 \f
265 static char input_buffer[128];
266
/* Bookkeeping shared by `read_c_string_or_comment' and the helpers it
   calls while one string or comment is being processed.  */
struct rcsoc_state
{
  /* Spaces that have been read from the input but not yet output.  */
  unsigned pending_spaces;
  /* Newlines that have been read from the input but not yet output.  */
  unsigned pending_newlines;

  /* The stream being read.  */
  FILE *in_file;

  /* When non-null, the next free position of a buffer that receives a
     copy of every output character.  */
  char *buf_ptr;
  /* When non-null, a stream that receives every output character.  */
  FILE *out_file;

  /* A word searched for at the beginning of lines, or null to disable
     the search.  A complete occurrence is not copied to the output and
     sets SAW_KEYWORD.  */
  const char *keyword;
  /* How far into KEYWORD the input currently being examined has
     matched.  */
  const char *cur_keyword_ptr;
  /* Nonzero once a complete occurrence of KEYWORD has been seen.  */
  int saw_keyword;
};
290
291 /* Output CH to the file or buffer in STATE. Any pending newlines or
292 spaces are output first. */
293
294 static inline void
295 put_char (int ch, struct rcsoc_state *state)
296 {
297 int out_ch;
298 do
299 {
300 if (state->pending_newlines > 0)
301 {
302 state->pending_newlines--;
303 out_ch = '\n';
304 }
305 else if (state->pending_spaces > 0)
306 {
307 state->pending_spaces--;
308 out_ch = ' ';
309 }
310 else
311 out_ch = ch;
312
313 if (state->out_file)
314 putc (out_ch, state->out_file);
315 if (state->buf_ptr)
316 *state->buf_ptr++ = out_ch;
317 }
318 while (out_ch != ch);
319 }
320
321 /* If in the middle of scanning a keyword, continue scanning with
322 character CH, otherwise output CH to the file or buffer in STATE.
323 Any pending newlines or spaces are output first, as well as any
324 previously scanned characters that were thought to be part of a
325 keyword, but were in fact not. */
326
327 static void
328 scan_keyword_or_put_char (int ch, struct rcsoc_state *state)
329 {
330 if (state->keyword
331 && *state->cur_keyword_ptr == ch
332 && (state->cur_keyword_ptr > state->keyword
333 || state->pending_newlines > 0))
334 /* We might be looking at STATE->keyword at some point.
335 Keep looking until we know for sure. */
336 {
337 if (*++state->cur_keyword_ptr == '\0')
338 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
339 {
340 state->saw_keyword = 1;
341
342 /* Reset the scanning pointer. */
343 state->cur_keyword_ptr = state->keyword;
344
345 /* Canonicalize whitespace preceding a usage string. */
346 state->pending_newlines = 2;
347 state->pending_spaces = 0;
348
349 /* Skip any whitespace between the keyword and the
350 usage string. */
351 do
352 ch = getc (state->in_file);
353 while (ch == ' ' || ch == '\n');
354
355 /* Output the open-paren we just read. */
356 put_char (ch, state);
357
358 /* Skip the function name and replace it with `fn'. */
359 do
360 ch = getc (state->in_file);
361 while (ch != ' ' && ch != ')');
362 put_char ('f', state);
363 put_char ('n', state);
364
365 /* Put back the last character. */
366 ungetc (ch, state->in_file);
367 }
368 }
369 else
370 {
371 if (state->keyword && state->cur_keyword_ptr > state->keyword)
372 /* We scanned the beginning of a potential usage
373 keyword, but it was a false alarm. Output the
374 part we scanned. */
375 {
376 const char *p;
377
378 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
379 put_char (*p, state);
380
381 state->cur_keyword_ptr = state->keyword;
382 }
383
384 put_char (ch, state);
385 }
386 }
387
388
389 /* Skip a C string or C-style comment from INFILE, and return the
390 character that follows. COMMENT non-zero means skip a comment. If
391 PRINTFLAG is positive, output string contents to outfile. If it is
392 negative, store contents in buf. Convert escape sequences \n and
393 \t to newline and tab; discard \ followed by newline.
394 If SAW_USAGE is non-zero, then any occurrences of the string `usage:'
395 at the beginning of a line will be removed, and *SAW_USAGE set to
396 true if any were encountered. */
397
398 static int
399 read_c_string_or_comment (FILE *infile, int printflag, int comment, int *saw_usage)
400 {
401 register int c;
402 struct rcsoc_state state;
403
404 state.in_file = infile;
405 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
406 state.out_file = (printflag > 0 ? outfile : 0);
407 state.pending_spaces = 0;
408 state.pending_newlines = 0;
409 state.keyword = (saw_usage ? "usage:" : 0);
410 state.cur_keyword_ptr = state.keyword;
411 state.saw_keyword = 0;
412
413 c = getc (infile);
414 if (comment)
415 while (c == '\n' || c == '\r' || c == '\t' || c == ' ')
416 c = getc (infile);
417
418 while (c != EOF)
419 {
420 while (c != EOF && (comment ? c != '*' : c != '"'))
421 {
422 if (c == '\\')
423 {
424 c = getc (infile);
425 if (c == '\n' || c == '\r')
426 {
427 c = getc (infile);
428 continue;
429 }
430 if (c == 'n')
431 c = '\n';
432 if (c == 't')
433 c = '\t';
434 }
435
436 if (c == ' ')
437 state.pending_spaces++;
438 else if (c == '\n')
439 {
440 state.pending_newlines++;
441 state.pending_spaces = 0;
442 }
443 else
444 scan_keyword_or_put_char (c, &state);
445
446 c = getc (infile);
447 }
448
449 if (c != EOF)
450 c = getc (infile);
451
452 if (comment)
453 {
454 if (c == '/')
455 {
456 c = getc (infile);
457 break;
458 }
459
460 scan_keyword_or_put_char ('*', &state);
461 }
462 else
463 {
464 if (c != '"')
465 break;
466
467 /* If we had a "", concatenate the two strings. */
468 c = getc (infile);
469 }
470 }
471
472 if (printflag < 0)
473 *state.buf_ptr = 0;
474
475 if (saw_usage)
476 *saw_usage = state.saw_keyword;
477
478 return c;
479 }
480
481
482 \f
/* Write to file OUT the argument names of function FUNC, whose C
   parameter list text is in BUF (an optional leading `(' is skipped).
   MINARGS and MAXARGS are the minimum and maximum number of arguments.

   The output has the form `(fn ARG1 ARG2 ...)'.  For each parameter
   the last identifier before the `,' or `)' is taken as its name; it
   is upcased and its underscores become hyphens.  `&optional ' is
   written before the argument at which MINARGS has been used up while
   MAXARGS has not.  A parameter named exactly `void' is omitted, and
   `defalt' is written as `DEFAULT'.  A parameter with no identifier is
   reported through `error' and omitted.  */

static void
write_c_args (FILE *out, char *func, char *buf, int minargs, int maxargs)
{
  register char *p;
  int in_ident = 0;
  char *ident_start = NULL;
  size_t ident_length = 0;

  fprintf (out, "(fn");

  if (*buf == '(')
    ++buf;

  for (p = buf; *p; p++)
    {
      char c = *p;
      int ident_char = (('A' <= c && c <= 'Z')
                        || ('a' <= c && c <= 'z')
                        || ('0' <= c && c <= '9')
                        || c == '_');

      /* Notice when an identifier starts or ends.  */
      if (ident_char != in_ident)
        {
          if (!in_ident)
            {
              in_ident = 1;
              ident_start = p;
            }
          else
            {
              in_ident = 0;
              ident_length = p - ident_start;
            }
        }

      /* Found the end of an argument, write out the last seen
         identifier.  */
      if (c == ',' || c == ')')
        {
          if (ident_length == 0)
            error ("empty arg list for `%s' should be (void), not ()", func);
          /* Compare the whole identifier: a bare strncmp over
             IDENT_LENGTH characters would also match `v' or `vo'.  */
          else if (ident_length == 4
                   && strncmp (ident_start, "void", 4) == 0)
            ;
          else
            {
              putc (' ', out);

              if (minargs == 0 && maxargs > 0)
                fprintf (out, "&optional ");

              minargs--;
              maxargs--;

              /* In C code, `default' is a reserved word, so we spell it
                 `defalt'; demangle that here.  */
              if (ident_length == 6
                  && strncmp (ident_start, "defalt", 6) == 0)
                fprintf (out, "DEFAULT");
              else
                {
                  size_t i;

                  for (i = 0; i < ident_length; i++)
                    {
                      c = ident_start[i];
                      if (c >= 'a' && c <= 'z')
                        /* Upcase the letter.  */
                        c += 'A' - 'a';
                      else if (c == '_')
                        /* Print underscore as hyphen.  */
                        c = '-';
                      putc (c, out);
                    }
                }
            }

          /* Forget this identifier so that a following argument with
             no identifier of its own cannot reuse a stale one.  */
          ident_length = 0;
        }
    }

  putc (')', out);
}
564 \f
/* The kinds of variable recorded while scanning C sources in `-g'
   mode.  Each valid kind selects the C type used for the variable's
   field in the generated header.  */
enum global_type
{
  EMACS_INTEGER,        /* Written with type EMACS_INT.  */
  BOOLEAN,              /* Written with type int.  */
  LISP_OBJECT,          /* Written with type Lisp_Object.  */
  INVALID               /* Not a recognized kind.  */
};
573
574 /* A single global. */
575 struct global
576 {
577 enum global_type type;
578 char *name;
579 };
580
581 /* All the variable names we saw while scanning C sources in `-g'
582 mode. */
583 int num_globals;
584 int num_globals_allocated;
585 struct global *globals;
586
587 static void
588 add_global (enum global_type type, char *name)
589 {
590 /* Ignore the one non-symbol that can occur. */
591 if (strcmp (name, "..."))
592 {
593 ++num_globals;
594
595 if (num_globals_allocated == 0)
596 {
597 num_globals_allocated = 100;
598 globals = xmalloc (num_globals_allocated * sizeof (struct global));
599 }
600 else if (num_globals == num_globals_allocated)
601 {
602 num_globals_allocated *= 2;
603 globals = xrealloc (globals,
604 num_globals_allocated * sizeof (struct global));
605 }
606
607 globals[num_globals - 1].type = type;
608 globals[num_globals - 1].name = name;
609 }
610 }
611
612 static int
613 compare_globals (const void *a, const void *b)
614 {
615 const struct global *ga = a;
616 const struct global *gb = b;
617 return strcmp (ga->name, gb->name);
618 }
619
620 static void
621 write_globals (void)
622 {
623 int i;
624 qsort (globals, num_globals, sizeof (struct global), compare_globals);
625 for (i = 0; i < num_globals; ++i)
626 {
627 char const *type;
628
629 switch (globals[i].type)
630 {
631 case EMACS_INTEGER:
632 type = "EMACS_INT";
633 break;
634 case BOOLEAN:
635 type = "int";
636 break;
637 case LISP_OBJECT:
638 type = "Lisp_Object";
639 break;
640 default:
641 fatal ("not a recognized DEFVAR_", 0);
642 }
643
644 fprintf (outfile, " %s f_%s;\n", type, globals[i].name);
645 fprintf (outfile, "#define %s globals.f_%s\n",
646 globals[i].name, globals[i].name);
647 while (i + 1 < num_globals
648 && !strcmp (globals[i].name, globals[i + 1].name))
649 ++i;
650 }
651
652 fprintf (outfile, "};\n");
653 fprintf (outfile, "extern struct emacs_globals globals;\n");
654 }
655
656 \f
657 /* Read through a c file. If a .o file is named,
658 the corresponding .c or .m file is read instead.
659 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
660 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
661
662 static int
663 scan_c_file (char *filename, const char *mode)
664 {
665 FILE *infile;
666 register int c;
667 register int commas;
668 int minargs, maxargs;
669 int extension = filename[strlen (filename) - 1];
670
671 if (extension == 'o')
672 filename[strlen (filename) - 1] = 'c';
673
674 infile = fopen (filename, mode);
675
676 if (infile == NULL && extension == 'o')
677 {
678 /* Try .m. */
679 filename[strlen (filename) - 1] = 'm';
680 infile = fopen (filename, mode);
681 if (infile == NULL)
682 filename[strlen (filename) - 1] = 'c'; /* Don't confuse people. */
683 }
684
685 /* No error if non-ex input file. */
686 if (infile == NULL)
687 {
688 perror (filename);
689 return 0;
690 }
691
692 /* Reset extension to be able to detect duplicate files. */
693 filename[strlen (filename) - 1] = extension;
694
695 c = '\n';
696 while (!feof (infile))
697 {
698 int doc_keyword = 0;
699 int defunflag = 0;
700 int defvarperbufferflag = 0;
701 int defvarflag = 0;
702 enum global_type type = INVALID;
703
704 if (c != '\n' && c != '\r')
705 {
706 c = getc (infile);
707 continue;
708 }
709 c = getc (infile);
710 if (c == ' ')
711 {
712 while (c == ' ')
713 c = getc (infile);
714 if (c != 'D')
715 continue;
716 c = getc (infile);
717 if (c != 'E')
718 continue;
719 c = getc (infile);
720 if (c != 'F')
721 continue;
722 c = getc (infile);
723 if (c != 'V')
724 continue;
725 c = getc (infile);
726 if (c != 'A')
727 continue;
728 c = getc (infile);
729 if (c != 'R')
730 continue;
731 c = getc (infile);
732 if (c != '_')
733 continue;
734
735 defvarflag = 1;
736
737 c = getc (infile);
738 defvarperbufferflag = (c == 'P');
739 if (generate_globals)
740 {
741 if (c == 'I')
742 type = EMACS_INTEGER;
743 else if (c == 'L')
744 type = LISP_OBJECT;
745 else if (c == 'B')
746 type = BOOLEAN;
747 }
748
749 c = getc (infile);
750 /* We need to distinguish between DEFVAR_BOOL and
751 DEFVAR_BUFFER_DEFAULTS. */
752 if (generate_globals && type == BOOLEAN && c != 'O')
753 type = INVALID;
754 }
755 else if (c == 'D')
756 {
757 c = getc (infile);
758 if (c != 'E')
759 continue;
760 c = getc (infile);
761 if (c != 'F')
762 continue;
763 c = getc (infile);
764 defunflag = c == 'U';
765 }
766 else continue;
767
768 if (generate_globals && (!defvarflag || defvarperbufferflag
769 || type == INVALID))
770 continue;
771
772 while (c != '(')
773 {
774 if (c < 0)
775 goto eof;
776 c = getc (infile);
777 }
778
779 /* Lisp variable or function name. */
780 c = getc (infile);
781 if (c != '"')
782 continue;
783 c = read_c_string_or_comment (infile, -1, 0, 0);
784
785 if (generate_globals)
786 {
787 int i = 0;
788 char *name;
789
790 /* Skip "," and whitespace. */
791 do
792 {
793 c = getc (infile);
794 }
795 while (c == ',' || c == ' ' || c == '\t' || c == '\n' || c == '\r');
796
797 /* Read in the identifier. */
798 do
799 {
800 input_buffer[i++] = c;
801 c = getc (infile);
802 }
803 while (! (c == ',' || c == ' ' || c == '\t'
804 || c == '\n' || c == '\r'));
805 input_buffer[i] = '\0';
806
807 name = xmalloc (i + 1);
808 memcpy (name, input_buffer, i + 1);
809 add_global (type, name);
810 continue;
811 }
812
813 /* DEFVAR_LISP ("name", addr, "doc")
814 DEFVAR_LISP ("name", addr /\* doc *\/)
815 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
816
817 if (defunflag)
818 commas = 5;
819 else if (defvarperbufferflag)
820 commas = 3;
821 else if (defvarflag)
822 commas = 1;
823 else /* For DEFSIMPLE and DEFPRED. */
824 commas = 2;
825
826 while (commas)
827 {
828 if (c == ',')
829 {
830 commas--;
831
832 if (defunflag && (commas == 1 || commas == 2))
833 {
834 int scanned = 0;
835 do
836 c = getc (infile);
837 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
838 if (c < 0)
839 goto eof;
840 ungetc (c, infile);
841 if (commas == 2) /* Pick up minargs. */
842 scanned = fscanf (infile, "%d", &minargs);
843 else /* Pick up maxargs. */
844 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
845 maxargs = -1;
846 else
847 scanned = fscanf (infile, "%d", &maxargs);
848 if (scanned < 0)
849 goto eof;
850 }
851 }
852
853 if (c == EOF)
854 goto eof;
855 c = getc (infile);
856 }
857
858 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
859 c = getc (infile);
860
861 if (c == '"')
862 c = read_c_string_or_comment (infile, 0, 0, 0);
863
864 while (c != EOF && c != ',' && c != '/')
865 c = getc (infile);
866 if (c == ',')
867 {
868 c = getc (infile);
869 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
870 c = getc (infile);
871 while ((c >= 'a' && c <= 'z') || (c >= 'Z' && c <= 'Z'))
872 c = getc (infile);
873 if (c == ':')
874 {
875 doc_keyword = 1;
876 c = getc (infile);
877 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
878 c = getc (infile);
879 }
880 }
881
882 if (c == '"'
883 || (c == '/'
884 && (c = getc (infile),
885 ungetc (c, infile),
886 c == '*')))
887 {
888 int comment = c != '"';
889 int saw_usage;
890
891 putc (037, outfile);
892 putc (defvarflag ? 'V' : 'F', outfile);
893 fprintf (outfile, "%s\n", input_buffer);
894
895 if (comment)
896 getc (infile); /* Skip past `*'. */
897 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
898
899 /* If this is a defun, find the arguments and print them. If
900 this function takes MANY or UNEVALLED args, then the C source
901 won't give the names of the arguments, so we shouldn't bother
902 trying to find them.
903
904 Various doc-string styles:
905 0: DEFUN (..., "DOC") (args) [!comment]
906 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
907 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
908 */
909 if (defunflag && maxargs != -1 && !saw_usage)
910 {
911 char argbuf[1024], *p = argbuf;
912
913 if (!comment || doc_keyword)
914 while (c != ')')
915 {
916 if (c < 0)
917 goto eof;
918 c = getc (infile);
919 }
920
921 /* Skip into arguments. */
922 while (c != '(')
923 {
924 if (c < 0)
925 goto eof;
926 c = getc (infile);
927 }
928 /* Copy arguments into ARGBUF. */
929 *p++ = c;
930 do
931 *p++ = c = getc (infile);
932 while (c != ')');
933 *p = '\0';
934 /* Output them. */
935 fprintf (outfile, "\n\n");
936 write_c_args (outfile, input_buffer, argbuf, minargs, maxargs);
937 }
938 else if (defunflag && maxargs == -1 && !saw_usage)
939 /* The DOC should provide the usage form. */
940 fprintf (stderr, "Missing `usage' for function `%s'.\n",
941 input_buffer);
942 }
943 }
944 eof:
945 fclose (infile);
946 return 0;
947 }
948 \f
949 /* Read a file of Lisp code, compiled or interpreted.
950 Looks for
951 (defun NAME ARGS DOCSTRING ...)
952 (defmacro NAME ARGS DOCSTRING ...)
953 (defsubst NAME ARGS DOCSTRING ...)
954 (autoload (quote NAME) FILE DOCSTRING ...)
955 (defvar NAME VALUE DOCSTRING)
956 (defconst NAME VALUE DOCSTRING)
957 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
958 (fset (quote NAME) #[... DOCSTRING ...])
959 (defalias (quote NAME) #[... DOCSTRING ...])
960 (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
961 starting in column zero.
962 (quote NAME) may appear as 'NAME as well.
963
964 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
965 When we find that, we save it for the following defining-form,
966 and we use that instead of reading a doc string within that defining-form.
967
968 For defvar, defconst, and fset we skip to the docstring with a kludgy
969 formatting convention: all docstrings must appear on the same line as the
970 initial open-paren (the one in column zero) and must contain a backslash
971 and a newline immediately after the initial double-quote. No newlines
972 must appear between the beginning of the form and the first double-quote.
973 For defun, defmacro, and autoload, we know how to skip over the
974 arglist, but the doc string must still have a backslash and newline
975 immediately after the double quote.
976 The only source files that must follow this convention are preloaded
977 uncompiled ones like loaddefs.el and bindings.el; aside
978 from that, it is always the .elc file that we look at, and they are no
979 problem because byte-compiler output follows this convention.
980 The NAME and DOCSTRING are output.
981 NAME is preceded by `F' for a function or `V' for a variable.
982 An entry is output only if DOCSTRING has \ newline just after the opening ".
983 */
984
/* Advance INFILE past any run of spaces, tabs, newlines and carriage
   returns, leaving the first other character unread.  At end of input
   nothing is pushed back.  */
static void
skip_white (FILE *infile)
{
  /* An int, not a char: otherwise the byte 0xFF cannot be told apart
     from EOF and would be lost instead of pushed back.  */
  int c;

  do
    c = getc (infile);
  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

  if (c != EOF)
    ungetc (c, infile);
}
993
/* Read one Lisp symbol from INFILE into BUFFER as a NUL-terminated
   string, skipping whitespace before and after it.  The symbol ends at
   whitespace, `(', `)' or end of input; that terminating character is
   left unread.  A backslash makes the following character part of the
   symbol.  If no symbol is found, a diagnostic is written to stderr
   and BUFFER is left empty.

   No length is passed in, so BUFFER must be large enough for the
   symbol plus the NUL; nothing here bounds the copy.  */
static void
read_lisp_symbol (FILE *infile, char *buffer)
{
  /* An int, so that EOF is distinguishable from every byte value.  */
  int c;
  char *fillp = buffer;

  skip_white (infile);
  while (1)
    {
      c = getc (infile);
      if (c == '\\')
        {
          /* Store the escaped character in the next free slot.  */
          c = getc (infile);
          if (c == EOF)
            {
              *fillp = 0;
              break;
            }
          *fillp++ = c;
        }
      else if (c == ' ' || c == '\t' || c == '\n' || c == '\r'
               || c == '(' || c == ')' || c == EOF)
        {
          if (c != EOF)
            ungetc (c, infile);
          *fillp = 0;
          break;
        }
      else
        *fillp++ = c;
    }

  if (! buffer[0])
    {
      if (c == EOF)
        fprintf (stderr, "## expected a symbol, got end of file\n");
      else
        fprintf (stderr, "## expected a symbol, got '%c'\n", c);
    }

  skip_white (infile);
}
1021
/* Read INFILE up to and including the next line break (or up to end of
   input) and report whether the two characters just before that point
   were a double quote followed by a backslash, which is how a doc
   string starts.

   Returns 1 if so; the line break has then been consumed, so the next
   character read is the first one of the doc string.  Returns 0
   otherwise; the line break is then pushed back so that the caller
   still sees the end of the line.  */
static int
search_lisp_doc_at_eol (FILE *infile)
{
  /* Ints, so that the byte 0xFF is not mistaken for EOF and the EOF
     test also works where plain char is unsigned.  */
  int c = 0, c1 = 0, c2 = 0;

  /* Skip until the end of line; remember two previous chars.  */
  while (c != '\n' && c != '\r' && c != EOF)
    {
      c2 = c1;
      c1 = c;
      c = getc (infile);
    }

  /* If two previous characters were " and \,
     this is a doc string.  Otherwise, there is none.  */
  if (c2 != '"' || c1 != '\\')
    {
#ifdef DEBUG
      /* The symbol and file names are not available in this function.  */
      fprintf (stderr, "## non-docstring found at end of line\n");
#endif
      if (c != EOF)
        ungetc (c, infile);
      return 0;
    }
  return 1;
}
1049
1050 static int
1051 scan_lisp_file (const char *filename, const char *mode)
1052 {
1053 FILE *infile;
1054 register int c;
1055 char *saved_string = 0;
1056
1057 if (generate_globals)
1058 fatal ("scanning lisp file when -g specified", 0);
1059
1060 infile = fopen (filename, mode);
1061 if (infile == NULL)
1062 {
1063 perror (filename);
1064 return 0; /* No error. */
1065 }
1066
1067 c = '\n';
1068 while (!feof (infile))
1069 {
1070 char buffer[BUFSIZ];
1071 char type;
1072
1073 /* If not at end of line, skip till we get to one. */
1074 if (c != '\n' && c != '\r')
1075 {
1076 c = getc (infile);
1077 continue;
1078 }
1079 /* Skip the line break. */
1080 while (c == '\n' || c == '\r')
1081 c = getc (infile);
1082 /* Detect a dynamic doc string and save it for the next expression. */
1083 if (c == '#')
1084 {
1085 c = getc (infile);
1086 if (c == '@')
1087 {
1088 size_t length = 0;
1089 size_t i;
1090
1091 /* Read the length. */
1092 while ((c = getc (infile),
1093 c >= '0' && c <= '9'))
1094 {
1095 length *= 10;
1096 length += c - '0';
1097 }
1098
1099 if (length <= 1)
1100 fatal ("invalid dynamic doc string length", "");
1101
1102 if (c != ' ')
1103 fatal ("space not found after dynamic doc string length", "");
1104
1105 /* The next character is a space that is counted in the length
1106 but not part of the doc string.
1107 We already read it, so just ignore it. */
1108 length--;
1109
1110 /* Read in the contents. */
1111 free (saved_string);
1112 saved_string = (char *) xmalloc (length);
1113 for (i = 0; i < length; i++)
1114 saved_string[i] = getc (infile);
1115 /* The last character is a ^_.
1116 That is needed in the .elc file
1117 but it is redundant in DOC. So get rid of it here. */
1118 saved_string[length - 1] = 0;
1119 /* Skip the line break. */
1120 while (c == '\n' || c == '\r')
1121 c = getc (infile);
1122 /* Skip the following line. */
1123 while (c != '\n' && c != '\r')
1124 c = getc (infile);
1125 }
1126 continue;
1127 }
1128
1129 if (c != '(')
1130 continue;
1131
1132 read_lisp_symbol (infile, buffer);
1133
1134 if (! strcmp (buffer, "defun")
1135 || ! strcmp (buffer, "defmacro")
1136 || ! strcmp (buffer, "defsubst"))
1137 {
1138 type = 'F';
1139 read_lisp_symbol (infile, buffer);
1140
1141 /* Skip the arguments: either "nil" or a list in parens. */
1142
1143 c = getc (infile);
1144 if (c == 'n') /* nil */
1145 {
1146 if ((c = getc (infile)) != 'i'
1147 || (c = getc (infile)) != 'l')
1148 {
1149 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1150 buffer, filename);
1151 continue;
1152 }
1153 }
1154 else if (c != '(')
1155 {
1156 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1157 buffer, filename);
1158 continue;
1159 }
1160 else
1161 while (c != ')')
1162 c = getc (infile);
1163 skip_white (infile);
1164
1165 /* If the next three characters aren't `dquote bslash newline'
1166 then we're not reading a docstring.
1167 */
1168 if ((c = getc (infile)) != '"'
1169 || (c = getc (infile)) != '\\'
1170 || ((c = getc (infile)) != '\n' && c != '\r'))
1171 {
1172 #ifdef DEBUG
1173 fprintf (stderr, "## non-docstring in %s (%s)\n",
1174 buffer, filename);
1175 #endif
1176 continue;
1177 }
1178 }
1179
1180 /* defcustom can only occur in uncompiled Lisp files. */
1181 else if (! strcmp (buffer, "defvar")
1182 || ! strcmp (buffer, "defconst")
1183 || ! strcmp (buffer, "defcustom"))
1184 {
1185 type = 'V';
1186 read_lisp_symbol (infile, buffer);
1187
1188 if (saved_string == 0)
1189 if (!search_lisp_doc_at_eol (infile))
1190 continue;
1191 }
1192
1193 else if (! strcmp (buffer, "custom-declare-variable")
1194 || ! strcmp (buffer, "defvaralias")
1195 )
1196 {
1197 type = 'V';
1198
1199 c = getc (infile);
1200 if (c == '\'')
1201 read_lisp_symbol (infile, buffer);
1202 else
1203 {
1204 if (c != '(')
1205 {
1206 fprintf (stderr,
1207 "## unparsable name in custom-declare-variable in %s\n",
1208 filename);
1209 continue;
1210 }
1211 read_lisp_symbol (infile, buffer);
1212 if (strcmp (buffer, "quote"))
1213 {
1214 fprintf (stderr,
1215 "## unparsable name in custom-declare-variable in %s\n",
1216 filename);
1217 continue;
1218 }
1219 read_lisp_symbol (infile, buffer);
1220 c = getc (infile);
1221 if (c != ')')
1222 {
1223 fprintf (stderr,
1224 "## unparsable quoted name in custom-declare-variable in %s\n",
1225 filename);
1226 continue;
1227 }
1228 }
1229
1230 if (saved_string == 0)
1231 if (!search_lisp_doc_at_eol (infile))
1232 continue;
1233 }
1234
1235 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1236 {
1237 type = 'F';
1238
1239 c = getc (infile);
1240 if (c == '\'')
1241 read_lisp_symbol (infile, buffer);
1242 else
1243 {
1244 if (c != '(')
1245 {
1246 fprintf (stderr, "## unparsable name in fset in %s\n",
1247 filename);
1248 continue;
1249 }
1250 read_lisp_symbol (infile, buffer);
1251 if (strcmp (buffer, "quote"))
1252 {
1253 fprintf (stderr, "## unparsable name in fset in %s\n",
1254 filename);
1255 continue;
1256 }
1257 read_lisp_symbol (infile, buffer);
1258 c = getc (infile);
1259 if (c != ')')
1260 {
1261 fprintf (stderr,
1262 "## unparsable quoted name in fset in %s\n",
1263 filename);
1264 continue;
1265 }
1266 }
1267
1268 if (saved_string == 0)
1269 if (!search_lisp_doc_at_eol (infile))
1270 continue;
1271 }
1272
1273 else if (! strcmp (buffer, "autoload"))
1274 {
1275 type = 'F';
1276 c = getc (infile);
1277 if (c == '\'')
1278 read_lisp_symbol (infile, buffer);
1279 else
1280 {
1281 if (c != '(')
1282 {
1283 fprintf (stderr, "## unparsable name in autoload in %s\n",
1284 filename);
1285 continue;
1286 }
1287 read_lisp_symbol (infile, buffer);
1288 if (strcmp (buffer, "quote"))
1289 {
1290 fprintf (stderr, "## unparsable name in autoload in %s\n",
1291 filename);
1292 continue;
1293 }
1294 read_lisp_symbol (infile, buffer);
1295 c = getc (infile);
1296 if (c != ')')
1297 {
1298 fprintf (stderr,
1299 "## unparsable quoted name in autoload in %s\n",
1300 filename);
1301 continue;
1302 }
1303 }
1304 skip_white (infile);
1305 if ((c = getc (infile)) != '\"')
1306 {
1307 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1308 buffer, filename);
1309 continue;
1310 }
1311 read_c_string_or_comment (infile, 0, 0, 0);
1312
1313 if (saved_string == 0)
1314 if (!search_lisp_doc_at_eol (infile))
1315 continue;
1316 }
1317
1318 #ifdef DEBUG
1319 else if (! strcmp (buffer, "if")
1320 || ! strcmp (buffer, "byte-code"))
1321 continue;
1322 #endif
1323
1324 else
1325 {
1326 #ifdef DEBUG
1327 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1328 buffer, filename);
1329 #endif
1330 continue;
1331 }
1332
1333 /* At this point, we should either use the previous dynamic doc string in
1334 saved_string or gobble a doc string from the input file.
1335 In the latter case, the opening quote (and leading backslash-newline)
1336 have already been read. */
1337
1338 putc (037, outfile);
1339 putc (type, outfile);
1340 fprintf (outfile, "%s\n", buffer);
1341 if (saved_string)
1342 {
1343 fputs (saved_string, outfile);
1344 /* Don't use one dynamic doc string twice. */
1345 free (saved_string);
1346 saved_string = 0;
1347 }
1348 else
1349 read_c_string_or_comment (infile, 1, 0, 0);
1350 }
1351 fclose (infile);
1352 return 0;
1353 }
1354
1355
1356 /* make-docfile.c ends here */