Merge from trunk; up to 2013-02-18T01:30:27Z!monnier@iro.umontreal.ca.
[bpt/emacs.git] / lib-src / make-docfile.c
1 /* Generate doc-string file for GNU Emacs from source files.
2
3 Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2013 Free Software
4 Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21
22 /* The arguments given to this program are all the C and Lisp source files
23 of GNU Emacs. .elc and .el and .c files are allowed.
24 A .o file can also be specified; the .c file it was made from is used.
25 This helps the makefile pass the correct list of files.
26 Option -d DIR means change to DIR before looking for files.
27
28 The results, which go to standard output or to a file
29 specified with -a or -o (-a to append, -o to start from nothing),
30 are entries containing function or variable names and their documentation.
31 Each entry starts with a ^_ character.
32 Then comes F for a function or V for a variable.
33 Then comes the function or variable name, terminated with a newline.
34 Then comes the documentation for that function or variable.
35 */
36
37 #include <config.h>
38
39 #include <stdio.h>
40 #include <stdlib.h> /* config.h unconditionally includes this anyway */
41 #ifdef MSDOS
42 #include <fcntl.h>
43 #endif /* MSDOS */
44 #ifdef WINDOWSNT
45 /* Defined to be sys_fopen in ms-w32.h, but only #ifdef emacs, so this
46 is really just insurance. */
47 #undef fopen
48 #include <fcntl.h>
49 #include <direct.h>
50 #endif /* WINDOWSNT */
51
/* File-open modes and the path-separator test.  These differ between
   DOS/Windows builds (DOS_NT) and every other system.  */
#ifndef DOS_NT
# define READ_TEXT "r"
# define READ_BINARY "r"
# define IS_SLASH(c) ((c) == '/')
#else /* DOS_NT */
/* chdir is defined to be sys_chdir in ms-w32.h, but only #ifdef emacs,
   so undefining it here is really just insurance.

   Similarly, msdos defines this as sys_chdir, but we're not linking
   with the file where that function is defined.  */
# undef chdir
# define READ_TEXT "rt"
# define READ_BINARY "rb"
# define IS_SLASH(c) ((c) == '/' || (c) == '\\' || (c) == ':')
#endif /* DOS_NT */
67
/* Forward declarations: these functions are called (from main and
   from scan_file) before their definitions appear in this file.  */
static int scan_file (char *filename);
static int scan_lisp_file (const char *filename, const char *mode);
static int scan_c_file (char *filename, const char *mode);
static void start_globals (void);
static void write_globals (void);
73
74 #include <unistd.h>
75
/* Stdio stream for output to the DOC file.  main points it at stdout
   by default and replaces it when -o or -a is given.  */
FILE *outfile;

/* Name this program was invoked with (argv[0]); used as the prefix of
   error messages.  */
char *progname;

/* Nonzero if this invocation is generating globals.h (option -g).  */
int generate_globals;
84
85 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
86
87 /* VARARGS1 */
88 static void
89 error (const char *s1, const char *s2)
90 {
91 fprintf (stderr, "%s: ", progname);
92 fprintf (stderr, s1, s2);
93 fprintf (stderr, "\n");
94 }
95
/* Report an error exactly as `error' does, then terminate the program
   with EXIT_FAILURE.  Never returns.  */

/* VARARGS1 */
static _Noreturn void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
105
/* Allocate SIZE bytes like malloc, but exit through `fatal' instead of
   returning a null pointer.  */

static void *
xmalloc (unsigned int size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted", 0);

  return block;
}
116
/* Resize the block ARG to SIZE bytes like realloc, but exit through
   `fatal' instead of returning a null pointer.  */

static void *
xrealloc (void *arg, unsigned int size)
{
  void *block = realloc (arg, size);

  if (!block)
    fatal ("virtual memory exhausted", 0);

  return block;
}
127
128 \f
129 int
130 main (int argc, char **argv)
131 {
132 int i;
133 int err_count = 0;
134 int first_infile;
135
136 progname = argv[0];
137
138 outfile = stdout;
139
140 /* Don't put CRs in the DOC file. */
141 #ifdef MSDOS
142 _fmode = O_BINARY;
143 #if 0 /* Suspicion is that this causes hanging.
144 So instead we require people to use -o on MSDOS. */
145 (stdout)->_flag &= ~_IOTEXT;
146 _setmode (fileno (stdout), O_BINARY);
147 #endif
148 outfile = 0;
149 #endif /* MSDOS */
150 #ifdef WINDOWSNT
151 _fmode = O_BINARY;
152 _setmode (fileno (stdout), O_BINARY);
153 #endif /* WINDOWSNT */
154
155 /* If first two args are -o FILE, output to FILE. */
156 i = 1;
157 if (argc > i + 1 && !strcmp (argv[i], "-o"))
158 {
159 outfile = fopen (argv[i + 1], "w");
160 i += 2;
161 }
162 if (argc > i + 1 && !strcmp (argv[i], "-a"))
163 {
164 outfile = fopen (argv[i + 1], "a");
165 i += 2;
166 }
167 if (argc > i + 1 && !strcmp (argv[i], "-d"))
168 {
169 if (chdir (argv[i + 1]) != 0)
170 {
171 perror (argv[i + 1]);
172 return EXIT_FAILURE;
173 }
174 i += 2;
175 }
176 if (argc > i && !strcmp (argv[i], "-g"))
177 {
178 generate_globals = 1;
179 ++i;
180 }
181
182 if (outfile == 0)
183 fatal ("No output file specified", "");
184
185 if (generate_globals)
186 start_globals ();
187
188 first_infile = i;
189 for (; i < argc; i++)
190 {
191 int j;
192 /* Don't process one file twice. */
193 for (j = first_infile; j < i; j++)
194 if (! strcmp (argv[i], argv[j]))
195 break;
196 if (j == i)
197 err_count += scan_file (argv[i]);
198 }
199
200 if (err_count == 0 && generate_globals)
201 write_globals ();
202
203 return (err_count > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
204 }
205
206 /* Add a source file name boundary marker in the output file. */
207 static void
208 put_filename (char *filename)
209 {
210 char *tmp;
211
212 for (tmp = filename; *tmp; tmp++)
213 {
214 if (IS_DIRECTORY_SEP (*tmp))
215 filename = tmp + 1;
216 }
217
218 putc (037, outfile);
219 putc ('S', outfile);
220 fprintf (outfile, "%s\n", filename);
221 }
222
223 /* Read file FILENAME and output its doc strings to outfile. */
224 /* Return 1 if file is not found, 0 if it is found. */
225
226 static int
227 scan_file (char *filename)
228 {
229
230 size_t len = strlen (filename);
231
232 if (!generate_globals)
233 put_filename (filename);
234 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
235 return scan_lisp_file (filename, READ_BINARY);
236 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
237 return scan_lisp_file (filename, READ_TEXT);
238 else
239 return scan_c_file (filename, READ_TEXT);
240 }
241
242 static void
243 start_globals (void)
244 {
245 fprintf (outfile, "/* This file was auto-generated by make-docfile. */\n");
246 fprintf (outfile, "/* DO NOT EDIT. */\n");
247 fprintf (outfile, "struct emacs_globals {\n");
248 }
249 \f
/* Scratch buffer shared by the C scanner.  read_c_string_or_comment
   stores string contents here when its PRINTFLAG is negative, and
   scan_c_file stores the C identifier of a global here in -g mode.  */
static char input_buffer[128];
251
/* Working state of `read_c_string_or_comment' and its helpers.  */
struct rcsoc_state
{
  /* Spaces that have been read but not yet output.  */
  unsigned pending_spaces;
  /* Newlines that have been read but not yet output.  */
  unsigned pending_newlines;

  /* The stream being read.  */
  FILE *in_file;

  /* If non-null, the next free position of a buffer that receives a
     copy of the characters.  */
  char *buf_ptr;
  /* If non-null, a stream that receives a copy of the characters.  */
  FILE *out_file;

  /* A keyword to look for at the beginning of lines.  When found it
     is not copied, and SAW_KEYWORD is set to true.  */
  const char *keyword;
  /* How far into KEYWORD the current partial match in the input
     stream has progressed.  */
  const char *cur_keyword_ptr;
  /* True once an occurrence of KEYWORD has been seen.  */
  int saw_keyword;
};
275
276 /* Output CH to the file or buffer in STATE. Any pending newlines or
277 spaces are output first. */
278
279 static inline void
280 put_char (int ch, struct rcsoc_state *state)
281 {
282 int out_ch;
283 do
284 {
285 if (state->pending_newlines > 0)
286 {
287 state->pending_newlines--;
288 out_ch = '\n';
289 }
290 else if (state->pending_spaces > 0)
291 {
292 state->pending_spaces--;
293 out_ch = ' ';
294 }
295 else
296 out_ch = ch;
297
298 if (state->out_file)
299 putc (out_ch, state->out_file);
300 if (state->buf_ptr)
301 *state->buf_ptr++ = out_ch;
302 }
303 while (out_ch != ch);
304 }
305
306 /* If in the middle of scanning a keyword, continue scanning with
307 character CH, otherwise output CH to the file or buffer in STATE.
308 Any pending newlines or spaces are output first, as well as any
309 previously scanned characters that were thought to be part of a
310 keyword, but were in fact not. */
311
312 static void
313 scan_keyword_or_put_char (int ch, struct rcsoc_state *state)
314 {
315 if (state->keyword
316 && *state->cur_keyword_ptr == ch
317 && (state->cur_keyword_ptr > state->keyword
318 || state->pending_newlines > 0))
319 /* We might be looking at STATE->keyword at some point.
320 Keep looking until we know for sure. */
321 {
322 if (*++state->cur_keyword_ptr == '\0')
323 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
324 {
325 state->saw_keyword = 1;
326
327 /* Reset the scanning pointer. */
328 state->cur_keyword_ptr = state->keyword;
329
330 /* Canonicalize whitespace preceding a usage string. */
331 state->pending_newlines = 2;
332 state->pending_spaces = 0;
333
334 /* Skip any whitespace between the keyword and the
335 usage string. */
336 do
337 ch = getc (state->in_file);
338 while (ch == ' ' || ch == '\n');
339
340 /* Output the open-paren we just read. */
341 put_char (ch, state);
342
343 /* Skip the function name and replace it with `fn'. */
344 do
345 ch = getc (state->in_file);
346 while (ch != ' ' && ch != ')');
347 put_char ('f', state);
348 put_char ('n', state);
349
350 /* Put back the last character. */
351 ungetc (ch, state->in_file);
352 }
353 }
354 else
355 {
356 if (state->keyword && state->cur_keyword_ptr > state->keyword)
357 /* We scanned the beginning of a potential usage
358 keyword, but it was a false alarm. Output the
359 part we scanned. */
360 {
361 const char *p;
362
363 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
364 put_char (*p, state);
365
366 state->cur_keyword_ptr = state->keyword;
367 }
368
369 put_char (ch, state);
370 }
371 }
372
373
374 /* Skip a C string or C-style comment from INFILE, and return the
375 character that follows. COMMENT non-zero means skip a comment. If
376 PRINTFLAG is positive, output string contents to outfile. If it is
377 negative, store contents in buf. Convert escape sequences \n and
378 \t to newline and tab; discard \ followed by newline.
379 If SAW_USAGE is non-zero, then any occurrences of the string `usage:'
380 at the beginning of a line will be removed, and *SAW_USAGE set to
381 true if any were encountered. */
382
383 static int
384 read_c_string_or_comment (FILE *infile, int printflag, int comment, int *saw_usage)
385 {
386 register int c;
387 struct rcsoc_state state;
388
389 state.in_file = infile;
390 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
391 state.out_file = (printflag > 0 ? outfile : 0);
392 state.pending_spaces = 0;
393 state.pending_newlines = 0;
394 state.keyword = (saw_usage ? "usage:" : 0);
395 state.cur_keyword_ptr = state.keyword;
396 state.saw_keyword = 0;
397
398 c = getc (infile);
399 if (comment)
400 while (c == '\n' || c == '\r' || c == '\t' || c == ' ')
401 c = getc (infile);
402
403 while (c != EOF)
404 {
405 while (c != EOF && (comment ? c != '*' : c != '"'))
406 {
407 if (c == '\\')
408 {
409 c = getc (infile);
410 if (c == '\n' || c == '\r')
411 {
412 c = getc (infile);
413 continue;
414 }
415 if (c == 'n')
416 c = '\n';
417 if (c == 't')
418 c = '\t';
419 }
420
421 if (c == ' ')
422 state.pending_spaces++;
423 else if (c == '\n')
424 {
425 state.pending_newlines++;
426 state.pending_spaces = 0;
427 }
428 else
429 scan_keyword_or_put_char (c, &state);
430
431 c = getc (infile);
432 }
433
434 if (c != EOF)
435 c = getc (infile);
436
437 if (comment)
438 {
439 if (c == '/')
440 {
441 c = getc (infile);
442 break;
443 }
444
445 scan_keyword_or_put_char ('*', &state);
446 }
447 else
448 {
449 if (c != '"')
450 break;
451
452 /* If we had a "", concatenate the two strings. */
453 c = getc (infile);
454 }
455 }
456
457 if (printflag < 0)
458 *state.buf_ptr = 0;
459
460 if (saw_usage)
461 *saw_usage = state.saw_keyword;
462
463 return c;
464 }
465
466
467 \f
/* Write to file OUT the argument names of function FUNC, whose text is in BUF.
   MINARGS and MAXARGS are the minimum and maximum number of arguments.

   The output has the form "(fn ARG1 ARG2 &optional ARG3 ...)".  For
   each comma- or paren-terminated argument in BUF the last identifier
   is taken as the argument name; it is upcased and its underscores
   are turned into hyphens.  `&optional' is written before every
   argument reached once MINARGS arguments have been output while
   MAXARGS is still positive.  An argument list
   consisting of exactly `void' produces no arguments; an empty one
   is reported through `error'.  */

static void
write_c_args (FILE *out, char *func, char *buf, int minargs, int maxargs)
{
  char *p;
  int in_ident = 0;
  char *ident_start = NULL;
  size_t ident_length = 0;

  fprintf (out, "(fn");

  if (*buf == '(')
    ++buf;

  for (p = buf; *p; p++)
    {
      char c = *p;
      int ident_char = (('A' <= c && c <= 'Z')
                        || ('a' <= c && c <= 'z')
                        || ('0' <= c && c <= '9')
                        || c == '_');

      /* Notice when a new identifier starts or the current one ends.  */
      if (ident_char != in_ident)
        {
          if (!in_ident)
            {
              in_ident = 1;
              ident_start = p;
            }
          else
            {
              in_ident = 0;
              ident_length = p - ident_start;
            }
        }

      /* Found the end of an argument, write out the last seen
         identifier.  */
      if (c == ',' || c == ')')
        {
          const char *id = ident_start;
          size_t len = ident_length;

          /* Forget this identifier so that a following separator with
             no identifier of its own is reported as empty instead of
             re-reading stale (or out-of-bounds) text.  */
          ident_length = 0;

          if (len == 0)
            {
              error ("empty arg list for `%s' should be (void), not ()", func);
              continue;
            }

          /* Compare the whole identifier: a mere prefix of "void",
             such as an argument named `v', is a real argument.  */
          if (len == 4 && memcmp (id, "void", 4) == 0)
            continue;

          putc (' ', out);

          if (minargs == 0 && maxargs > 0)
            fprintf (out, "&optional ");

          minargs--;
          maxargs--;

          /* In C code, `default' is a reserved word, so we spell it
             `defalt'; demangle that here.  */
          if (len == 6 && memcmp (id, "defalt", 6) == 0)
            fprintf (out, "DEFAULT");
          else
            for (; len > 0; len--)
              {
                c = *id++;
                if (c >= 'a' && c <= 'z')
                  /* Upcase the letter.  */
                  c += 'A' - 'a';
                else if (c == '_')
                  /* Print underscore as hyphen.  */
                  c = '-';
                putc (c, out);
              }
        }
    }

  putc (')', out);
}
549 \f
/* The types of globals.  The enumerators are listed roughly in
   decreasing alignment order to avoid allocation gaps, except that
   functions come last.  compare_globals sorts by these values, so the
   order here is the order of the generated output.  */
enum global_type
{
  INVALID = 0,          /* Not a recognized DEFVAR_ kind.  */
  LISP_OBJECT,          /* Written as `Lisp_Object'.  */
  EMACS_INTEGER,        /* Written as `EMACS_INT'.  */
  BOOLEAN,              /* Written as `bool'.  */
  FUNCTION,             /* Written as an EXFUN declaration.  */
};
560
561 /* A single global. */
562 struct global
563 {
564 enum global_type type;
565 char *name;
566 int value;
567 };
568
/* All the variable names we saw while scanning C sources in `-g'
   mode.  NUM_GLOBALS is the number of entries in use,
   NUM_GLOBALS_ALLOCATED the capacity of the GLOBALS array; add_global
   grows the array as needed.  */
int num_globals;
int num_globals_allocated;
struct global *globals;
574
575 static void
576 add_global (enum global_type type, char *name, int value)
577 {
578 /* Ignore the one non-symbol that can occur. */
579 if (strcmp (name, "..."))
580 {
581 ++num_globals;
582
583 if (num_globals_allocated == 0)
584 {
585 num_globals_allocated = 100;
586 globals = xmalloc (num_globals_allocated * sizeof (struct global));
587 }
588 else if (num_globals == num_globals_allocated)
589 {
590 num_globals_allocated *= 2;
591 globals = xrealloc (globals,
592 num_globals_allocated * sizeof (struct global));
593 }
594
595 globals[num_globals - 1].type = type;
596 globals[num_globals - 1].name = name;
597 globals[num_globals - 1].value = value;
598 }
599 }
600
601 static int
602 compare_globals (const void *a, const void *b)
603 {
604 const struct global *ga = a;
605 const struct global *gb = b;
606
607 if (ga->type != gb->type)
608 return ga->type - gb->type;
609
610 return strcmp (ga->name, gb->name);
611 }
612
613 static void
614 close_emacs_globals (void)
615 {
616 fprintf (outfile, "};\n");
617 fprintf (outfile, "extern struct emacs_globals globals;\n");
618 }
619
620 static void
621 write_globals (void)
622 {
623 int i, seen_defun = 0;
624 qsort (globals, num_globals, sizeof (struct global), compare_globals);
625 for (i = 0; i < num_globals; ++i)
626 {
627 char const *type = 0;
628
629 switch (globals[i].type)
630 {
631 case EMACS_INTEGER:
632 type = "EMACS_INT";
633 break;
634 case BOOLEAN:
635 type = "bool";
636 break;
637 case LISP_OBJECT:
638 type = "Lisp_Object";
639 break;
640 case FUNCTION:
641 if (!seen_defun)
642 {
643 close_emacs_globals ();
644 fprintf (outfile, "\n");
645 seen_defun = 1;
646 }
647 break;
648 default:
649 fatal ("not a recognized DEFVAR_", 0);
650 }
651
652 if (type)
653 {
654 fprintf (outfile, " %s f_%s;\n", type, globals[i].name);
655 fprintf (outfile, "#define %s globals.f_%s\n",
656 globals[i].name, globals[i].name);
657 }
658 else
659 {
660 /* It would be nice to have a cleaner way to deal with these
661 special hacks. */
662 if (strcmp (globals[i].name, "Fthrow") == 0
663 || strcmp (globals[i].name, "Ftop_level") == 0
664 || strcmp (globals[i].name, "Fkill_emacs") == 0
665 || strcmp (globals[i].name, "Fexit_recursive_edit") == 0
666 || strcmp (globals[i].name, "Fabort_recursive_edit") == 0)
667 fprintf (outfile, "_Noreturn ");
668 fprintf (outfile, "EXFUN (%s, ", globals[i].name);
669 if (globals[i].value == -1)
670 fprintf (outfile, "MANY");
671 else if (globals[i].value == -2)
672 fprintf (outfile, "UNEVALLED");
673 else
674 fprintf (outfile, "%d", globals[i].value);
675 fprintf (outfile, ");\n");
676 }
677
678 while (i + 1 < num_globals
679 && !strcmp (globals[i].name, globals[i + 1].name))
680 {
681 if (globals[i].type == FUNCTION
682 && globals[i].value != globals[i + 1].value)
683 error ("function '%s' defined twice with differing signatures",
684 globals[i].name);
685 ++i;
686 }
687 }
688
689 if (!seen_defun)
690 close_emacs_globals ();
691 }
692
693 \f
694 /* Read through a c file. If a .o file is named,
695 the corresponding .c or .m file is read instead.
696 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
697 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
698
699 static int
700 scan_c_file (char *filename, const char *mode)
701 {
702 FILE *infile;
703 register int c;
704 register int commas;
705 int minargs, maxargs;
706 int extension = filename[strlen (filename) - 1];
707
708 if (extension == 'o')
709 filename[strlen (filename) - 1] = 'c';
710
711 infile = fopen (filename, mode);
712
713 if (infile == NULL && extension == 'o')
714 {
715 /* Try .m. */
716 filename[strlen (filename) - 1] = 'm';
717 infile = fopen (filename, mode);
718 if (infile == NULL)
719 filename[strlen (filename) - 1] = 'c'; /* Don't confuse people. */
720 }
721
722 /* No error if non-ex input file. */
723 if (infile == NULL)
724 {
725 perror (filename);
726 return 0;
727 }
728
729 /* Reset extension to be able to detect duplicate files. */
730 filename[strlen (filename) - 1] = extension;
731
732 c = '\n';
733 while (!feof (infile))
734 {
735 int doc_keyword = 0;
736 int defunflag = 0;
737 int defvarperbufferflag = 0;
738 int defvarflag = 0;
739 enum global_type type = INVALID;
740 char *name IF_LINT (= 0);
741
742 if (c != '\n' && c != '\r')
743 {
744 c = getc (infile);
745 continue;
746 }
747 c = getc (infile);
748 if (c == ' ')
749 {
750 while (c == ' ')
751 c = getc (infile);
752 if (c != 'D')
753 continue;
754 c = getc (infile);
755 if (c != 'E')
756 continue;
757 c = getc (infile);
758 if (c != 'F')
759 continue;
760 c = getc (infile);
761 if (c != 'V')
762 continue;
763 c = getc (infile);
764 if (c != 'A')
765 continue;
766 c = getc (infile);
767 if (c != 'R')
768 continue;
769 c = getc (infile);
770 if (c != '_')
771 continue;
772
773 defvarflag = 1;
774
775 c = getc (infile);
776 defvarperbufferflag = (c == 'P');
777 if (generate_globals)
778 {
779 if (c == 'I')
780 type = EMACS_INTEGER;
781 else if (c == 'L')
782 type = LISP_OBJECT;
783 else if (c == 'B')
784 type = BOOLEAN;
785 }
786
787 c = getc (infile);
788 /* We need to distinguish between DEFVAR_BOOL and
789 DEFVAR_BUFFER_DEFAULTS. */
790 if (generate_globals && type == BOOLEAN && c != 'O')
791 type = INVALID;
792 }
793 else if (c == 'D')
794 {
795 c = getc (infile);
796 if (c != 'E')
797 continue;
798 c = getc (infile);
799 if (c != 'F')
800 continue;
801 c = getc (infile);
802 defunflag = c == 'U';
803 }
804 else continue;
805
806 if (generate_globals
807 && (!defvarflag || defvarperbufferflag || type == INVALID)
808 && !defunflag)
809 continue;
810
811 while (c != '(')
812 {
813 if (c < 0)
814 goto eof;
815 c = getc (infile);
816 }
817
818 /* Lisp variable or function name. */
819 c = getc (infile);
820 if (c != '"')
821 continue;
822 c = read_c_string_or_comment (infile, -1, 0, 0);
823
824 if (generate_globals)
825 {
826 int i = 0;
827
828 /* Skip "," and whitespace. */
829 do
830 {
831 c = getc (infile);
832 }
833 while (c == ',' || c == ' ' || c == '\t' || c == '\n' || c == '\r');
834
835 /* Read in the identifier. */
836 do
837 {
838 input_buffer[i++] = c;
839 c = getc (infile);
840 }
841 while (! (c == ',' || c == ' ' || c == '\t'
842 || c == '\n' || c == '\r'));
843 input_buffer[i] = '\0';
844
845 name = xmalloc (i + 1);
846 memcpy (name, input_buffer, i + 1);
847
848 if (!defunflag)
849 {
850 add_global (type, name, 0);
851 continue;
852 }
853 }
854
855 /* DEFVAR_LISP ("name", addr, "doc")
856 DEFVAR_LISP ("name", addr /\* doc *\/)
857 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
858
859 if (defunflag)
860 commas = generate_globals ? 4 : 5;
861 else if (defvarperbufferflag)
862 commas = 3;
863 else if (defvarflag)
864 commas = 1;
865 else /* For DEFSIMPLE and DEFPRED. */
866 commas = 2;
867
868 while (commas)
869 {
870 if (c == ',')
871 {
872 commas--;
873
874 if (defunflag && (commas == 1 || commas == 2))
875 {
876 int scanned = 0;
877 do
878 c = getc (infile);
879 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
880 if (c < 0)
881 goto eof;
882 ungetc (c, infile);
883 if (commas == 2) /* Pick up minargs. */
884 scanned = fscanf (infile, "%d", &minargs);
885 else /* Pick up maxargs. */
886 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
887 {
888 if (generate_globals)
889 maxargs = (c == 'M') ? -1 : -2;
890 else
891 maxargs = -1;
892 }
893 else
894 scanned = fscanf (infile, "%d", &maxargs);
895 if (scanned < 0)
896 goto eof;
897 }
898 }
899
900 if (c == EOF)
901 goto eof;
902 c = getc (infile);
903 }
904
905 if (generate_globals)
906 {
907 add_global (FUNCTION, name, maxargs);
908 continue;
909 }
910
911 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
912 c = getc (infile);
913
914 if (c == '"')
915 c = read_c_string_or_comment (infile, 0, 0, 0);
916
917 while (c != EOF && c != ',' && c != '/')
918 c = getc (infile);
919 if (c == ',')
920 {
921 c = getc (infile);
922 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
923 c = getc (infile);
924 while ((c >= 'a' && c <= 'z') || (c >= 'Z' && c <= 'Z'))
925 c = getc (infile);
926 if (c == ':')
927 {
928 doc_keyword = 1;
929 c = getc (infile);
930 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
931 c = getc (infile);
932 }
933 }
934
935 if (c == '"'
936 || (c == '/'
937 && (c = getc (infile),
938 ungetc (c, infile),
939 c == '*')))
940 {
941 int comment = c != '"';
942 int saw_usage;
943
944 putc (037, outfile);
945 putc (defvarflag ? 'V' : 'F', outfile);
946 fprintf (outfile, "%s\n", input_buffer);
947
948 if (comment)
949 getc (infile); /* Skip past `*'. */
950 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
951
952 /* If this is a defun, find the arguments and print them. If
953 this function takes MANY or UNEVALLED args, then the C source
954 won't give the names of the arguments, so we shouldn't bother
955 trying to find them.
956
957 Various doc-string styles:
958 0: DEFUN (..., "DOC") (args) [!comment]
959 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
960 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
961 */
962 if (defunflag && maxargs != -1 && !saw_usage)
963 {
964 char argbuf[1024], *p = argbuf;
965
966 if (!comment || doc_keyword)
967 while (c != ')')
968 {
969 if (c < 0)
970 goto eof;
971 c = getc (infile);
972 }
973
974 /* Skip into arguments. */
975 while (c != '(')
976 {
977 if (c < 0)
978 goto eof;
979 c = getc (infile);
980 }
981 /* Copy arguments into ARGBUF. */
982 *p++ = c;
983 do
984 *p++ = c = getc (infile);
985 while (c != ')');
986 *p = '\0';
987 /* Output them. */
988 fprintf (outfile, "\n\n");
989 write_c_args (outfile, input_buffer, argbuf, minargs, maxargs);
990 }
991 else if (defunflag && maxargs == -1 && !saw_usage)
992 /* The DOC should provide the usage form. */
993 fprintf (stderr, "Missing `usage' for function `%s'.\n",
994 input_buffer);
995 }
996 }
997 eof:
998 fclose (infile);
999 return 0;
1000 }
1001 \f
1002 /* Read a file of Lisp code, compiled or interpreted.
1003 Looks for
1004 (defun NAME ARGS DOCSTRING ...)
1005 (defmacro NAME ARGS DOCSTRING ...)
1006 (defsubst NAME ARGS DOCSTRING ...)
1007 (autoload (quote NAME) FILE DOCSTRING ...)
1008 (defvar NAME VALUE DOCSTRING)
1009 (defconst NAME VALUE DOCSTRING)
1010 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
1011 (fset (quote NAME) #[... DOCSTRING ...])
1012 (defalias (quote NAME) #[... DOCSTRING ...])
1013 (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
1014 starting in column zero.
1015 (quote NAME) may appear as 'NAME as well.
1016
1017 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
1018 When we find that, we save it for the following defining-form,
1019 and we use that instead of reading a doc string within that defining-form.
1020
1021 For defvar, defconst, and fset we skip to the docstring with a kludgy
1022 formatting convention: all docstrings must appear on the same line as the
1023 initial open-paren (the one in column zero) and must contain a backslash
1024 and a newline immediately after the initial double-quote. No newlines
1025 must appear between the beginning of the form and the first double-quote.
1026 For defun, defmacro, and autoload, we know how to skip over the
1027 arglist, but the doc string must still have a backslash and newline
1028 immediately after the double quote.
1029 The only source files that must follow this convention are preloaded
1030 uncompiled ones like loaddefs.el; aside from that, it is always the .elc
1031 file that we should look at, and they are no problem because byte-compiler
1032 output follows this convention.
1033 The NAME and DOCSTRING are output.
1034 NAME is preceded by `F' for a function or `V' for a variable.
1035 An entry is output only if DOCSTRING has \ newline just after the opening ".
1036 */
1037
/* Advance INFILE past spaces, tabs, newlines and carriage returns,
   leaving the first other character to be read next.  The character
   is held in an int so that end of file and the byte 0xFF are told
   apart; nothing is pushed back at end of file.  */
static void
skip_white (FILE *infile)
{
  int c;

  do
    c = getc (infile);
  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

  if (c != EOF)
    ungetc (c, infile);
}
1046
/* Read a Lisp symbol from INFILE into BUFFER as a null-terminated
   string, skipping whitespace before and after it.  The symbol ends
   at whitespace, a parenthesis (which is left unread) or end of
   file.  A backslash makes the following character part of the
   symbol, whatever it is.  If no symbol is found, a diagnostic is
   written to stderr and BUFFER is left empty.

   NOTE(review): BUFFER is assumed to hold at least BUFSIZ bytes, as
   the caller visible in this file provides -- confirm for any other
   caller.  A longer symbol is a fatal error instead of an overrun.  */
static void
read_lisp_symbol (FILE *infile, char *buffer)
{
  int c;
  char *fillp = buffer;
  /* Last position that may hold the terminating null.  */
  char *const limit = buffer + BUFSIZ - 1;

  skip_white (infile);
  for (;;)
    {
      c = getc (infile);
      if (c == '\\')
        {
          /* Take the quoted character literally.  */
          c = getc (infile);
          if (c == EOF)
            break;
        }
      else if (c == EOF)
        break;
      else if (c == ' ' || c == '\t' || c == '\n' || c == '\r'
               || c == '(' || c == ')')
        {
          ungetc (c, infile);
          break;
        }

      if (fillp >= limit)
        fatal ("symbol too long in read_lisp_symbol", 0);
      *fillp++ = c;
    }
  *fillp = 0;

  if (! buffer[0])
    fprintf (stderr, "## expected a symbol, got '%c'\n", c);

  skip_white (infile);
}
1074
/* Read INFILE up to and including the next line break (or up to end
   of file) and report whether a doc string starts there.  Returns 1
   if the two characters just before the line break were `"' and `\',
   in which case the line break has been consumed.  Otherwise returns
   0 and, unless end of file was reached, pushes the line break back
   onto INFILE.  Characters are held in ints so that end of file and
   the byte 0xFF are told apart.  */
static int
search_lisp_doc_at_eol (FILE *infile)
{
  int c = 0, c1 = 0, c2 = 0;

  /* Skip until the end of line; remember two previous chars.  */
  while (c != '\n' && c != '\r' && c != EOF)
    {
      c2 = c1;
      c1 = c;
      c = getc (infile);
    }

  /* If two previous characters were " and \,
     this is a doc string.  Otherwise, there is none.  */
  if (c2 != '"' || c1 != '\\')
    {
#ifdef DEBUG
      /* Neither the symbol nor the file name is in scope here, so
         the message cannot mention them.  */
      fprintf (stderr, "## non-docstring found\n");
#endif
      if (c != EOF)
        ungetc (c, infile);
      return 0;
    }
  return 1;
}
1102
/* Initializer for one entry of a table of Lisp file names: the name
   NAME as a string, followed by its length without the null.  */
#define DEF_ELISP_FILE(name) { #name, sizeof (#name) - 1 }
1104
1105 static int
1106 scan_lisp_file (const char *filename, const char *mode)
1107 {
1108 FILE *infile;
1109 register int c;
1110 char *saved_string = 0;
1111 /* These are the only files that are loaded uncompiled, and must
1112 follow the conventions of the doc strings expected by this
1113 function. These conventions are automatically followed by the
1114 byte compiler when it produces the .elc files. */
1115 static struct {
1116 const char *fn;
1117 size_t fl;
1118 } const uncompiled[] = {
1119 DEF_ELISP_FILE (loaddefs.el),
1120 DEF_ELISP_FILE (loadup.el),
1121 DEF_ELISP_FILE (charprop.el),
1122 DEF_ELISP_FILE (cp51932.el),
1123 DEF_ELISP_FILE (eucjp-ms.el)
1124 };
1125 int i, match;
1126 size_t flen = strlen (filename);
1127
1128 if (generate_globals)
1129 fatal ("scanning lisp file when -g specified", 0);
1130 if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
1131 {
1132 for (i = 0, match = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]);
1133 i++)
1134 {
1135 if (uncompiled[i].fl <= flen
1136 && !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
1137 && (flen == uncompiled[i].fl
1138 || IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
1139 {
1140 match = 1;
1141 break;
1142 }
1143 }
1144 if (!match)
1145 fatal ("uncompiled lisp file %s is not supported", filename);
1146 }
1147
1148 infile = fopen (filename, mode);
1149 if (infile == NULL)
1150 {
1151 perror (filename);
1152 return 0; /* No error. */
1153 }
1154
1155 c = '\n';
1156 while (!feof (infile))
1157 {
1158 char buffer[BUFSIZ];
1159 char type;
1160
1161 /* If not at end of line, skip till we get to one. */
1162 if (c != '\n' && c != '\r')
1163 {
1164 c = getc (infile);
1165 continue;
1166 }
1167 /* Skip the line break. */
1168 while (c == '\n' || c == '\r')
1169 c = getc (infile);
1170 /* Detect a dynamic doc string and save it for the next expression. */
1171 if (c == '#')
1172 {
1173 c = getc (infile);
1174 if (c == '@')
1175 {
1176 size_t length = 0;
1177 size_t i;
1178
1179 /* Read the length. */
1180 while ((c = getc (infile),
1181 c >= '0' && c <= '9'))
1182 {
1183 length *= 10;
1184 length += c - '0';
1185 }
1186
1187 if (length <= 1)
1188 fatal ("invalid dynamic doc string length", "");
1189
1190 if (c != ' ')
1191 fatal ("space not found after dynamic doc string length", "");
1192
1193 /* The next character is a space that is counted in the length
1194 but not part of the doc string.
1195 We already read it, so just ignore it. */
1196 length--;
1197
1198 /* Read in the contents. */
1199 free (saved_string);
1200 saved_string = (char *) xmalloc (length);
1201 for (i = 0; i < length; i++)
1202 saved_string[i] = getc (infile);
1203 /* The last character is a ^_.
1204 That is needed in the .elc file
1205 but it is redundant in DOC. So get rid of it here. */
1206 saved_string[length - 1] = 0;
1207 /* Skip the line break. */
1208 while (c == '\n' || c == '\r')
1209 c = getc (infile);
1210 /* Skip the following line. */
1211 while (c != '\n' && c != '\r')
1212 c = getc (infile);
1213 }
1214 continue;
1215 }
1216
1217 if (c != '(')
1218 continue;
1219
1220 read_lisp_symbol (infile, buffer);
1221
1222 if (! strcmp (buffer, "defun")
1223 || ! strcmp (buffer, "defmacro")
1224 || ! strcmp (buffer, "defsubst"))
1225 {
1226 type = 'F';
1227 read_lisp_symbol (infile, buffer);
1228
1229 /* Skip the arguments: either "nil" or a list in parens. */
1230
1231 c = getc (infile);
1232 if (c == 'n') /* nil */
1233 {
1234 if ((c = getc (infile)) != 'i'
1235 || (c = getc (infile)) != 'l')
1236 {
1237 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1238 buffer, filename);
1239 continue;
1240 }
1241 }
1242 else if (c != '(')
1243 {
1244 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1245 buffer, filename);
1246 continue;
1247 }
1248 else
1249 while (c != ')')
1250 c = getc (infile);
1251 skip_white (infile);
1252
1253 /* If the next three characters aren't `dquote bslash newline'
1254 then we're not reading a docstring.
1255 */
1256 if ((c = getc (infile)) != '"'
1257 || (c = getc (infile)) != '\\'
1258 || ((c = getc (infile)) != '\n' && c != '\r'))
1259 {
1260 #ifdef DEBUG
1261 fprintf (stderr, "## non-docstring in %s (%s)\n",
1262 buffer, filename);
1263 #endif
1264 continue;
1265 }
1266 }
1267
1268 /* defcustom can only occur in uncompiled Lisp files. */
1269 else if (! strcmp (buffer, "defvar")
1270 || ! strcmp (buffer, "defconst")
1271 || ! strcmp (buffer, "defcustom"))
1272 {
1273 type = 'V';
1274 read_lisp_symbol (infile, buffer);
1275
1276 if (saved_string == 0)
1277 if (!search_lisp_doc_at_eol (infile))
1278 continue;
1279 }
1280
1281 else if (! strcmp (buffer, "custom-declare-variable")
1282 || ! strcmp (buffer, "defvaralias")
1283 )
1284 {
1285 type = 'V';
1286
1287 c = getc (infile);
1288 if (c == '\'')
1289 read_lisp_symbol (infile, buffer);
1290 else
1291 {
1292 if (c != '(')
1293 {
1294 fprintf (stderr,
1295 "## unparsable name in custom-declare-variable in %s\n",
1296 filename);
1297 continue;
1298 }
1299 read_lisp_symbol (infile, buffer);
1300 if (strcmp (buffer, "quote"))
1301 {
1302 fprintf (stderr,
1303 "## unparsable name in custom-declare-variable in %s\n",
1304 filename);
1305 continue;
1306 }
1307 read_lisp_symbol (infile, buffer);
1308 c = getc (infile);
1309 if (c != ')')
1310 {
1311 fprintf (stderr,
1312 "## unparsable quoted name in custom-declare-variable in %s\n",
1313 filename);
1314 continue;
1315 }
1316 }
1317
1318 if (saved_string == 0)
1319 if (!search_lisp_doc_at_eol (infile))
1320 continue;
1321 }
1322
1323 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1324 {
1325 type = 'F';
1326
1327 c = getc (infile);
1328 if (c == '\'')
1329 read_lisp_symbol (infile, buffer);
1330 else
1331 {
1332 if (c != '(')
1333 {
1334 fprintf (stderr, "## unparsable name in fset in %s\n",
1335 filename);
1336 continue;
1337 }
1338 read_lisp_symbol (infile, buffer);
1339 if (strcmp (buffer, "quote"))
1340 {
1341 fprintf (stderr, "## unparsable name in fset in %s\n",
1342 filename);
1343 continue;
1344 }
1345 read_lisp_symbol (infile, buffer);
1346 c = getc (infile);
1347 if (c != ')')
1348 {
1349 fprintf (stderr,
1350 "## unparsable quoted name in fset in %s\n",
1351 filename);
1352 continue;
1353 }
1354 }
1355
1356 if (saved_string == 0)
1357 if (!search_lisp_doc_at_eol (infile))
1358 continue;
1359 }
1360
1361 else if (! strcmp (buffer, "autoload"))
1362 {
1363 type = 'F';
1364 c = getc (infile);
1365 if (c == '\'')
1366 read_lisp_symbol (infile, buffer);
1367 else
1368 {
1369 if (c != '(')
1370 {
1371 fprintf (stderr, "## unparsable name in autoload in %s\n",
1372 filename);
1373 continue;
1374 }
1375 read_lisp_symbol (infile, buffer);
1376 if (strcmp (buffer, "quote"))
1377 {
1378 fprintf (stderr, "## unparsable name in autoload in %s\n",
1379 filename);
1380 continue;
1381 }
1382 read_lisp_symbol (infile, buffer);
1383 c = getc (infile);
1384 if (c != ')')
1385 {
1386 fprintf (stderr,
1387 "## unparsable quoted name in autoload in %s\n",
1388 filename);
1389 continue;
1390 }
1391 }
1392 skip_white (infile);
1393 if ((c = getc (infile)) != '\"')
1394 {
1395 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1396 buffer, filename);
1397 continue;
1398 }
1399 read_c_string_or_comment (infile, 0, 0, 0);
1400
1401 if (saved_string == 0)
1402 if (!search_lisp_doc_at_eol (infile))
1403 continue;
1404 }
1405
1406 #ifdef DEBUG
1407 else if (! strcmp (buffer, "if")
1408 || ! strcmp (buffer, "byte-code"))
1409 continue;
1410 #endif
1411
1412 else
1413 {
1414 #ifdef DEBUG
1415 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1416 buffer, filename);
1417 #endif
1418 continue;
1419 }
1420
1421 /* At this point, we should either use the previous dynamic doc string in
1422 saved_string or gobble a doc string from the input file.
1423 In the latter case, the opening quote (and leading backslash-newline)
1424 have already been read. */
1425
1426 putc (037, outfile);
1427 putc (type, outfile);
1428 fprintf (outfile, "%s\n", buffer);
1429 if (saved_string)
1430 {
1431 fputs (saved_string, outfile);
1432 /* Don't use one dynamic doc string twice. */
1433 free (saved_string);
1434 saved_string = 0;
1435 }
1436 else
1437 read_c_string_or_comment (infile, 1, 0, 0);
1438 }
1439 fclose (infile);
1440 return 0;
1441 }
1442
1443
1444 /* make-docfile.c ends here */