X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/901b219d7b84c2f7b9ff22571e00a61c1af03269..5bd471e85a81bc27e319ee84efcdab0cc9a6e2f7:/lib-src/etags.c diff --git a/lib-src/etags.c b/lib-src/etags.c index c9b5ddeb4d..552a09d2f0 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -1,5 +1,5 @@ /* Tags file maker to go with GNU Emacs - Copyright (C) 1984, 87, 88, 89, 93, 94, 95 + Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99 Free Software Foundation, Inc. and Ken Arnold This file is not considered part of GNU Emacs. @@ -28,22 +28,38 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ * Francesco Potorti` reorganised C and C++ based on work by Joe Wells. * Regexp tags by Tom Tromey. * - * Francesco Potorti` (F.Potorti@cnuce.cnr.it) is the current maintainer. + * Francesco Potorti` (pot@gnu.org) is the current maintainer. */ -char pot_etags_version[] = "@(#) pot revision number is 11.66"; +char pot_etags_version[] = "@(#) pot revision number is 13.33"; #define TRUE 1 #define FALSE 0 +#ifndef _GNU_SOURCE +# define _GNU_SOURCE /* enables some compiler checks on GNU */ +#endif #ifndef DEBUG # define DEBUG FALSE #endif +#ifdef HAVE_CONFIG_H +# include + /* On some systems, Emacs defines static as nothing for the sake + of unexec. We don't want that here since we don't use unexec. 
*/ +# undef static +# define ETAGS_REGEXPS /* use the regexp features */ +# define LONG_OPTIONS /* accept long options */ +#endif /* HAVE_CONFIG_H */ + #ifdef MSDOS -# include # include # include +# include +# ifndef HAVE_CONFIG_H +# define DOS_NT +# include +# endif #endif /* MSDOS */ #ifdef WINDOWSNT @@ -52,20 +68,34 @@ char pot_etags_version[] = "@(#) pot revision number is 11.66"; # include # include # define MAXPATHLEN _MAX_PATH +# ifdef HAVE_CONFIG_H +# undef HAVE_NTGUI +# else +# define DOS_NT +# endif /* not HAVE_CONFIG_H */ +# ifndef HAVE_GETCWD +# define HAVE_GETCWD +# endif /* undef HAVE_GETCWD */ +#endif /* WINDOWSNT */ + +#if !defined (WINDOWSNT) && defined (STDC_HEADERS) +#include +#include #endif -#ifdef HAVE_CONFIG_H -# include - /* On some systems, Emacs defines static as nothing for the sake - of unexec. We don't want that here since we don't use unexec. */ -# undef static -#endif +#ifdef HAVE_UNISTD_H +# include +#else +# ifdef HAVE_GETCWD + extern char *getcwd (); +# endif +#endif /* HAVE_UNISTD_H */ #include #include #include #ifndef errno -extern int errno; + extern int errno; #endif #include #include @@ -74,14 +104,20 @@ extern int errno; # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) #endif -#include +#ifdef LONG_OPTIONS +# include +#else +# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr) + extern char *optarg; + extern int optind, opterr; +#endif /* LONG_OPTIONS */ #ifdef ETAGS_REGEXPS # include #endif /* ETAGS_REGEXPS */ /* Define CTAGS to make the program "ctags" compatible with the usual one. - Let it undefined to make the program "etags", which makes emacs-style + Leave it undefined to make the program "etags", which makes emacs-style tag tables and tags typedefs, #defines and struct/union/enum by default. */ #ifdef CTAGS # undef CTAGS @@ -102,78 +138,72 @@ extern int errno; /* C extensions. 
*/ #define C_PLPL 0x00001 /* C++ */ #define C_STAR 0x00003 /* C* */ +#define C_JAVA 0x00005 /* JAVA */ #define YACC 0x10000 /* yacc file */ -#define streq(s,t) ((DEBUG &&!(s)&&!(t)&&(abort(),1)) || !strcmp(s,t)) -#define strneq(s,t,n) ((DEBUG &&!(s)&&!(t)&&(abort(),1)) || !strncmp(s,t,n)) - -#define lowcase(c) tolower ((unsigned char)c) +#define streq(s,t) ((DEBUG && (s) == NULL && (t) == NULL \ + && (abort (), 1)) || !strcmp (s, t)) +#define strneq(s,t,n) ((DEBUG && (s) == NULL && (t) == NULL \ + && (abort (), 1)) || !strncmp (s, t, n)) -#define iswhite(arg) (_wht[arg]) /* T if char is white */ -#define begtoken(arg) (_btk[arg]) /* T if char can start token */ -#define intoken(arg) (_itk[arg]) /* T if char can be in token */ -#define endtoken(arg) (_etk[arg]) /* T if char ends tokens */ +#define lowcase(c) tolower ((char)c) -#ifdef DOS_NT -# define absolutefn(fn) (fn[0] == '/' \ - || (fn[1] == ':' && fn[2] == '/')) -#else -# define absolutefn(fn) (fn[0] == '/') -#endif +#define CHARS 256 /* 2^sizeof(char) */ +#define CHAR(x) ((unsigned int)x & (CHARS - 1)) +#define iswhite(c) (_wht[CHAR(c)]) /* c is white */ +#define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */ +#define begtoken(c) (_btk[CHAR(c)]) /* c can start token */ +#define intoken(c) (_itk[CHAR(c)]) /* c can be in token */ +#define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */ /* - * xnew -- allocate storage + * xnew, xrnew -- allocate, reallocate storage * * SYNOPSIS: Type *xnew (int n, Type); + * Type *xrnew (OldPointer, int n, Type); */ -#define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type))) +#ifdef chkmalloc +# include "chkmalloc.h" +# define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \ + (n) * sizeof (Type))) +# define xrnew(op,n,Type) ((Type *) trace_realloc (__FILE__, __LINE__, \ + (op), (n) * sizeof (Type))) +#else +# define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type))) +# define xrnew(op,n,Type) ((Type *) xrealloc ((op), (n) * sizeof (Type))) +#endif -typedef 
int logical; +typedef int bool; -typedef struct nd_st -{ /* sorting structure */ - char *name; /* function or type name */ - char *file; /* file name */ - logical is_func; /* use pattern or line no */ - logical been_warned; /* set if noticed dup */ - int lno; /* line number tag is on */ - long cno; /* character number line starts on */ - char *pat; /* search pattern */ - struct nd_st *left, *right; /* left and right sons */ -} NODE; +typedef void Lang_function (); -extern char *getenv (); +typedef struct +{ + char *suffix; + char *command; /* Takes one arg and decompresses to stdout */ +} compressor; -char *concat (); -char *savenstr (), *savestr (); -char *etags_strchr (), *etags_strrchr (); -char *etags_getcwd (); -char *relative_filename (), *absolute_filename (), *absolute_dirname (); -long *xmalloc (), *xrealloc (); +typedef struct +{ + char *name; + Lang_function *function; + char **suffixes; + char **interpreters; +} language; -typedef void Lang_function (); -#if FALSE /* many compilers barf on this */ -Lang_function Asm_labels; -Lang_function default_C_entries; -Lang_function C_entries; -Lang_function Cplusplus_entries; -Lang_function Cstar_entries; -Lang_function Erlang_functions; -Lang_function Fortran_functions; -Lang_function Yacc_entries; -Lang_function Lisp_functions; -Lang_function Pascal_functions; -Lang_function Perl_functions; -Lang_function Prolog_functions; -Lang_function Scheme_functions; -Lang_function TeX_functions; -Lang_function just_read_file; -#else /* so let's write it this way */ +extern char *getenv (); + +/* Many compilers barf on this: + Lang_function Ada_funcs; + so let's write it this way */ +void Ada_funcs (); void Asm_labels (); void C_entries (); void default_C_entries (); void plain_C_entries (); +void Cjava_entries (); +void Cobol_paragraphs (); void Cplusplus_entries (); void Cstar_entries (); void Erlang_functions (); @@ -182,150 +212,217 @@ void Yacc_entries (); void Lisp_functions (); void Pascal_functions (); void 
Perl_functions (); +void Postscript_functions (); void Prolog_functions (); +void Python_functions (); void Scheme_functions (); void TeX_functions (); void just_read_file (); -#endif -Lang_function *get_language_from_name (); -Lang_function *get_language_from_interpreter (); -Lang_function *get_language_from_suffix (); +compressor *get_compressor_from_suffix (); +language *get_language_from_name (); +language *get_language_from_interpreter (); +language *get_language_from_suffix (); int total_size_of_entries (); -long readline (); -long readline_internal (); +long readline (), readline_internal (); +void get_tag (); + #ifdef ETAGS_REGEXPS +void analyse_regex (); void add_regex (); -#endif -void add_node (); +void free_patterns (); +#endif /* ETAGS_REGEXPS */ void error (); void suggest_asking_for_help (); void fatal (), pfatal (); -void find_entries (); -void free_tree (); -void getit (); +void add_node (); + void init (); void initbuffer (); -void pfnote (); +void find_entries (); +void free_tree (); +void pfnote (), new_pfnote (); void process_file (); void put_entries (); void takeprec (); +char *concat (); +char *skip_spaces (), *skip_non_spaces (); +char *savenstr (), *savestr (); +char *etags_strchr (), *etags_strrchr (); +char *etags_getcwd (); +char *relative_filename (), *absolute_filename (), *absolute_dirname (); +bool filename_is_absolute (); +void canonicalize_filename (); +void grow_linebuffer (); +long *xmalloc (), *xrealloc (); + char searchar = '/'; /* use /.../ searches */ -int lineno; /* line number of current line */ -long charno; /* current character number */ -long linecharno; /* charno of start of line */ - -char *curfile; /* current input file name */ char *tagfile; /* output file */ char *progname; /* name this program was invoked with */ char *cwd; /* current working directory */ char *tagfiledir; /* directory of tagfile */ - FILE *tagf; /* ioptr for tags file */ -NODE *head; /* the head of the binary tree of tags */ + +char *curfile; /* 
current input file name */ +language *curlang; /* current language */ + +int lineno; /* line number of current line */ +long charno; /* current character number */ +long linecharno; /* charno of start of current line */ +char *dbp; /* pointer to start of current tag */ + +typedef struct node_st +{ /* sorting structure */ + char *name; /* function or type name */ + char *file; /* file name */ + bool is_func; /* use pattern or line no */ + bool been_warned; /* set if noticed dup */ + int lno; /* line number tag is on */ + long cno; /* character number line starts on */ + char *pat; /* search pattern */ + struct node_st *left, *right; /* left and right sons */ +} node; + +node *head; /* the head of the binary tree of tags */ /* - * A `struct linebuffer' is a structure which holds a line of text. - * `readline' reads a line from a stream into a linebuffer and works - * regardless of the length of the line. + * A `linebuffer' is a structure which holds a line of text. + * `readline_internal' reads a line from a stream into a linebuffer + * and works regardless of the length of the line. + * SIZE is the size of BUFFER, LEN is the length of the string in + * BUFFER after readline reads it. 
*/ -#define GROW_LINEBUFFER(buf,toksize) \ -while (buf.size < toksize) \ - buf.buffer = (char *) xrealloc (buf.buffer, buf.size *= 2) -struct linebuffer +typedef struct { long size; + int len; char *buffer; -}; +} linebuffer; -struct linebuffer lb; /* the current line */ -struct linebuffer token_name; /* used by C_entries as a temporary area */ +linebuffer lb; /* the current line */ +linebuffer token_name; /* used by C_entries as a temporary area */ struct { long linepos; - struct linebuffer lb; /* used by C_entries instead of lb */ + linebuffer lb; /* used by C_entries instead of lb */ } lbs[2]; /* boolean "functions" (see init) */ -logical _wht[0177], _etk[0177], _itk[0177], _btk[0177]; +bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS]; char /* white chars */ - *white = " \f\t\n\013", + *white = " \f\t\n\r", + /* not in a name */ + *nonam = " \f\t\n\r(=,[;", /* token ending chars */ - *endtk = " \t\n\013\"'#()[]{}=-+%*/&|^~!<>;,.:?", + *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?", /* token starting chars */ *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@", /* valid in-token chars */ - *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789"; + *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789"; -logical append_to_tagfile; /* -a: append to tags */ -/* The following three default to TRUE for etags, but to FALSE for ctags. */ -logical typedefs; /* -t: create tags for typedefs */ -logical typedefs_and_cplusplus; /* -T: create tags for typedefs, level */ +bool append_to_tagfile; /* -a: append to tags */ +/* The following four default to TRUE for etags, but to FALSE for ctags. */ +bool typedefs; /* -t: create tags for C and Ada typedefs */ +bool typedefs_and_cplusplus; /* -T: create tags for C typedefs, level */ /* 0 struct/enum/union decls, and C++ */ /* member functions. */ -logical constantypedefs; /* -d: create tags for C #define and enum */ - /* constants. 
Enum consts not implemented. */ +bool constantypedefs; /* -d: create tags for C #define, enum */ + /* constants and variables. */ /* -D: opposite of -d. Default under ctags. */ -logical update; /* -u: update tags */ -logical vgrind_style; /* -v: create vgrind style index output */ -logical no_warnings; /* -w: suppress warnings */ -logical cxref_style; /* -x: create cxref style output */ -logical cplusplus; /* .[hc] means C++, not C */ -logical noindentypedefs; /* -I: ignore indentation in C */ - +bool declarations; /* --declarations: tag them and extern in C&Co*/ +bool globals; /* create tags for global variables */ +bool members; /* create tags for C member variables */ +bool update; /* -u: update tags */ +bool vgrind_style; /* -v: create vgrind style index output */ +bool no_warnings; /* -w: suppress warnings */ +bool cxref_style; /* -x: create cxref style output */ +bool cplusplus; /* .[hc] means C++, not C */ +bool noindentypedefs; /* -I: ignore indentation in C */ +bool packages_only; /* --packages-only: in Ada, only tag packages*/ + +#ifdef LONG_OPTIONS struct option longopts[] = { - { "append", no_argument, NULL, 'a' }, - { "backward-search", no_argument, NULL, 'B' }, - { "c++", no_argument, NULL, 'C' }, - { "cxref", no_argument, NULL, 'x' }, - { "defines", no_argument, NULL, 'd' }, - { "help", no_argument, NULL, 'h' }, - { "help", no_argument, NULL, 'H' }, - { "ignore-indentation", no_argument, NULL, 'I' }, - { "include", required_argument, NULL, 'i' }, - { "language", required_argument, NULL, 'l' }, - { "no-defines", no_argument, NULL, 'D' }, - { "no-regex", no_argument, NULL, 'R' }, - { "no-warn", no_argument, NULL, 'w' }, - { "output", required_argument, NULL, 'o' }, - { "regex", required_argument, NULL, 'r' }, - { "typedefs", no_argument, NULL, 't' }, - { "typedefs-and-c++", no_argument, NULL, 'T' }, - { "update", no_argument, NULL, 'u' }, - { "version", no_argument, NULL, 'V' }, - { "vgrind", no_argument, NULL, 'v' }, - { 0 } + { "packages-only", 
no_argument, &packages_only, TRUE }, + { "append", no_argument, NULL, 'a' }, + { "backward-search", no_argument, NULL, 'B' }, + { "c++", no_argument, NULL, 'C' }, + { "cxref", no_argument, NULL, 'x' }, + { "defines", no_argument, NULL, 'd' }, + { "declarations", no_argument, &declarations, TRUE }, + { "no-defines", no_argument, NULL, 'D' }, + { "globals", no_argument, &globals, TRUE }, + { "no-globals", no_argument, &globals, FALSE }, + { "help", no_argument, NULL, 'h' }, + { "help", no_argument, NULL, 'H' }, + { "ignore-indentation", no_argument, NULL, 'I' }, + { "include", required_argument, NULL, 'i' }, + { "language", required_argument, NULL, 'l' }, + { "members", no_argument, &members, TRUE }, + { "no-members", no_argument, &members, FALSE }, + { "no-warn", no_argument, NULL, 'w' }, + { "output", required_argument, NULL, 'o' }, +#ifdef ETAGS_REGEXPS + { "regex", required_argument, NULL, 'r' }, + { "no-regex", no_argument, NULL, 'R' }, + { "ignore-case-regex", required_argument, NULL, 'c' }, +#endif /* ETAGS_REGEXPS */ + { "typedefs", no_argument, NULL, 't' }, + { "typedefs-and-c++", no_argument, NULL, 'T' }, + { "update", no_argument, NULL, 'u' }, + { "version", no_argument, NULL, 'V' }, + { "vgrind", no_argument, NULL, 'v' }, + { NULL } }; +#endif /* LONG_OPTIONS */ #ifdef ETAGS_REGEXPS /* Structure defining a regular expression. Elements are the compiled pattern, and the name string. */ -struct pattern +typedef struct pattern { + struct pattern *p_next; + language *language; + char *regex; struct re_pattern_buffer *pattern; struct re_registers regs; char *name_pattern; - logical error_signaled; -}; + bool error_signaled; +} pattern; -/* Number of regexps found. */ -int num_patterns = 0; +/* List of all regexps. */ +pattern *p_head = NULL; -/* Array of all regexps. */ -struct pattern *patterns = NULL; +/* How many characters in the character set. (From regex.c.) */ +#define CHAR_SET_SIZE 256 +/* Translation table for case-insensitive matching. 
*/ +char lc_trans[CHAR_SET_SIZE]; #endif /* ETAGS_REGEXPS */ +compressor compressors[] = +{ + { "z", "gzip -d -c"}, + { "Z", "gzip -d -c"}, + { "gz", "gzip -d -c"}, + { "GZ", "gzip -d -c"}, + { "bz2", "bzip2 -d -c" }, + { NULL } +}; + /* * Language stuff. */ /* Non-NULL if language fixed. */ -Lang_function *lang_func = NULL; +language *forced_lang = NULL; + +/* Ada code */ +char *Ada_suffixes [] = + { "ads", "adb", "ada", NULL }; /* Assembly code */ char *Asm_suffixes [] = { "a", /* Unix assembler */ @@ -334,6 +431,7 @@ char *Asm_suffixes [] = { "a", /* Unix assembler */ "inc", /* Microcontroller include files */ "ins", /* Microcontroller include files */ "s", "sa", /* Unix assembler */ + "S", /* cpp-processed Unix assembler */ "src", /* BSO/Tasking C compiler output */ NULL }; @@ -343,9 +441,17 @@ char *Asm_suffixes [] = { "a", /* Unix assembler */ char *default_C_suffixes [] = { "c", "h", NULL }; -/* .M is for Objective C++ files. */ char *Cplusplus_suffixes [] = - { "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx", "M", NULL}; + { "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx", + "M", /* Objective C++ */ + "pdb", /* Postscript with C syntax */ + NULL }; + +char *Cjava_suffixes [] = + { "java", NULL }; + +char *Cobol_suffixes [] = + { "COB", "cob", NULL }; char *Cstar_suffixes [] = { "cs", "hs", NULL }; @@ -373,57 +479,62 @@ char *plain_C_suffixes [] = "lm", /* Objective lex file */ NULL }; +char *Postscript_suffixes [] = + { "ps", "psw", NULL }; /* .psw is for PSWrap */ + char *Prolog_suffixes [] = { "prolog", NULL }; +char *Python_suffixes [] = + { "py", NULL }; + /* Can't do the `SCM' or `scm' prefix with a version number. 
*/ char *Scheme_suffixes [] = - { "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "t", NULL }; + { "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "ss", "t", NULL }; char *TeX_suffixes [] = { "TeX", "bib", "clo", "cls", "ltx", "sty", "tex", NULL }; char *Yacc_suffixes [] = - { "y", "ym", NULL }; /* .ym is Objective yacc file */ + { "y", "ym", "yy", "yxx", "y++", NULL }; /* .ym is Objective yacc file */ -/* Table of language names and corresponding functions, file suffixes - and interpreter names. - It is ok for a given function to be listed under more than one - name. I just didn't. */ -struct lang_entry -{ - char *name; - Lang_function *function; - char **suffixes; - char **interpreters; -}; +/* + * Table of languages. + * + * It is ok for a given function to be listed under more than one + * name. I just didn't. + */ -struct lang_entry lang_names [] = +language lang_names [] = { - { "asm", Asm_labels, Asm_suffixes, NULL }, - { "c", default_C_entries, default_C_suffixes, NULL }, - { "c++", Cplusplus_entries, Cplusplus_suffixes, NULL }, - { "c*", Cstar_entries, Cstar_suffixes, NULL }, - { "erlang", Erlang_functions, Erlang_suffixes, NULL }, - { "fortran", Fortran_functions, Fortran_suffixes, NULL }, - { "lisp", Lisp_functions, Lisp_suffixes, NULL }, - { "pascal", Pascal_functions, Pascal_suffixes, NULL }, - { "perl", Perl_functions, Perl_suffixes, Perl_interpreters }, - { "proc", plain_C_entries, plain_C_suffixes, NULL }, - { "prolog", Prolog_functions, Prolog_suffixes, NULL }, - { "scheme", Scheme_functions, Scheme_suffixes, NULL }, - { "tex", TeX_functions, TeX_suffixes, NULL }, - { "yacc", Yacc_entries, Yacc_suffixes, NULL }, + { "ada", Ada_funcs, Ada_suffixes, NULL }, + { "asm", Asm_labels, Asm_suffixes, NULL }, + { "c", default_C_entries, default_C_suffixes, NULL }, + { "c++", Cplusplus_entries, Cplusplus_suffixes, NULL }, + { "c*", Cstar_entries, Cstar_suffixes, NULL }, + { "cobol", Cobol_paragraphs, Cobol_suffixes, NULL }, + { "erlang", Erlang_functions, 
Erlang_suffixes, NULL }, + { "fortran", Fortran_functions, Fortran_suffixes, NULL }, + { "java", Cjava_entries, Cjava_suffixes, NULL }, + { "lisp", Lisp_functions, Lisp_suffixes, NULL }, + { "pascal", Pascal_functions, Pascal_suffixes, NULL }, + { "perl", Perl_functions, Perl_suffixes, Perl_interpreters }, + { "postscript", Postscript_functions, Postscript_suffixes, NULL }, + { "proc", plain_C_entries, plain_C_suffixes, NULL }, + { "prolog", Prolog_functions, Prolog_suffixes, NULL }, + { "python", Python_functions, Python_suffixes, NULL }, + { "scheme", Scheme_functions, Scheme_suffixes, NULL }, + { "tex", TeX_functions, TeX_suffixes, NULL }, + { "yacc", Yacc_entries, Yacc_suffixes, NULL }, { "auto", NULL }, /* default guessing scheme */ { "none", just_read_file }, /* regexp matching only */ { NULL, NULL } /* end of list */ }; - void print_language_names () { - struct lang_entry *lang; + language *lang; char **ext; puts ("\nThese are the currently supported languages, along with the\n\ @@ -441,16 +552,19 @@ name suffix, and `none' means only do regexp processing on files.\n\ If no language is specified and no matching suffix is found,\n\ the first line of the file is read for a sharp-bang (#!) sequence\n\ followed by the name of an interpreter. If no such sequence is found,\n\ -Fortran is tried first; if no tags are found, C is tried next."); +Fortran is tried first; if no tags are found, C is tried next.\n\ +Compressed files are supported using gzip and bzip2."); } #ifndef VERSION -# define VERSION "19" +# define VERSION "20" #endif void print_version () { - printf ("%s for Emacs version %s\n", (CTAGS) ? "ctags" : "etags", VERSION); + printf ("%s (GNU Emacs %s)\n", (CTAGS) ? "ctags" : "etags", VERSION); + puts ("Copyright (C) 1999 Free Software Foundation, Inc. 
and Ken Arnold"); + puts ("This program is distributed under the same terms as Emacs"); exit (GOOD); } @@ -458,17 +572,27 @@ print_version () void print_help () { - printf ("These are the options accepted by %s. You may use unambiguous\n\ -abbreviations for the long option names. A - as file name means read\n\ -names from stdin.", progname); + printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\ +\n\ +These are the options accepted by %s.\n", progname, progname); +#ifdef LONG_OPTIONS + puts ("You may use unambiguous abbreviations for the long option names."); +#else + puts ("Long option names do not work with this executable, as it is not\n\ +linked with GNU getopt."); +#endif /* LONG_OPTIONS */ + puts ("A - as file name means read names from stdin (one per line)."); if (!CTAGS) - printf (" Absolute names are stored in the output file as they\n\ -are. Relative ones are stored relative to the output file's directory."); + printf (" Absolute names are stored in the output file as they are.\n\ +Relative ones are stored relative to the output file's directory."); puts ("\n"); puts ("-a, --append\n\ Append tag entries to existing tags file."); + puts ("--packages-only\n\ + For Ada files, only generate tags for packages ."); + if (CTAGS) puts ("-B, --backward-search\n\ Write the search commands for the tag entries using '?', the\n\ @@ -477,13 +601,21 @@ are. 
Relative ones are stored relative to the output file's directory."); puts ("-C, --c++\n\ Treat files whose name suffix defaults to C language as C++ files."); + puts ("--declarations\n\ + In C and derived languages, create tags for function declarations,"); + if (CTAGS) + puts ("\tand create tags for extern variables if --globals is used."); + else + puts + ("\tand create tags for extern variables unless --no-globals is used."); + if (CTAGS) puts ("-d, --defines\n\ - Create tag entries for constant C #defines, too."); + Create tag entries for C #define constants and enum constants, too."); else puts ("-D, --no-defines\n\ - Don't create tag entries for constant C #defines. This makes\n\ - the tags file smaller."); + Don't create tag entries for C #define constants and enum constants.\n\ + This makes the tags file smaller."); if (!CTAGS) { @@ -496,13 +628,27 @@ are. Relative ones are stored relative to the output file's directory."); named language up to the next --language=LANG option."); } + if (CTAGS) + puts ("--globals\n\ + Create tag entries for global variables in some languages."); + else + puts ("--no-globals\n\ + Do not create tag entries for global variables in some\n\ + languages. This makes the tags file smaller."); + puts ("--members\n\ + Create tag entries for member variables in C and derived languages."); + #ifdef ETAGS_REGEXPS - puts ("-r /REGEXP/, --regex=/REGEXP/\n\ - Make a tag for each line matching pattern REGEXP in the\n\ - following files. REGEXP is anchored (as if preceded by ^).\n\ - The form /REGEXP/NAME/ creates a named tag. For example Tcl\n\ - named tags can be created with:\n\ + puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\ + Make a tag for each line matching pattern REGEXP in the following\n\ + files. 
{LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\ + regexfile is a file containing one REGEXP per line.\n\ + REGEXP is anchored (as if preceded by ^).\n\ + The form /REGEXP/NAME/ creates a named tag.\n\ + For example Tcl named tags can be created with:\n\ --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/."); + puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\ + Like -r, --regex but ignore case when matching expressions."); puts ("-R, --no-regex\n\ Don't create tags from regexps for the following files."); #endif /* ETAGS_REGEXPS */ @@ -517,7 +663,7 @@ are. Relative ones are stored relative to the output file's directory."); if (CTAGS) { puts ("-t, --typedefs\n\ - Generate tag entries for C typedefs."); + Generate tag entries for C and Ada typedefs."); puts ("-T, --typedefs-and-c++\n\ Generate tag entries for C typedefs, C struct/enum/union tags,\n\ and C++ member functions."); @@ -549,6 +695,9 @@ are. Relative ones are stored relative to the output file's directory."); print_language_names (); + puts (""); + puts ("Report bugs to bug-gnu-emacs@gnu.org"); + exit (GOOD); } @@ -557,15 +706,16 @@ enum argument_type { at_language, at_regexp, - at_filename + at_filename, + at_icregexp }; -/* This structure helps us allow mixing of --lang and filenames. */ +/* This structure helps us allow mixing of --lang and file names. */ typedef struct { enum argument_type arg_type; char *what; - Lang_function *function; + language *lang; /* language of the regexp */ } argument; #ifdef VMS /* VMS specific functions */ @@ -582,7 +732,7 @@ typedef struct { /* v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names - returning in each successive call the next filename matching the input + returning in each successive call the next file name matching the input spec. 
The function expects that each in_spec passed to it will be processed to completion; in particular, up to and including the call following that in which the last matching name @@ -591,7 +741,7 @@ typedef struct { If an error occurs, on return out_spec contains the value of in_spec when the error occurred. - With each successive filename returned in out_spec, the + With each successive file name returned in out_spec, the function's return value is one. When there are no more matching names the function returns zero. If on the first call no file matches in_spec, or there is any other error, -1 is returned. @@ -608,7 +758,7 @@ fn_exp (out, in) static long context = 0; static struct dsc$descriptor_s o; static struct dsc$descriptor_s i; - static logical pass1 = TRUE; + static bool pass1 = TRUE; long status; short retval; @@ -648,7 +798,7 @@ fn_exp (out, in) char * gfnames (arg, p_error) char *arg; - logical *p_error; + bool *p_error; { static vspec filename = {MAX_FILE_SPEC_LEN, "\0"}; @@ -670,7 +820,7 @@ gfnames (arg, p_error) system (cmd) char *cmd; { - fprintf (stderr, "system() function not implemented under VMS\n"); + error ("%s", "system() function not implemented under VMS"); } #endif @@ -693,20 +843,20 @@ char *massage_name (s) #endif /* VMS */ -void +int main (argc, argv) int argc; char *argv[]; { int i; - unsigned int nincluded_files = 0; - char **included_files = xnew (argc, char *); + unsigned int nincluded_files; + char **included_files; char *this_file; argument *argbuffer; - int current_arg = 0, file_count = 0; - struct linebuffer filename_lb; + int current_arg, file_count; + linebuffer filename_lb; #ifdef VMS - logical got_err; + bool got_err; #endif #ifdef DOS_NT @@ -714,6 +864,10 @@ main (argc, argv) #endif /* DOS_NT */ progname = argv[0]; + nincluded_files = 0; + included_files = xnew (argc, char *); + current_arg = 0; + file_count = 0; /* Allocate enough no matter what happens. Overkill, but each one is small. 
*/ @@ -721,21 +875,40 @@ main (argc, argv) #ifdef ETAGS_REGEXPS /* Set syntax for regular expression routines. */ - re_set_syntax (RE_SYNTAX_EMACS); + re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS); + /* Translation table for case-insensitive search. */ + for (i = 0; i < CHAR_SET_SIZE; i++) + lc_trans[i] = lowcase (i); #endif /* ETAGS_REGEXPS */ /* * If etags, always find typedefs and structure tags. Why not? - * Also default is to find macro constants. + * Also default is to find macro constants, enum constants and + * global variables. */ if (!CTAGS) - typedefs = typedefs_and_cplusplus = constantypedefs = TRUE; + { + typedefs = typedefs_and_cplusplus = constantypedefs = TRUE; + globals = TRUE; + members = FALSE; + } while (1) { - int opt = getopt_long (argc, argv, - "-aCdDf:Il:o:r:RStTi:BuvxwVhH", longopts, 0); + int opt; + char *optstring; + +#ifdef ETAGS_REGEXPS + optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH"; +#else + optstring = "-aCdDf:Il:o:StTi:BuvxwVhH"; +#endif /* ETAGS_REGEXPS */ + +#ifndef LONG_OPTIONS + optstring = optstring + 1; +#endif /* LONG_OPTIONS */ + opt = getopt_long (argc, argv, optstring, longopts, 0); if (opt == EOF) break; @@ -747,7 +920,7 @@ main (argc, argv) break; case 1: - /* This means that a filename has been seen. Record it. */ + /* This means that a file name has been seen. Record it. */ argbuffer[current_arg].arg_type = at_filename; argbuffer[current_arg].what = optarg; ++current_arg; @@ -755,24 +928,15 @@ main (argc, argv) break; /* Common options. 
*/ - case 'a': - append_to_tagfile = TRUE; - break; - case 'C': - cplusplus = TRUE; - break; - case 'd': - constantypedefs = TRUE; - break; - case 'D': - constantypedefs = FALSE; - break; + case 'a': append_to_tagfile = TRUE; break; + case 'C': cplusplus = TRUE; break; + case 'd': constantypedefs = TRUE; break; + case 'D': constantypedefs = FALSE; break; case 'f': /* for compatibility with old makefiles */ case 'o': if (tagfile) { - fprintf (stderr, "%s: -%c option may only be given once.\n", - progname, opt); + error ("-o option may only be given once.", (char *)NULL); suggest_asking_for_help (); } tagfile = optarg; @@ -782,9 +946,15 @@ main (argc, argv) noindentypedefs = TRUE; break; case 'l': - argbuffer[current_arg].function = get_language_from_name (optarg); - argbuffer[current_arg].arg_type = at_language; - ++current_arg; + { + language *lang = get_language_from_name (optarg); + if (lang != NULL) + { + argbuffer[current_arg].lang = lang; + argbuffer[current_arg].arg_type = at_language; + ++current_arg; + } + } break; #ifdef ETAGS_REGEXPS case 'r': @@ -797,6 +967,11 @@ main (argc, argv) argbuffer[current_arg].what = NULL; ++current_arg; break; + case 'c': + argbuffer[current_arg].arg_type = at_icregexp; + argbuffer[current_arg].what = optarg; + ++current_arg; + break; #endif /* ETAGS_REGEXPS */ case 'V': print_version (); @@ -818,21 +993,11 @@ main (argc, argv) break; #else /* CTAGS */ /* Ctags options. 
*/ - case 'B': - searchar = '?'; - break; - case 'u': - update = TRUE; - break; - case 'v': - vgrind_style = TRUE; - /*FALLTHRU*/ - case 'x': - cxref_style = TRUE; - break; - case 'w': - no_warnings = TRUE; - break; + case 'B': searchar = '?'; break; + case 'u': update = TRUE; break; + case 'v': vgrind_style = TRUE; /*FALLTHRU*/ + case 'x': cxref_style = TRUE; break; + case 'w': no_warnings = TRUE; break; #endif /* CTAGS */ default: suggest_asking_for_help (); @@ -849,7 +1014,7 @@ main (argc, argv) if (nincluded_files == 0 && file_count == 0) { - fprintf (stderr, "%s: No input files specified.\n", progname); + error ("no input files specified.", (char *)NULL); suggest_asking_for_help (); } @@ -857,7 +1022,11 @@ main (argc, argv) tagfile = CTAGS ? "tags" : "TAGS"; cwd = etags_getcwd (); /* the current working directory */ if (cwd[strlen (cwd) - 1] != '/') - cwd = concat (cwd, "/", ""); + { + char *oldcwd = cwd; + cwd = concat (oldcwd, "/", ""); + free (oldcwd); + } if (streq (tagfile, "-")) tagfiledir = cwd; else @@ -897,11 +1066,14 @@ main (argc, argv) switch (argbuffer[i].arg_type) { case at_language: - lang_func = argbuffer[i].function; + forced_lang = argbuffer[i].lang; break; #ifdef ETAGS_REGEXPS case at_regexp: - add_regex (argbuffer[i].what); + analyse_regex (argbuffer[i].what, FALSE); + break; + case at_icregexp: + analyse_regex (argbuffer[i].what, TRUE); break; #endif case at_filename: @@ -910,7 +1082,7 @@ main (argc, argv) { if (got_err) { - error ("Can't find file %s\n", this_file); + error ("can't find file %s\n", this_file); argc--, argv++; } else @@ -921,7 +1093,7 @@ main (argc, argv) this_file = argbuffer[i].what; #endif /* Input file named "-" means read file names from stdin - and use them. */ + (one per line) and use them. 
*/ if (streq (this_file, "-")) while (readline_internal (&filename_lb, stdin) > 0) process_file (filename_lb.buffer); @@ -934,6 +1106,10 @@ main (argc, argv) } } +#ifdef ETAGS_REGEXPS + free_patterns (); +#endif /* ETAGS_REGEXPS */ + if (!CTAGS) { while (nincluded_files-- > 0) @@ -947,10 +1123,9 @@ main (argc, argv) because we want them ordered. Let's do it now. */ if (cxref_style) { - tagf = fopen (tagfile, append_to_tagfile ? "a" : "w"); - if (tagf == NULL) - pfatal (tagfile); put_entries (head); + free_tree (head); + head = NULL; exit (GOOD); } @@ -965,7 +1140,7 @@ main (argc, argv) "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS", tagfile, argbuffer[i].what, tagfile); if (system (cmd) != GOOD) - fatal ("failed to execute shell command"); + fatal ("failed to execute shell command", (char *)NULL); } append_to_tagfile = TRUE; } @@ -974,6 +1149,8 @@ main (argc, argv) if (tagf == NULL) pfatal (tagfile); put_entries (head); + free_tree (head); + head = NULL; fclose (tagf); if (update) @@ -982,46 +1159,85 @@ main (argc, argv) sprintf (cmd, "sort %s -o %s", tagfile, tagfile); exit (system (cmd)); } - exit (GOOD); + return GOOD; +} + + + +/* + * Return a compressor given the file name. If EXTPTR is non-zero, + * return a pointer into FILE where the compressor-specific + * extension begins. If no compressor is found, NULL is returned + * and EXTPTR is not significant. + * Idea by Vladimir Alexiev + */ +compressor * +get_compressor_from_suffix (file, extptr) + char *file; + char **extptr; +{ + compressor *compr; + char *slash, *suffix; + + /* This relies on FN to be after canonicalize_filename, + so we don't need to consider backslashes on DOS_NT. */ + slash = etags_strrchr (file, '/'); + suffix = etags_strrchr (file, '.'); + if (suffix == NULL || suffix < slash) + return NULL; + if (extptr != NULL) + *extptr = suffix; + suffix += 1; + /* Let those poor souls who live with DOS 8+3 file name limits get + some solace by treating foo.cgz as if it were foo.c.gz, etc. 
+ Only the first do loop is run if not MSDOS */ + do + { + for (compr = compressors; compr->suffix != NULL; compr++) + if (streq (compr->suffix, suffix)) + return compr; +#ifndef MSDOS + break; +#endif + if (extptr != NULL) + *extptr = ++suffix; + } while (*suffix != '\0'); + return NULL; } + /* - * Return a Lang_function given the name. + * Return a language given the name. */ -Lang_function * +language * get_language_from_name (name) char *name; { - struct lang_entry *lang; + language *lang; - if (name != NULL) - for (lang = lang_names; lang->name != NULL; lang++) - { + if (name == NULL) + error ("empty language name", (char *)NULL); + else + { + for (lang = lang_names; lang->name != NULL; lang++) if (streq (name, lang->name)) - return lang->function; - } - - fprintf (stderr, "%s: language \"%s\" not recognized.\n", - progname, optarg); - suggest_asking_for_help (); + return lang; + error ("unknown language \"%s\"", name); + } - /* This point should never be reached. The function should either - return a function pointer or never return. Note that a NULL - pointer cannot be considered as an error, as it means that the - language has not been explicitely imposed by the user ("auto"). */ - return NULL; /* avoid warnings from compiler */ + return NULL; } /* - * Return a Lang_function given the interpreter name. + * Return a language given the interpreter name. */ -Lang_function * +language * get_language_from_interpreter (interpreter) char *interpreter; { - struct lang_entry *lang; + language *lang; char **iname; if (interpreter == NULL) @@ -1030,7 +1246,7 @@ get_language_from_interpreter (interpreter) if (lang->interpreters != NULL) for (iname = lang->interpreters; *iname != NULL; iname++) if (streq (*iname, interpreter)) - return lang->function; + return lang; return NULL; } @@ -1038,27 +1254,29 @@ get_language_from_interpreter (interpreter) /* - * Return a Lang_function given the file suffix. + * Return a language given the file name. 
*/ -Lang_function * -get_language_from_suffix (suffix) - char *suffix; +language * +get_language_from_suffix (file) + char *file; { - struct lang_entry *lang; - char **ext; + language *lang; + char **ext, *suffix; + suffix = etags_strrchr (file, '.'); if (suffix == NULL) return NULL; + suffix += 1; for (lang = lang_names; lang->name != NULL; lang++) if (lang->suffixes != NULL) for (ext = lang->suffixes; *ext != NULL; ext++) if (streq (*ext, suffix)) - return lang->function; - + return lang; return NULL; } + /* * This routine is called on each file argument. */ @@ -1068,47 +1286,136 @@ process_file (file) { struct stat stat_buf; FILE *inf; -#ifdef DOS_NT - char *p; + compressor *compr; + char *compressed_name, *uncompressed_name; + char *ext, *real_name; - for (p = file; *p != '\0'; p++) - if (*p == '\\') - *p = '/'; -#endif - if (stat (file, &stat_buf) == 0 && !S_ISREG (stat_buf.st_mode)) + canonicalize_filename (file); + if (streq (file, tagfile) && !streq (tagfile, "-")) { - fprintf (stderr, "Skipping %s: it is not a regular file.\n", file); + error ("skipping inclusion of %s in self.", file); return; } - if (streq (file, tagfile) && !streq (tagfile, "-")) + if ((compr = get_compressor_from_suffix (file, &ext)) == NULL) { - fprintf (stderr, "Skipping inclusion of %s in self.\n", file); - return; + compressed_name = NULL; + real_name = uncompressed_name = savestr (file); + } + else + { + real_name = compressed_name = savestr (file); + uncompressed_name = savenstr (file, ext - file); } - inf = fopen (file, "r"); + + /* If the canonicalised uncompressed name has already been dealt with, + skip it silently, else add it to the list. 
*/ + { + typedef struct processed_file + { + char *filename; + struct processed_file *next; + } processed_file; + static processed_file *pf_head = NULL; + register processed_file *fnp; + + for (fnp = pf_head; fnp != NULL; fnp = fnp->next) + if (streq (uncompressed_name, fnp->filename)) + goto exit; + fnp = pf_head; + pf_head = xnew (1, struct processed_file); + pf_head->filename = savestr (uncompressed_name); + pf_head->next = fnp; + } + + if (stat (real_name, &stat_buf) != 0) + { + /* Reset real_name and try with a different name. */ + real_name = NULL; + if (compressed_name != NULL) /* try with the given suffix */ + { + if (stat (uncompressed_name, &stat_buf) == 0) + real_name = uncompressed_name; + } + else /* try all possible suffixes */ + { + for (compr = compressors; compr->suffix != NULL; compr++) + { + compressed_name = concat (file, ".", compr->suffix); + if (stat (compressed_name, &stat_buf) != 0) + { +#ifdef MSDOS + char *suf = compressed_name + strlen (file); + size_t suflen = strlen (compr->suffix) + 1; + for ( ; suf[1]; suf++, suflen--) + { + memmove (suf, suf + 1, suflen); + if (stat (compressed_name, &stat_buf) == 0) + { + real_name = compressed_name; + break; + } + } + if (real_name != NULL) + break; +#endif + free (compressed_name); + compressed_name = NULL; + } + else + { + real_name = compressed_name; + break; + } + } + } + if (real_name == NULL) + { + perror (file); + goto exit; + } + } /* try with a different name */ + + if (!S_ISREG (stat_buf.st_mode)) + { + error ("skipping %s: it is not a regular file.", real_name); + goto exit; + } + if (real_name == compressed_name) + { + char *cmd = concat (compr->command, " ", real_name); + inf = popen (cmd, "r"); + free (cmd); + } + else + inf = fopen (real_name, "r"); if (inf == NULL) { - perror (file); - return; + perror (real_name); + goto exit; } - find_entries (file, inf); + find_entries (uncompressed_name, inf); + + if (real_name == compressed_name) + pclose (inf); + else + fclose (inf); if 
(!CTAGS) { char *filename; - if (absolutefn (file)) + if (filename_is_absolute (uncompressed_name)) { - /* file is an absolute filename. Canonicalise it. */ - filename = absolute_filename (file, cwd); + /* file is an absolute file name. Canonicalise it. */ + filename = absolute_filename (uncompressed_name, cwd); } else { - /* file is a filename relative to cwd. Make it relative + /* file is a file name relative to cwd. Make it relative to the directory of the tags file. */ - filename = relative_filename (file, tagfiledir); + filename = relative_filename (uncompressed_name, tagfiledir); } fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head)); free (filename); @@ -1116,11 +1423,16 @@ process_file (file) free_tree (head); head = NULL; } + + exit: + if (compressed_name) free(compressed_name); + if (uncompressed_name) free(uncompressed_name); + return; } /* * This routine sets up the boolean pseudo-functions which work - * by setting boolean flags dependent upon the corresponding character + * by setting boolean flags dependent upon the corresponding character. * Every char which is NOT in that string is not a white char. Therefore, * all of the array "_wht" is set to FALSE, and then the elements * subscripted by the chars in "white" are set to TRUE. 
Thus "_wht" @@ -1132,66 +1444,63 @@ init () register char *sp; register int i; - for (i = 0; i < 0177; i++) - _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE; - for (sp = white; *sp; sp++) - _wht[*sp] = TRUE; - for (sp = endtk; *sp; sp++) - _etk[*sp] = TRUE; - for (sp = intk; *sp; sp++) - _itk[*sp] = TRUE; - for (sp = begtk; *sp; sp++) - _btk[*sp] = TRUE; - _wht[0] = _wht['\n']; - _etk[0] = _etk['\n']; - _btk[0] = _btk['\n']; - _itk[0] = _itk['\n']; + for (i = 0; i < CHARS; i++) + iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE; + for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE; + for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE; + for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE; + for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE; + for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE; + iswhite('\0') = iswhite('\n'); + notinname('\0') = notinname('\n'); + begtoken('\0') = begtoken('\n'); + intoken('\0') = intoken('\n'); + endtoken('\0') = endtoken('\n'); } /* * This routine opens the specified file and calls the function * which finds the function and type definitions. */ +node *last_node = NULL; + void find_entries (file, inf) char *file; FILE *inf; { char *cp; - Lang_function *function; - NODE *old_last_node; - extern NODE *last_node; - - - /* Memory leakage here: the memory block pointed by curfile is never - released. The amount of memory leaked here is the sum of the - lengths of the input file names. */ + language *lang; + node *old_last_node; + + /* Memory leakage here: the string pointed by curfile is + never released, because curfile is copied into np->file + for each node, to be used in CTAGS mode. The amount of + memory leaked here is the sum of the lengths of the + file names. */ curfile = savestr (file); /* If user specified a language, use it. 
*/ - function = lang_func; - if (function != NULL) + lang = forced_lang; + if (lang != NULL && lang->function != NULL) { - function (inf); - fclose (inf); + curlang = lang; + lang->function (inf); return; } - cp = etags_strrchr (file, '.'); - if (cp != NULL) + /* Try to guess the language given the file name. */ + lang = get_language_from_suffix (file); + if (lang != NULL && lang->function != NULL) { - cp += 1; - function = get_language_from_suffix (cp); - if (function != NULL) - { - function (inf); - fclose (inf); - return; - } + curlang = lang; + lang->function (inf); + return; } /* Look for sharp-bang as the first two characters. */ - if (readline_internal (&lb, inf) > 2 + if (readline_internal (&lb, inf) > 0 + && lb.len >= 2 && lb.buffer[0] == '#' && lb.buffer[1] == '!') { @@ -1204,36 +1513,40 @@ find_entries (file, inf) if (lp != NULL) lp += 1; else - for (lp = lb.buffer+2; *lp != '\0' && isspace (*lp); lp++) - continue; - for (cp = lp; *cp != '\0' && !isspace (*cp); cp++) - continue; + lp = skip_spaces (lb.buffer + 2); + cp = skip_non_spaces (lp); *cp = '\0'; if (strlen (lp) > 0) { - function = get_language_from_interpreter (lp); - if (function != NULL) + lang = get_language_from_interpreter (lp); + if (lang != NULL && lang->function != NULL) { - function (inf); - fclose (inf); + curlang = lang; + lang->function (inf); return; } } } + /* We rewind here, even if inf may be a pipe. We fail if the + length of the first line is longer than the pipe block size, + which is unlikely. */ rewind (inf); /* Try Fortran. */ old_last_node = last_node; + curlang = get_language_from_name ("fortran"); Fortran_functions (inf); /* No Fortran entries found. Try C. */ if (old_last_node == last_node) { + /* We do not tag if rewind fails. + Only the file name will be recorded in the tags file. */ rewind (inf); + curlang = get_language_from_name (cplusplus ? 
"c++" : "c"); default_C_entries (inf); } - fclose (inf); return; } @@ -1241,27 +1554,27 @@ find_entries (file, inf) void pfnote (name, is_func, linestart, linelen, lno, cno) char *name; /* tag name, or NULL if unnamed */ - logical is_func; /* tag is a function */ + bool is_func; /* tag is a function */ char *linestart; /* start of the line where tag is */ int linelen; /* length of the line where tag is */ int lno; /* line number */ long cno; /* character number */ { - register NODE *np; + register node *np; if (CTAGS && name == NULL) return; - np = xnew (1, NODE); + np = xnew (1, node); /* If ctags mode, change name "main" to M. */ if (CTAGS && !cxref_style && streq (name, "main")) { register char *fp = etags_strrchr (curfile, '/'); - np->name = concat ("M", fp == 0 ? curfile : fp + 1, ""); + np->name = concat ("M", fp == NULL ? curfile : fp + 1, ""); fp = etags_strrchr (np->name, '.'); - if (fp && fp[1] != '\0' && fp[2] == '\0') - fp[0] = 0; + if (fp != NULL && fp[1] != '\0' && fp[2] == '\0') + fp[0] = '\0'; } else np->name = name; @@ -1289,23 +1602,80 @@ pfnote (name, is_func, linestart, linelen, lno, cno) add_node (np, &head); } -/* - * free_tree () - * recurse on left children, iterate on right children. +/* Date: Wed, 22 Jan 1997 02:56:31 -0500 [last amended 18 Sep 1997] + * From: Sam Kendall + * Subject: Proposal for firming up the TAGS format specification + * To: F.Potorti@cnuce.cnr.it + * + * pfnote should emit the optimized form [unnamed tag] only if: + * 1. name does not contain any of the characters " \t\r\n(),;"; + * 2. linestart contains name as either a rightmost, or rightmost but + * one character, substring; + * 3. the character, if any, immediately before name in linestart must + * be one of the characters " \t(),;"; + * 4. the character, if any, immediately after name in linestart must + * also be one of the characters " \t(),;". 
+ * + * The real implementation uses the notinname() macro, which recognises + * characters slightly different from " \t\r\n(),;". See the variable + * `nonam'. */ +#define traditional_tag_style TRUE void -free_tree (node) - register NODE *node; +new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno) + char *name; /* tag name, or NULL if unnamed */ + int namelen; /* tag length */ + bool is_func; /* tag is a function */ + char *linestart; /* start of the line where tag is */ + int linelen; /* length of the line where tag is */ + int lno; /* line number */ + long cno; /* character number */ { - while (node) + register char *cp; + bool named; + + named = TRUE; + if (!CTAGS) { - register NODE *node_right = node->right; - free_tree (node->left); - if (node->name != NULL) - free (node->name); - free (node->pat); - free ((char *) node); - node = node_right; + for (cp = name; !notinname (*cp); cp++) + continue; + if (*cp == '\0') /* rule #1 */ + { + cp = linestart + linelen - namelen; + if (notinname (linestart[linelen-1])) + cp -= 1; /* rule #4 */ + if (cp >= linestart /* rule #2 */ + && (cp == linestart + || notinname (cp[-1])) /* rule #3 */ + && strneq (name, cp, namelen)) /* rule #2 */ + named = FALSE; /* use unnamed tag */ + } + } + + if (named) + name = savenstr (name, namelen); + else + name = NULL; + pfnote (name, is_func, linestart, linelen, lno, cno); +} + +/* + * free_tree () + * recurse on left children, iterate on right children. + */ +void +free_tree (np) + register node *np; +{ + while (np) + { + register node *node_right = np->right; + free_tree (np->left); + if (np->name != NULL) + free (np->name); + free (np->pat); + free (np); + np = node_right; } } @@ -1318,18 +1688,17 @@ free_tree (node) * add_node is the only function allowed to add nodes, so it can * maintain state. 
*/ -NODE *last_node = NULL; void -add_node (node, cur_node_p) - NODE *node, **cur_node_p; +add_node (np, cur_node_p) + node *np, **cur_node_p; { register int dif; - register NODE *cur_node = *cur_node_p; + register node *cur_node = *cur_node_p; if (cur_node == NULL) { - *cur_node_p = node; - last_node = node; + *cur_node_p = np; + last_node = np; return; } @@ -1337,14 +1706,14 @@ add_node (node, cur_node_p) { /* Etags Mode */ if (last_node == NULL) - fatal ("internal error in add_node", 0); - last_node->right = node; - last_node = node; + fatal ("internal error in add_node", (char *)NULL); + last_node->right = np; + last_node = np; } else { /* Ctags Mode */ - dif = strcmp (node->name, cur_node->name); + dif = strcmp (np->name, cur_node->name); /* * If this tag name matches an existing one, then @@ -1352,12 +1721,12 @@ add_node (node, cur_node_p) */ if (!dif) { - if (streq (node->file, cur_node->file)) + if (streq (np->file, cur_node->file)) { if (!no_warnings) { fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n", - node->file, lineno, node->name); + np->file, lineno, np->name); fprintf (stderr, "Second entry ignored\n"); } } @@ -1366,64 +1735,64 @@ add_node (node, cur_node_p) fprintf (stderr, "Duplicate entry in files %s and %s: %s (Warning only)\n", - node->file, cur_node->file, node->name); + np->file, cur_node->file, np->name); cur_node->been_warned = TRUE; } return; } /* Actually add the node */ - add_node (node, dif < 0 ? &cur_node->left : &cur_node->right); + add_node (np, dif < 0 ? 
&cur_node->left : &cur_node->right); } } void -put_entries (node) - register NODE *node; +put_entries (np) + register node *np; { register char *sp; - if (node == NULL) + if (np == NULL) return; /* Output subentries that precede this one */ - put_entries (node->left); + put_entries (np->left); /* Output this entry */ if (!CTAGS) { - if (node->name != NULL) - fprintf (tagf, "%s\177%s\001%d,%d\n", - node->pat, node->name, node->lno, node->cno); + if (np->name != NULL) + fprintf (tagf, "%s\177%s\001%d,%ld\n", + np->pat, np->name, np->lno, np->cno); else - fprintf (tagf, "%s\177%d,%d\n", - node->pat, node->lno, node->cno); + fprintf (tagf, "%s\177%d,%ld\n", + np->pat, np->lno, np->cno); } else { - if (node->name == NULL) - error ("internal error: NULL name in ctags mode.", 0); + if (np->name == NULL) + error ("internal error: NULL name in ctags mode.", (char *)NULL); if (cxref_style) { if (vgrind_style) fprintf (stdout, "%s %s %d\n", - node->name, node->file, (node->lno + 63) / 64); + np->name, np->file, (np->lno + 63) / 64); else fprintf (stdout, "%-16s %3d %-16s %s\n", - node->name, node->lno, node->file, node->pat); + np->name, np->lno, np->file, np->pat); } else { - fprintf (tagf, "%s\t%s\t", node->name, node->file); + fprintf (tagf, "%s\t%s\t", np->name, np->file); - if (node->is_func) + if (np->is_func) { /* a function */ putc (searchar, tagf); putc ('^', tagf); - for (sp = node->pat; *sp; sp++) + for (sp = np->pat; *sp; sp++) { if (*sp == '\\' || *sp == searchar) putc ('\\', tagf); @@ -1433,14 +1802,14 @@ put_entries (node) } else { /* a typedef; text pattern inadequate */ - fprintf (tagf, "%d", node->lno); + fprintf (tagf, "%d", np->lno); } putc ('\n', tagf); } } /* Output subentries that follow this one */ - put_entries (node->right); + put_entries (np->right); } /* Length of a number's decimal representation. 
*/ @@ -1448,11 +1817,9 @@ int number_len (num) long num; { - int len = 0; - if (!num) - return 1; - for (; num; num /= 10) - ++len; + int len = 1; + while ((num /= 10) > 0) + len += 1; return len; } @@ -1464,25 +1831,24 @@ number_len (num) * backward compatibility. */ int -total_size_of_entries (node) - register NODE *node; +total_size_of_entries (np) + register node *np; { register int total; - if (node == NULL) + if (np == NULL) return 0; - total = 0; - for (; node; node = node->right) + for (total = 0; np != NULL; np = np->right) { /* Count left subentries. */ - total += total_size_of_entries (node->left); + total += total_size_of_entries (np->left); /* Count this entry */ - total += strlen (node->pat) + 1; - total += number_len ((long) node->lno) + 1 + number_len (node->cno) + 1; - if (node->name != NULL) - total += 1 + strlen (node->name); /* \001name */ + total += strlen (np->pat) + 1; + total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1; + if (np->name != NULL) + total += 1 + strlen (np->name); /* \001name */ } return total; @@ -1493,8 +1859,13 @@ total_size_of_entries (node) */ enum sym_type { - st_none, st_C_objprot, st_C_objimpl, st_C_objend, st_C_gnumacro, - st_C_struct, st_C_enum, st_C_define, st_C_typedef, st_C_typespec + st_none, + st_C_objprot, st_C_objimpl, st_C_objend, + st_C_gnumacro, + st_C_ignore, + st_C_javastruct, + st_C_operator, + st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec }; /* Feed stuff between (but not including) %[ and %] lines to: @@ -1502,18 +1873,31 @@ enum sym_type %[ struct C_stab_entry { char *name; int c_ext; enum sym_type type; } %% +if, 0, st_C_ignore +for, 0, st_C_ignore +while, 0, st_C_ignore +switch, 0, st_C_ignore +return, 0, st_C_ignore @interface, 0, st_C_objprot @protocol, 0, st_C_objprot @implementation,0, st_C_objimpl @end, 0, st_C_objend +import, C_JAVA, st_C_ignore +package, C_JAVA, st_C_ignore +friend, C_PLPL, st_C_ignore +extends, C_JAVA, st_C_javastruct 
+implements, C_JAVA, st_C_javastruct +interface, C_JAVA, st_C_struct class, C_PLPL, st_C_struct namespace, C_PLPL, st_C_struct domain, C_STAR, st_C_struct union, 0, st_C_struct struct, 0, st_C_struct +extern, 0, st_C_extern enum, 0, st_C_enum typedef, 0, st_C_typedef define, 0, st_C_define +operator, C_PLPL, st_C_operator bool, C_PLPL, st_C_typespec long, 0, st_C_typespec short, 0, st_C_typespec @@ -1525,7 +1909,6 @@ signed, 0, st_C_typespec unsigned, 0, st_C_typespec auto, 0, st_C_typespec void, 0, st_C_typespec -extern, 0, st_C_typespec static, 0, st_C_typespec const, 0, st_C_typespec volatile, 0, st_C_typespec @@ -1544,118 +1927,163 @@ PSEUDO, 0, st_C_gnumacro %] and replace lines between %< and %> with its output. */ /*%<*/ -/* C code produced by gperf version 2.1 (K&R C version) */ +/* C code produced by gperf version 2.7.1 (19981006 egcs) */ /* Command-line: gperf -c -k 1,3 -o -p -r -t */ - - struct C_stab_entry { char *name; int c_ext; enum sym_type type; }; -#define MIN_WORD_LENGTH 3 +#define TOTAL_KEYWORDS 46 +#define MIN_WORD_LENGTH 2 #define MAX_WORD_LENGTH 15 -#define MIN_HASH_VALUE 34 -#define MAX_HASH_VALUE 121 -/* - 34 keywords - 88 is the maximum key range -*/ +#define MIN_HASH_VALUE 13 +#define MAX_HASH_VALUE 123 +/* maximum key range = 111, duplicates = 0 */ -static int +#ifdef __GNUC__ +__inline +#endif +static unsigned int hash (str, len) - register char *str; - register unsigned int len; + register const char *str; + register unsigned int len; { - static unsigned char hash_table[] = + static unsigned char asso_values[] = + { + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 3, 124, 124, 124, 43, 6, + 11, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 11, 124, 124, 
58, 7, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 57, 7, 42, + 4, 14, 52, 0, 124, 53, 124, 124, 29, 11, + 6, 35, 32, 124, 29, 34, 59, 58, 51, 24, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124 + }; + register int hval = len; + + switch (hval) { - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 45, 121, 121, 121, 16, 19, - 61, 121, 121, 121, 121, 121, 121, 121, 121, 121, - 10, 121, 121, 20, 53, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 41, 45, 22, - 60, 47, 37, 28, 121, 55, 121, 121, 20, 14, - 29, 30, 5, 121, 50, 59, 30, 54, 6, 121, - 121, 121, 121, 121, 121, 121, 121, 121, - }; - return len + hash_table[str[2]] + hash_table[str[0]]; + default: + case 3: + hval += asso_values[(unsigned char)str[2]]; + case 2: + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval; } +#ifdef __GNUC__ +__inline +#endif struct C_stab_entry * in_word_set (str, len) - register char *str; + register const char *str; register unsigned int len; { - - static struct C_stab_entry wordlist[] = + static 
struct C_stab_entry wordlist[] = { - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"volatile", 0, st_C_typespec}, - {"PSEUDO", 0, st_C_gnumacro}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"typedef", 0, st_C_typedef}, - {"typename", C_PLPL, st_C_typespec}, - {"",}, {"",}, {"",}, - {"SYSCALL", 0, st_C_gnumacro}, - {"",}, {"",}, {"",}, - {"mutable", C_PLPL, st_C_typespec}, - {"namespace", C_PLPL, st_C_struct}, - {"long", 0, st_C_typespec}, - {"",}, {"",}, - {"const", 0, st_C_typespec}, - {"",}, {"",}, {"",}, - {"explicit", C_PLPL, st_C_typespec}, - {"",}, {"",}, {"",}, {"",}, - {"void", 0, st_C_typespec}, - {"",}, - {"char", 0, st_C_typespec}, - {"class", C_PLPL, st_C_struct}, - {"",}, {"",}, {"",}, - {"float", 0, st_C_typespec}, - {"",}, - {"@implementation", 0, st_C_objimpl}, - {"auto", 0, st_C_typespec}, - {"",}, - {"ENTRY", 0, st_C_gnumacro}, - {"@end", 0, st_C_objend}, - {"bool", C_PLPL, st_C_typespec}, - {"domain", C_STAR, st_C_struct}, - {"",}, - {"DEFUN", 0, st_C_gnumacro}, - {"extern", 0, st_C_typespec}, - {"@interface", 0, st_C_objprot}, - {"",}, {"",}, {"",}, - {"int", 0, st_C_typespec}, - {"",}, {"",}, {"",}, {"",}, - {"signed", 0, st_C_typespec}, - {"short", 0, st_C_typespec}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"define", 0, st_C_define}, - {"@protocol", 0, st_C_objprot}, - {"enum", 0, st_C_enum}, - {"static", 0, st_C_typespec}, - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"union", 0, st_C_struct}, - {"struct", 0, st_C_struct}, - {"",}, {"",}, {"",}, {"",}, - {"double", 0, st_C_typespec}, - {"unsigned", 0, st_C_typespec}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, + {"@end", 0, st_C_objend}, + {""}, {""}, {""}, {""}, + {"ENTRY", 0, st_C_gnumacro}, + {"@interface", 0, st_C_objprot}, 
+ {""}, + {"domain", C_STAR, st_C_struct}, + {""}, + {"PSEUDO", 0, st_C_gnumacro}, + {""}, {""}, + {"namespace", C_PLPL, st_C_struct}, + {""}, {""}, + {"@implementation",0, st_C_objimpl}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"long", 0, st_C_typespec}, + {"signed", 0, st_C_typespec}, + {"@protocol", 0, st_C_objprot}, + {""}, {""}, {""}, {""}, + {"bool", C_PLPL, st_C_typespec}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"const", 0, st_C_typespec}, + {"explicit", C_PLPL, st_C_typespec}, + {"if", 0, st_C_ignore}, + {""}, + {"operator", C_PLPL, st_C_operator}, + {""}, + {"DEFUN", 0, st_C_gnumacro}, + {""}, {""}, + {"define", 0, st_C_define}, + {""}, {""}, {""}, {""}, {""}, + {"double", 0, st_C_typespec}, + {"struct", 0, st_C_struct}, + {""}, {""}, {""}, {""}, + {"short", 0, st_C_typespec}, + {""}, + {"enum", 0, st_C_enum}, + {"mutable", C_PLPL, st_C_typespec}, + {""}, + {"extern", 0, st_C_extern}, + {"extends", C_JAVA, st_C_javastruct}, + {"package", C_JAVA, st_C_ignore}, + {"while", 0, st_C_ignore}, + {""}, + {"for", 0, st_C_ignore}, + {""}, {""}, {""}, + {"volatile", 0, st_C_typespec}, + {""}, {""}, + {"import", C_JAVA, st_C_ignore}, + {"float", 0, st_C_typespec}, + {"switch", 0, st_C_ignore}, + {"return", 0, st_C_ignore}, + {"implements", C_JAVA, st_C_javastruct}, + {""}, + {"static", 0, st_C_typespec}, + {"typedef", 0, st_C_typedef}, + {"typename", C_PLPL, st_C_typespec}, + {"unsigned", 0, st_C_typespec}, + {""}, {""}, + {"char", 0, st_C_typespec}, + {"class", C_PLPL, st_C_struct}, + {""}, {""}, {""}, + {"void", 0, st_C_typespec}, + {""}, {""}, + {"friend", C_PLPL, st_C_ignore}, + {""}, {""}, {""}, + {"int", 0, st_C_typespec}, + {"union", 0, st_C_struct}, + {""}, {""}, {""}, + {"auto", 0, st_C_typespec}, + {"interface", C_JAVA, st_C_struct}, + {""}, + {"SYSCALL", 0, st_C_gnumacro} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { register int key = hash (str, len); - if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE) + if (key <= 
MAX_HASH_VALUE && key >= 0) { - register char *s = wordlist[key].name; + register const char *s = wordlist[key].name; - if (*s == *str && !strncmp (str + 1, s + 1, len - 1)) + if (*str == *s && !strncmp (str + 1, s + 1, len - 1)) return &wordlist[key]; } } @@ -1664,12 +2092,12 @@ in_word_set (str, len) /*%>*/ enum sym_type -C_symtype(str, len, c_ext) +C_symtype (str, len, c_ext) char *str; int len; int c_ext; { - register struct C_stab_entry *se = in_word_set(str, len); + register struct C_stab_entry *se = in_word_set (str, len); if (se == NULL || (se->c_ext && !(c_ext & se->c_ext))) return st_none; @@ -1677,19 +2105,22 @@ C_symtype(str, len, c_ext) } /* - * C functions are recognized using a simple finite automaton. - * funcdef is its state variable. + * C functions and variables are recognized using a simple + * finite automaton. fvdef is its state variable. */ enum { - fnone, /* nothing seen */ - ftagseen, /* function-like tag seen */ - fstartlist, /* just after open parenthesis */ - finlist, /* in parameter list */ - flistseen, /* after parameter list */ - fignore /* before open brace */ -} funcdef; - + fvnone, /* nothing seen */ + foperator, /* func: operator keyword seen (cplpl) */ + fvnameseen, /* function or variable name seen */ + fstartlist, /* func: just after open parenthesis */ + finlist, /* func: in parameter list */ + flistseen, /* func: after parameter list */ + fignore, /* func: before open brace */ + vignore /* var-like: ignore until ';' */ +} fvdef; + +bool fvextern; /* func or var: extern keyword seen; */ /* * typedefs are recognized using a simple finite automaton. @@ -1698,7 +2129,8 @@ enum enum { tnone, /* nothing seen */ - ttypedseen, /* typedef keyword seen */ + tkeyseen, /* typedef keyword seen */ + ttypeseen, /* defined type seen */ tinbody, /* inside typedef body */ tend, /* just before typedef tag */ tignore /* junk after typedef tag */ @@ -1745,6 +2177,7 @@ enum /* * State machine for Objective C protocols and implementations. 
+ * Tom R.Hageman */ enum { @@ -1762,16 +2195,34 @@ enum oignore /* wait for @end */ } objdef; + +/* + * Use this structure to keep info about the token read, and how it + * should be tagged. Used by the make_C_tag function to build a tag. + */ +typedef struct +{ + bool valid; + char *str; + bool named; + int linelen; + int lineno; + long linepos; + char *buffer; +} token; + +token tok; /* latest token read */ + /* * Set this to TRUE, and the next token considered is called a function. * Used only for GNU emacs's function-defining macros. */ -logical next_token_is_func; +bool next_token_is_func; /* * TRUE in the rules part of a yacc file, FALSE outside (parse as C). */ -logical yacc_rules; +bool yacc_rules; /* * methodlen is the length of the method name stored in token_name. @@ -1781,19 +2232,14 @@ int methodlen; /* * consider_token () * checks to see if the current token is at the start of a - * function, or corresponds to a typedef, or is a struct/union/enum - * tag. - * - * *IS_FUNC gets TRUE iff the token is a function or macro with args. - * C_EXT is which language we are looking at. + * function or variable, or corresponds to a typedef, or + * is a struct/union/enum tag, or #define, or an enum constant. * - * In the future we will need some way to adjust where the end of - * the token is; for instance, implementing the C++ keyword - * `operator' properly will adjust the end of the token to be after - * whatever follows `operator'. + * *IS_FUNC gets TRUE iff the token is a function or #define macro + * with args. C_EXT is which language we are looking at. 
* * Globals - * funcdef IN OUT + * fvdef IN OUT * structdef IN OUT * definedef IN OUT * typdef IN OUT @@ -1801,15 +2247,15 @@ int methodlen; * next_token_is_func IN OUT */ -logical -consider_token (str, len, c, c_ext, cblev, parlev, is_func) +bool +consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var) register char *str; /* IN: token pointer */ register int len; /* IN: token length */ register char c; /* IN: first char after the token */ int c_ext; /* IN: C extensions mask */ int cblev; /* IN: curly brace level */ int parlev; /* IN: parenthesis level */ - logical *is_func; /* OUT: function found */ + bool *is_func_or_var; /* OUT: function or variable found */ { enum sym_type toktype = C_symtype (str, len, c_ext); @@ -1837,15 +2283,15 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) * and constantypedefs is FALSE. */ definedef = dignorerest; - *is_func = (c == '('); - if (!*is_func && !constantypedefs) + *is_func_or_var = (c == '('); + if (!*is_func_or_var && !constantypedefs) return FALSE; else return TRUE; case dignorerest: return FALSE; default: - error ("internal error: definedef value.", 0); + error ("internal error: definedef value.", (char *)NULL); } /* @@ -1857,20 +2303,20 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) if (toktype == st_C_typedef) { if (typedefs) - typdef = ttypedseen; - funcdef = fnone; + typdef = tkeyseen; + fvextern = FALSE; + fvdef = fvnone; return FALSE; } break; - case ttypedseen: + case tkeyseen: switch (toktype) { case st_none: case st_C_typespec: - typdef = tend; - break; case st_C_struct: case st_C_enum: + typdef = ttypeseen; break; } /* Do not return here, so the structdef stuff has a chance. */ @@ -1895,17 +2341,16 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) * This structdef business is NOT invoked when we are ctags and the * file is plain C. This is because a struct tag may have the same * name as another tag, and this loses with ctags. 
- * - * This if statement deals with the typdef state machine as - * follows: if typdef==ttypedseen and token is struct/union/class/enum, - * return FALSE. All the other code here is for the structdef - * state machine. */ switch (toktype) { + case st_C_javastruct: + if (structdef == stagseen) + structdef = scolonseen; + return FALSE; case st_C_struct: case st_C_enum: - if (typdef == ttypedseen + if (typdef == tkeyseen || (typedefs_and_cplusplus && cblev == 0 && structdef == snone)) { structdef = skeyseen; @@ -1913,10 +2358,11 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) } return FALSE; } + if (structdef == skeyseen) { - /* Save the tag for struct/union/class, for functions that may be - defined inside. */ + /* Save the tag for struct/union/class, for functions and variables + that may be defined inside. */ if (structtype == st_C_struct) structtag = savenstr (str, len); else @@ -1925,14 +2371,24 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) return TRUE; } - /* Avoid entering funcdef stuff if typdef is going on. */ if (typdef != tnone) - { - definedef = dnone; - return FALSE; - } - - /* Detect GNU macros. */ + definedef = dnone; + + /* Detect GNU macros. + + Writers of emacs code are recommended to put the + first two args of a DEFUN on the same line. + + The DEFUN macro, used in emacs C source code, has a first arg + that is a string (the lisp function name), and a second arg that + is a C function name. Since etags skips strings, the second arg + is tagged. This is unfortunate, as it would be better to tag the + first arg. The simplest way to deal with this problem would be + to name the tag with a name built from the function name, by + removing the initial 'F' character and substituting '-' for '_'. + Anyway, this assumes that the conventions of naming lisp + functions will never change. Currently, this method is not + implemented. 
*/ if (definedef == dnone && toktype == st_C_gnumacro) { next_token_is_func = TRUE; @@ -1941,14 +2397,12 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) if (next_token_is_func) { next_token_is_func = FALSE; - funcdef = fignore; - *is_func = TRUE; + fvdef = fignore; + *is_func_or_var = TRUE; return TRUE; } - /* - * Detecting Objective C constructs. - */ + /* Detect Objective C constructs. */ switch (objdef) { case onone: @@ -1963,7 +2417,7 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) } break; case oimplementation: - /* Save the class tag for functions that may be defined inside. */ + /* Save the class tag for functions or variables defined inside. */ objtag = savenstr (str, len); objdef = oinbody; return FALSE; @@ -1971,11 +2425,11 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) /* Save the class tag for categories. */ objtag = savenstr (str, len); objdef = otagseen; - *is_func = TRUE; + *is_func_or_var = TRUE; return TRUE; case oparenseen: objdef = ocatseen; - *is_func = TRUE; + *is_func_or_var = TRUE; return TRUE; case oinbody: break; @@ -1984,9 +2438,10 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) { objdef = omethodtag; methodlen = len; - GROW_LINEBUFFER (token_name, methodlen+1); + grow_linebuffer (&token_name, methodlen + 1); strncpy (token_name.buffer, str, len); token_name.buffer[methodlen] = '\0'; + token_name.len = methodlen; return TRUE; } return FALSE; @@ -1999,8 +2454,9 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) { objdef = omethodtag; methodlen += len; - GROW_LINEBUFFER (token_name, methodlen+1); + grow_linebuffer (&token_name, methodlen + 1); strncat (token_name.buffer, str, len); + token_name.len = methodlen; return TRUE; } return FALSE; @@ -2018,20 +2474,40 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) return FALSE; } - /* A function? */ + /* A function, variable or enum constant? 
*/ switch (toktype) { + case st_C_extern: + fvextern = TRUE; + /* FALLTHRU */ case st_C_typespec: - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; /* should be useless */ + if (fvdef != finlist && fvdef != fignore && fvdef != vignore) + fvdef = fvnone; /* should be useless */ return FALSE; - default: - if (funcdef == fnone) + case st_C_ignore: + fvextern = FALSE; + fvdef = vignore; + return FALSE; + case st_C_operator: + fvdef = foperator; + *is_func_or_var = TRUE; + return TRUE; + case st_none: + if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10)) + { + fvdef = foperator; + *is_func_or_var = TRUE; + return TRUE; + } + if (constantypedefs && structdef == sinbody && structtype == st_C_enum) + return TRUE; + if (fvdef == fvnone) { - funcdef = ftagseen; - *is_func = TRUE; + fvdef = fvnameseen; /* function or variable */ + *is_func_or_var = TRUE; return TRUE; } + break; } return FALSE; @@ -2039,21 +2515,10 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func) /* * C_entries () - * This routine finds functions, typedefs, #define's and - * struct/union/enum definitions in C syntax and adds them - * to the list. + * This routine finds functions, variables, typedefs, + * #define's, enum constants and struct/union/enum definitions in + * C syntax and adds them to the list. 
*/ -typedef struct -{ - logical valid; - char *str; - logical named; - int linelen; - int lineno; - long linepos; - char *buffer; -} TOKEN; - #define current_lb_is_new (newndx == curndx) #define switch_line_buffers() (curndx = 1 - curndx) @@ -2064,7 +2529,7 @@ typedef struct #define othlinepos (lbs[1-curndx].linepos) #define newlinepos (lbs[newndx].linepos) -#define CNL_SAVE_DEFINEDEF \ +#define CNL_SAVE_DEFINEDEF() \ do { \ curlinepos = charno; \ lineno++; \ @@ -2075,9 +2540,9 @@ do { \ newndx = curndx; \ } while (0) -#define CNL \ +#define CNL() \ do { \ - CNL_SAVE_DEFINEDEF; \ + CNL_SAVE_DEFINEDEF(); \ if (savetok.valid) \ { \ tok = savetok; \ @@ -2086,16 +2551,35 @@ do { \ definedef = dnone; \ } while (0) -/* Ideally this macro should never be called wihen tok.valid is FALSE, - but this would mean that the state machines always guess right. */ -#define make_tag(isfun) do \ -if (tok.valid) { \ - char *name = NULL; \ - if (CTAGS || tok.named) \ - name = savestr (token_name.buffer); \ - pfnote (name, isfun, tok.buffer, tok.linelen, tok.lineno, tok.linepos); \ - tok.valid = FALSE; \ -} while (0) + +void +make_C_tag (isfun) + bool isfun; +{ + /* This function should never be called when tok.valid is FALSE, but + we must protect against invalid input or internal errors. */ + if (tok.valid) + { + if (traditional_tag_style) + { + /* This was the original code. Now we call new_pfnote instead, + which uses the new method for naming tags (see new_pfnote). 
*/ + char *name = NULL; + + if (CTAGS || tok.named) + name = savestr (token_name.buffer); + pfnote (name, isfun, + tok.buffer, tok.linelen, tok.lineno, tok.linepos); + } + else + new_pfnote (token_name.buffer, token_name.len, isfun, + tok.buffer, tok.linelen, tok.lineno, tok.linepos); + tok.valid = FALSE; + } + else if (DEBUG) + abort (); +} + void C_entries (c_ext, inf) @@ -2105,30 +2589,38 @@ C_entries (c_ext, inf) register char c; /* latest char read; '\0' for end of line */ register char *lp; /* pointer one beyond the character `c' */ int curndx, newndx; /* indices for current and new lb */ - TOKEN tok; /* latest token read */ register int tokoff; /* offset in line of start of current token */ register int toklen; /* length of current token */ + char *qualifier; /* string used to qualify names */ + int qlen; /* length of qualifier */ int cblev; /* current curly brace level */ int parlev; /* current parenthesis level */ - logical incomm, inquote, inchar, quotednl, midtoken; - logical cplpl; - TOKEN savetok; /* token saved during preprocessor handling */ + bool incomm, inquote, inchar, quotednl, midtoken; + bool purec, cplpl, cjava; + token savetok; /* token saved during preprocessor handling */ + tokoff = toklen = 0; /* keep compiler quiet */ curndx = newndx = 0; lineno = 0; charno = 0; lp = curlb.buffer; *lp = 0; - funcdef = fnone; typdef = tnone; structdef = snone; - definedef = dnone; objdef = onone; + fvdef = fvnone; fvextern = FALSE; typdef = tnone; + structdef = snone; definedef = dnone; objdef = onone; next_token_is_func = yacc_rules = FALSE; midtoken = inquote = inchar = incomm = quotednl = FALSE; tok.valid = savetok.valid = FALSE; cblev = 0; parlev = 0; - cplpl = c_ext & C_PLPL; + purec = !(c_ext & ~YACC); /* no extensions (apart from possibly yacc) */ + cplpl = (c_ext & C_PLPL) == C_PLPL; + cjava = (c_ext & C_JAVA) == C_JAVA; + if (cjava) + { qualifier = "."; qlen = 1; } + else + { qualifier = "::"; qlen = 2; } while (!feof (inf)) { @@ -2160,7 +2652,7 
@@ C_entries (c_ext, inf) case '\0': /* Newlines inside comments do not end macro definitions in traditional cpp. */ - CNL_SAVE_DEFINEDEF; + CNL_SAVE_DEFINEDEF (); break; } continue; @@ -2176,7 +2668,7 @@ C_entries (c_ext, inf) /* Newlines inside strings do not end macro definitions in traditional cpp, even though compilers don't usually accept them. */ - CNL_SAVE_DEFINEDEF; + CNL_SAVE_DEFINEDEF (); break; } continue; @@ -2187,7 +2679,7 @@ C_entries (c_ext, inf) { case '\0': /* Hmmm, something went wrong. */ - CNL; + CNL (); /* FALLTHRU */ case '\'': inchar = FALSE; @@ -2200,13 +2692,19 @@ C_entries (c_ext, inf) { case '"': inquote = TRUE; - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; + if (fvdef != finlist && fvdef != fignore && fvdef !=vignore) + { + fvextern = FALSE; + fvdef = fvnone; + } continue; case '\'': inchar = TRUE; - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; + if (fvdef != finlist && fvdef != fignore && fvdef !=vignore) + { + fvextern = FALSE; + fvdef = fvnone; + } continue; case '/': if (*lp == '*') @@ -2227,7 +2725,7 @@ C_entries (c_ext, inf) { /* entering or exiting rules section in yacc file */ lp++; - definedef = dnone; funcdef = fnone; + definedef = dnone; fvdef = fvnone; fvextern = FALSE; typdef = tnone; structdef = snone; next_token_is_func = FALSE; midtoken = inquote = inchar = incomm = quotednl = FALSE; @@ -2241,7 +2739,7 @@ C_entries (c_ext, inf) if (definedef == dnone) { char *cp; - logical cpptoken = TRUE; + bool cpptoken = TRUE; /* Look back on this line. If all blanks, or nonblanks followed by an end of comment, this is a preprocessor @@ -2268,55 +2766,75 @@ C_entries (c_ext, inf) /* Consider token only if some complicated conditions are satisfied. 
*/ if ((definedef != dnone || (cblev == 0 && structdef != scolonseen) - || (cblev == 1 && cplpl && structdef == sinbody)) + || (cblev == 1 && cplpl && structdef == sinbody) + || (structdef == sinbody && purec)) && typdef != tignore && definedef != dignorerest - && funcdef != finlist) + && fvdef != finlist) { if (midtoken) { if (endtoken (c)) { - if (c == ':' && cplpl && *lp == ':' && begtoken(*(lp + 1))) + bool funorvar = FALSE; + + if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1])) { /* * This handles :: in the middle, but not at the - * beginning of an identifier. + * beginning of an identifier. Also, space-separated + * :: is not recognised. */ lp += 2; - toklen += 3; + toklen += 2; + c = lp[-1]; + goto intok; } else { - logical is_func = FALSE; - if (yacc_rules || consider_token (newlb.buffer + tokoff, toklen, c, - c_ext, cblev, parlev, &is_func)) + c_ext, cblev, parlev, &funorvar)) { - if (structdef == sinbody + if (fvdef == foperator) + { + char *oldlp = lp; + lp = skip_spaces (lp-1); + if (*lp != '\0') + lp += 1; + while (*lp != '\0' + && !isspace (*lp) && *lp != '(') + lp += 1; + c = *lp++; + toklen += lp - oldlp; + } + tok.named = FALSE; + if (!purec + && funorvar && definedef == dnone - && is_func) - /* function defined in C++ class body */ + && structdef == sinbody) + /* function or var defined in C++ class body */ { - GROW_LINEBUFFER (token_name, - strlen(structtag)+2+toklen+1); + int len = strlen (structtag) + qlen + toklen; + grow_linebuffer (&token_name, len + 1); strcpy (token_name.buffer, structtag); - strcat (token_name.buffer, "::"); + strcat (token_name.buffer, qualifier); strncat (token_name.buffer, - newlb.buffer+tokoff, toklen); + newlb.buffer + tokoff, toklen); + token_name.len = len; tok.named = TRUE; } else if (objdef == ocatseen) /* Objective C category */ { - GROW_LINEBUFFER (token_name, - strlen(objtag)+2+toklen+1); + int len = strlen (objtag) + 2 + toklen; + grow_linebuffer (&token_name, len + 1); strcpy (token_name.buffer, 
objtag); strcat (token_name.buffer, "("); strncat (token_name.buffer, - newlb.buffer+tokoff, toklen); + newlb.buffer + tokoff, toklen); strcat (token_name.buffer, ")"); + token_name.len = len; tok.named = TRUE; } else if (objdef == omethodtag @@ -2327,17 +2845,20 @@ C_entries (c_ext, inf) } else { - GROW_LINEBUFFER (token_name, toklen+1); + grow_linebuffer (&token_name, toklen + 1); strncpy (token_name.buffer, - newlb.buffer+tokoff, toklen); + newlb.buffer + tokoff, toklen); token_name.buffer[toklen] = '\0'; - if (structdef == stagseen - || typdef == tend - || (is_func - && definedef == dignorerest)) /* macro */ - tok.named = TRUE; - else - tok.named = FALSE; + token_name.len = toklen; + /* Name macros and members. */ + tok.named = (structdef == stagseen + || typdef == ttypeseen + || typdef == tend + || (funorvar + && definedef == dignorerest) + || (funorvar + && definedef == dnone + && structdef == sinbody)); } tok.lineno = lineno; tok.linelen = tokoff + toklen + 1; @@ -2346,7 +2867,8 @@ C_entries (c_ext, inf) tok.valid = TRUE; if (definedef == dnone - && (funcdef == ftagseen + && (fvdef == fvnameseen + || fvdef == foperator || structdef == stagseen || typdef == tend || objdef != onone)) @@ -2355,12 +2877,13 @@ C_entries (c_ext, inf) switch_line_buffers (); } else - make_tag (is_func); + make_C_tag (funorvar); } midtoken = FALSE; } } /* if (endtoken (c)) */ else if (intoken (c)) + intok: { toklen++; continue; @@ -2371,20 +2894,20 @@ C_entries (c_ext, inf) switch (definedef) { case dnone: - switch (funcdef) + switch (fvdef) { case fstartlist: - funcdef = finlist; + fvdef = finlist; continue; case flistseen: - make_tag (TRUE); - funcdef = fignore; + make_C_tag (TRUE); /* a function */ + fvdef = fignore; break; - case ftagseen: - funcdef = fnone; + case fvnameseen: + fvdef = fvnone; break; } - if (structdef == stagseen) + if (structdef == stagseen && !cjava) structdef = snone; break; case dsharpseen: @@ -2412,30 +2935,32 @@ C_entries (c_ext, inf) { case otagseen: 
objdef = oignore; - make_tag (TRUE); + make_C_tag (TRUE); /* an Objective C class */ break; case omethodtag: case omethodparm: objdef = omethodcolon; methodlen += 1; - GROW_LINEBUFFER (token_name, methodlen+1); + grow_linebuffer (&token_name, methodlen + 1); strcat (token_name.buffer, ":"); + token_name.len = methodlen; break; } if (structdef == stagseen) structdef = scolonseen; else - switch (funcdef) + switch (fvdef) { - case ftagseen: + case fvnameseen: if (yacc_rules) { - make_tag (FALSE); - funcdef = fignore; + make_C_tag (FALSE); /* a yacc function */ + fvdef = fignore; } break; case fstartlist: - funcdef = fnone; + fvextern = FALSE; + fvdef = fvnone; break; } break; @@ -2446,14 +2971,30 @@ C_entries (c_ext, inf) switch (typdef) { case tend: - make_tag (FALSE); + make_C_tag (FALSE); /* a typedef */ /* FALLTHRU */ default: typdef = tnone; } - if (funcdef != fignore) + switch (fvdef) { - funcdef = fnone; + case fignore: + break; + case fvnameseen: + if ((members && cblev == 1) + || (globals && cblev == 0 && (!fvextern || declarations))) + make_C_tag (FALSE); /* a variable */ + fvextern = FALSE; + fvdef = fvnone; + tok.valid = FALSE; + break; + case flistseen: + if (declarations && (cblev == 0 || cblev == 1)) + make_C_tag (TRUE); /* a function declaration */ + /* FALLTHRU */ + default: + fvextern = FALSE; + fvdef = fvnone; /* The following instruction invalidates the token. Probably the token should be invalidated in all other cases where some state machine is reset. 
*/ @@ -2469,12 +3010,25 @@ C_entries (c_ext, inf) { case omethodtag: case omethodparm: - make_tag (TRUE); + make_C_tag (TRUE); /* an Objective C method */ objdef = oinbody; break; } - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; + switch (fvdef) + { + case foperator: + case finlist: + case fignore: + case vignore: + break; + case fvnameseen: + if ((members && cblev == 1) + || (globals && cblev == 0 && (!fvextern || declarations))) + make_C_tag (FALSE); /* a variable */ + break; + default: + fvdef = fvnone; + } if (structdef == stagseen) structdef = snone; break; @@ -2484,11 +3038,24 @@ C_entries (c_ext, inf) if (cblev == 0 && typdef == tend) { typdef = tignore; - make_tag (FALSE); + make_C_tag (FALSE); /* a typedef */ break; } - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; + switch (fvdef) + { + case foperator: + case finlist: + case fignore: + case vignore: + break; + case fvnameseen: + if ((members && cblev == 1) + || (globals && cblev == 0 && (!fvextern || declarations))) + make_C_tag (FALSE); /* a variable */ + /* FALLTHRU */ + default: + fvdef = fvnone; + } if (structdef == stagseen) structdef = snone; break; @@ -2497,29 +3064,25 @@ C_entries (c_ext, inf) break; if (objdef == otagseen && parlev == 0) objdef = oparenseen; - switch (funcdef) + switch (fvdef) { - case fnone: - switch (typdef) + case fvnameseen: + if (typdef == ttypeseen + && tok.valid + && *lp != '*' + && structdef != sinbody) { - case ttypedseen: - case tend: - /* Make sure that the next char is not a '*'. 
- This handles constructs like: + /* This handles constructs like: typedef void OperatorFun (int fun); */ - if (*lp != '*') - { - typdef = tignore; - make_tag (FALSE); - } - break; - } /* switch (typdef) */ - break; - case ftagseen: - funcdef = fstartlist; + make_C_tag (FALSE); + typdef = tignore; + } + /* FALLTHRU */ + case foperator: + fvdef = fstartlist; break; case flistseen: - funcdef = finlist; + fvdef = finlist; break; } parlev++; @@ -2529,22 +3092,22 @@ C_entries (c_ext, inf) break; if (objdef == ocatseen && parlev == 1) { - make_tag (TRUE); + make_C_tag (TRUE); /* an Objective C category */ objdef = oignore; } if (--parlev == 0) { - switch (funcdef) + switch (fvdef) { case fstartlist: case finlist: - funcdef = flistseen; + fvdef = flistseen; break; } - if (cblev == 0 && typdef == tend) + if (cblev == 0 && (typdef == tend)) { typdef = tignore; - make_tag (FALSE); + make_C_tag (FALSE); /* a typedef */ } } else if (parlev < 0) /* can happen due to ill-conceived #if's. */ @@ -2553,42 +3116,42 @@ C_entries (c_ext, inf) case '{': if (definedef != dnone) break; - if (typdef == ttypedseen) + if (typdef == ttypeseen) typdef = tinbody; switch (structdef) { case skeyseen: /* unnamed struct */ - structtag = "_anonymous_"; structdef = sinbody; + structtag = "_anonymous_"; break; case stagseen: case scolonseen: /* named struct */ structdef = sinbody; - make_tag (FALSE); + make_C_tag (FALSE); /* a struct */ break; } - switch (funcdef) + switch (fvdef) { case flistseen: - make_tag (TRUE); + make_C_tag (TRUE); /* a function */ /* FALLTHRU */ case fignore: - funcdef = fnone; + fvdef = fvnone; break; - case fnone: + case fvnone: switch (objdef) { case otagseen: - make_tag (TRUE); + make_C_tag (TRUE); /* an Objective C class */ objdef = oignore; break; case omethodtag: case omethodparm: - make_tag (TRUE); + make_C_tag (TRUE); /* an Objective C method */ objdef = oinbody; break; default: - /* Neutralize `extern "C" {' grot and look inside structs. 
*/ + /* Neutralize `extern "C" {' grot. */ if (cblev == 0 && structdef == snone && typdef == tnone) cblev = -1; } @@ -2598,8 +3161,8 @@ C_entries (c_ext, inf) case '*': if (definedef != dnone) break; - if (funcdef == fstartlist) - funcdef = fnone; /* avoid tagging `foo' in `foo (*bar()) ()' */ + if (fvdef == fstartlist) + fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */ break; case '}': if (definedef != dnone) @@ -2627,6 +3190,25 @@ C_entries (c_ext, inf) structtag = ""; } break; + case '=': + if (definedef != dnone) + break; + switch (fvdef) + { + case foperator: + case finlist: + case fignore: + case vignore: + break; + case fvnameseen: + if ((members && cblev == 1) + || (globals && cblev == 0 && (!fvextern || declarations))) + make_C_tag (FALSE); /* a variable */ + /* FALLTHRU */ + default: + fvdef = vignore; + } + break; case '+': case '-': if (objdef == oinbody && cblev == 0) @@ -2635,25 +3217,33 @@ C_entries (c_ext, inf) break; } /* FALLTHRU */ - case '=': case '#': case '~': case '&': case '%': case '/': - case '|': case '^': case '!': case '<': case '>': case '.': case '?': + case '#': case '~': case '&': case '%': case '/': case '|': + case '^': case '!': case '<': case '>': case '.': case '?': case ']': if (definedef != dnone) break; - /* These surely cannot follow a function tag. */ - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; + /* These surely cannot follow a function tag in C. */ + switch (fvdef) + { + case foperator: + case finlist: + case fignore: + case vignore: + break; + default: + fvdef = fvnone; + } break; case '\0': if (objdef == otagseen) { - make_tag (TRUE); + make_C_tag (TRUE); /* an Objective C class */ objdef = oignore; } /* If a macro spans multiple lines don't reset its state. */ if (quotednl) - CNL_SAVE_DEFINEDEF; + CNL_SAVE_DEFINEDEF (); else - CNL; + CNL (); break; } /* switch (c) */ @@ -2687,6 +3277,14 @@ Cplusplus_entries (inf) C_entries (C_PLPL, inf); } +/* Always do Java. 
*/ +void +Cjava_entries (inf) + FILE *inf; +{ + C_entries (C_JAVA, inf); +} + /* Always do C*. */ void Cstar_entries (inf) @@ -2703,19 +3301,43 @@ Yacc_entries (inf) C_entries (YACC, inf); } -/* Fortran parsing */ +/* A useful macro. */ +#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \ + for (lineno = charno = 0; /* loop initialization */ \ + !feof (file_pointer) /* loop test */ \ + && (lineno++, /* instructions at start of loop */ \ + linecharno = charno, \ + charno += readline (&line_buffer, file_pointer), \ + char_pointer = lb.buffer, \ + TRUE); \ + ) -char *dbp; -logical +/* + * Read a file, but do no processing. This is used to do regexp + * matching on files that have no language defined. + */ +void +just_read_file (inf) + FILE *inf; +{ + register char *dummy; + + LOOP_ON_INPUT_LINES (inf, lb, dummy) + continue; +} + +/* Fortran parsing */ + +bool tail (cp) char *cp; { register int len = 0; - while (*cp && lowcase(*cp) == lowcase(dbp[len])) + while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len])) cp++, len++; - if (*cp == '\0' && !intoken(dbp[len])) + if (*cp == '\0' && !intoken (dbp[len])) { dbp += len; return TRUE; @@ -2726,13 +3348,11 @@ tail (cp) void takeprec () { - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); if (*dbp != '*') return; dbp++; - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); if (strneq (dbp, "(*)", 3)) { dbp += 3; @@ -2754,8 +3374,7 @@ getit (inf) { register char *cp; - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); if (*dbp == '\0') { lineno++; @@ -2765,39 +3384,26 @@ getit (inf) if (dbp[5] != '&') return; dbp += 6; - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); } - if (!isalpha (*dbp) - && *dbp != '_' - && *dbp != '$') + if (!isalpha (*dbp) && *dbp != '_' && *dbp != '$') return; - for (cp = dbp + 1; - (*cp - && (isalpha (*cp) || isdigit (*cp) || (*cp == '_') || (*cp == '$'))); - cp++) + for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++) continue; - pfnote 
((CTAGS) ? savenstr (dbp, cp-dbp) : NULL, TRUE, + pfnote (savenstr (dbp, cp-dbp), TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } + void Fortran_functions (inf) FILE *inf; { - lineno = 0; - charno = 0; - - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, dbp) { - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - dbp = lb.buffer; if (*dbp == '%') dbp++; /* Ratfor escape to fortran */ - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); if (*dbp == '\0') continue; switch (lowcase (*dbp)) @@ -2821,8 +3427,7 @@ Fortran_functions (inf) case 'd': if (tail ("double")) { - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); if (*dbp == '\0') continue; if (tail ("precision")) @@ -2831,8 +3436,7 @@ Fortran_functions (inf) } break; } - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); if (*dbp == '\0') continue; switch (lowcase (*dbp)) @@ -2849,19 +3453,186 @@ Fortran_functions (inf) if (tail ("entry")) getit (inf); continue; - case 'p': - if (tail ("program")) + case 'b': + if (tail ("blockdata") || tail ("block data")) { - getit (inf); - continue; + dbp = skip_spaces (dbp); + if (*dbp == '\0') /* assume un-named */ + pfnote (savestr ("blockdata"), TRUE, + lb.buffer, dbp - lb.buffer, lineno, linecharno); + else + getit (inf); /* look for name */ } - if (tail ("procedure")) - getit (inf); continue; } } } +/* + * Philippe Waroquiers , 1998-04-24 + * Ada parsing + */ +/* Once we are positioned after an "interesting" keyword, let's get + the real tag value necessary. */ +void +adagetit (inf, name_qualifier) + FILE *inf; + char *name_qualifier; +{ + register char *cp; + char *name; + char c; + + while (!feof (inf)) + { + dbp = skip_spaces (dbp); + if (*dbp == '\0' + || (dbp[0] == '-' && dbp[1] == '-')) + { + lineno++; + linecharno = charno; + charno += readline (&lb, inf); + dbp = lb.buffer; + } + switch (*dbp) + { + case 'b': + case 'B': + if (tail ("body")) + { + /* Skipping body of procedure body or package body or .... 
+ resetting qualifier to body instead of spec. */ + name_qualifier = "/b"; + continue; + } + break; + case 't': + case 'T': + /* Skipping type of task type or protected type ... */ + if (tail ("type")) + continue; + break; + } + if (*dbp == '"') + { + dbp += 1; + for (cp = dbp; *cp != '\0' && *cp != '"'; cp++) + continue; + } + else + { + dbp = skip_spaces (dbp); + for (cp = dbp; + (*cp != '\0' + && (isalpha (*cp) || isdigit (*cp) || *cp == '_' || *cp == '.')); + cp++) + continue; + if (cp == dbp) + return; + } + c = *cp; + *cp = '\0'; + name = concat (dbp, name_qualifier, ""); + *cp = c; + pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + if (c == '"') + dbp = cp + 1; + return; + } +} + +void +Ada_funcs (inf) + FILE *inf; +{ + bool inquote = FALSE; + + LOOP_ON_INPUT_LINES (inf, lb, dbp) + { + while (*dbp != '\0') + { + /* Skip a string i.e. "abcd". */ + if (inquote || (*dbp == '"')) + { + dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"'); + if (dbp != NULL) + { + inquote = FALSE; + dbp += 1; + continue; /* advance char */ + } + else + { + inquote = TRUE; + break; /* advance line */ + } + } + + /* Skip comments. */ + if (dbp[0] == '-' && dbp[1] == '-') + break; /* advance line */ + + /* Skip character enclosed in single quote i.e. 'a' + and skip single quote starting an attribute i.e. 'Image. */ + if (*dbp == '\'') + { + dbp++ ; + if (*dbp != '\0') + dbp++; + continue; + } + + /* Search for beginning of a token. */ + if (!begtoken (*dbp)) + { + dbp++; + continue; /* advance char */ + } + + /* We are at the beginning of a token. 
*/ + switch (*dbp) + { + case 'f': + case 'F': + if (!packages_only && tail ("function")) + adagetit (inf, "/f"); + else + break; /* from switch */ + continue; /* advance char */ + case 'p': + case 'P': + if (!packages_only && tail ("procedure")) + adagetit (inf, "/p"); + else if (tail ("package")) + adagetit (inf, "/s"); + else if (tail ("protected")) /* protected type */ + adagetit (inf, "/t"); + else + break; /* from switch */ + continue; /* advance char */ + case 't': + case 'T': + if (!packages_only && tail ("task")) + adagetit (inf, "/k"); + else if (typedefs && !packages_only && tail ("type")) + { + adagetit (inf, "/t"); + while (*dbp != '\0') + dbp += 1; + } + else + break; /* from switch */ + continue; /* advance char */ + } + + /* Look for the end of the token. */ + while (!endtoken (*dbp)) + dbp++; + + } /* advance char */ + } /* advance line */ +} + /* * Bob Weiner, Motorola Inc., 4/3/94 * Unix and microcontroller assembly tag handling @@ -2873,16 +3644,8 @@ Asm_labels (inf) { register char *cp; - lineno = 0; - charno = 0; - - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, cp) { - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - cp = lb.buffer; - /* If first char is alphabetic or one of [_.$], test for colon following identifier. */ if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$') @@ -2894,7 +3657,7 @@ Asm_labels (inf) if (*cp == ':' || isspace (*cp)) { /* Found end of label, so copy it and add it to the table. */ - pfnote ((CTAGS) ? 
savenstr(lb.buffer, cp-lb.buffer) : NULL, TRUE, + pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } } @@ -2903,7 +3666,9 @@ Asm_labels (inf) /* * Perl support by Bart Robinson + * enhanced by Michael Ernst * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/ + * Perl variable names: /^(my|local).../ */ void Perl_functions (inf) @@ -2911,32 +3676,134 @@ Perl_functions (inf) { register char *cp; - lineno = 0; - charno = 0; - - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, cp) { - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - cp = lb.buffer; - - if (*cp++ == 's' && *cp++ == 'u' && *cp++ == 'b' && isspace(*cp++)) + if (*cp++ == 's' + && *cp++ == 'u' + && *cp++ == 'b' && isspace (*cp++)) { - while (*cp && isspace(*cp)) - cp++; - while (*cp && ! isspace(*cp) && *cp != '{') - cp++; - pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : NULL, TRUE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); - } - } -} - -/* Added by Mosur Mohan, 4/22/88 */ -/* Pascal parsing */ - -/* + cp = skip_spaces (cp); + if (*cp != '\0') + { + char *sp = cp; + while (*cp != '\0' + && !isspace (*cp) && *cp != '{' && *cp != '(') + cp++; + pfnote (savenstr (sp, cp-sp), TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } + } + else if (globals /* only if tagging global vars is enabled */ + && ((cp = lb.buffer, + *cp++ == 'm' + && *cp++ == 'y') + || (cp = lb.buffer, + *cp++ == 'l' + && *cp++ == 'o' + && *cp++ == 'c' + && *cp++ == 'a' + && *cp++ == 'l')) + && (*cp == '(' || isspace (*cp))) + { + /* After "my" or "local", but before any following paren or space. */ + char *varname = NULL; + + cp = skip_spaces (cp); + if (*cp == '$' || *cp == '@' || *cp == '%') + { + char* varstart = ++cp; + while (isalnum (*cp) || *cp == '_') + cp++; + varname = savenstr (varstart, cp-varstart); + } + else + { + /* Should be examining a variable list at this point; + could insist on seeing an open parenthesis. 
*/ + while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')') + cp++; + } + + /* Perhaps I should back cp up one character, so the TAGS table + doesn't mention (and so depend upon) the following char. */ + pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname, + FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } + } +} + +/* + * Python support by Eric S. Raymond + * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/ + */ +void +Python_functions (inf) + FILE *inf; +{ + register char *cp; + + LOOP_ON_INPUT_LINES (inf, lb, cp) + { + if (*cp++ == 'd' + && *cp++ == 'e' + && *cp++ == 'f' && isspace (*cp++)) + { + cp = skip_spaces (cp); + while (*cp != '\0' && !isspace (*cp) && *cp != '(' && *cp != ':') + cp++; + pfnote (NULL, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } + + cp = lb.buffer; + if (*cp++ == 'c' + && *cp++ == 'l' + && *cp++ == 'a' + && *cp++ == 's' + && *cp++ == 's' && isspace (*cp++)) + { + cp = skip_spaces (cp); + while (*cp != '\0' && !isspace (*cp) && *cp != '(' && *cp != ':') + cp++; + pfnote (NULL, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } + } +} + +/* Idea by Corny de Souza + * Cobol tag functions + * We could look for anything that could be a paragraph name. + * i.e. anything that starts in column 8 is one word and ends in a full stop. + */ +void +Cobol_paragraphs (inf) + FILE *inf; +{ + register char *bp, *ep; + + LOOP_ON_INPUT_LINES (inf, lb, bp) + { + if (lb.len < 9) + continue; + bp += 8; + + /* If eoln, compiler option or comment ignore whole line. */ + if (bp[-1] != ' ' || !isalnum (bp[0])) + continue; + + for (ep = bp; isalnum (*ep) || *ep == '-'; ep++) + continue; + if (*ep++ == '.') + pfnote (savenstr (bp, ep-bp), TRUE, + lb.buffer, ep - lb.buffer + 1, lineno, linecharno); + } +} + +/* Added by Mosur Mohan, 4/22/88 */ +/* Pascal parsing */ + +/* * Locates tags for procedures & functions. Doesn't do any type- or * var-definitions. 
It does look for the keyword "extern" or * "forward" immediately following the procedure statement; if found, @@ -2946,12 +3813,12 @@ void Pascal_functions (inf) FILE *inf; { - struct linebuffer tline; /* mostly copied from C_entries */ + linebuffer tline; /* mostly copied from C_entries */ long save_lcno; int save_lineno, save_len; char c, *cp, *namebuf; - logical /* each of these flags is TRUE iff: */ + bool /* each of these flags is TRUE iff: */ incomment, /* point is inside a comment */ inquote, /* point is inside '..' string */ get_tagname, /* point is after PROCEDURE/FUNCTION @@ -2963,11 +3830,12 @@ Pascal_functions (inf) is a FORWARD/EXTERN to be ignored, or whether it is a real tag */ + save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */ + namebuf = NULL; /* keep compiler quiet */ lineno = 0; charno = 0; dbp = lb.buffer; *dbp = '\0'; - save_len = 0; initbuffer (&tline); incomment = inquote = FALSE; @@ -2976,8 +3844,8 @@ Pascal_functions (inf) inparms = FALSE; /* found '(' after "proc" */ verify_tag = FALSE; /* check if "extern" is ahead */ - /* long main loop to get next char */ - while (!feof (inf)) + + while (!feof (inf)) /* long main loop to get next char */ { c = *dbp++; if (c == '\0') /* if end of line */ @@ -2988,8 +3856,8 @@ Pascal_functions (inf) dbp = lb.buffer; if (*dbp == '\0') continue; - if (!((found_tag && verify_tag) || - get_tagname)) + if (!((found_tag && verify_tag) + || get_tagname)) c = *dbp++; /* only if don't need *dbp pointing to the beginning of the name of the procedure or function */ @@ -3077,15 +3945,15 @@ Pascal_functions (inf) continue; /* save all values for later tagging */ - GROW_LINEBUFFER (tline, strlen (lb.buffer) + 1); + grow_linebuffer (&tline, lb.len + 1); strcpy (tline.buffer, lb.buffer); save_lineno = lineno; save_lcno = linecharno; /* grab block name */ - for (cp = dbp + 1; *cp && (!endtoken (*cp)); cp++) + for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++) continue; - namebuf = (CTAGS) ? 
savenstr (dbp, cp-dbp) : NULL; + namebuf = savenstr (dbp, cp-dbp); dbp = cp; /* set dbp to e-o-token */ save_len = dbp - lb.buffer + 1; get_tagname = FALSE; @@ -3131,12 +3999,12 @@ int L_isquote (strp) register char *strp; { - return ((*(++strp) == 'q' || *strp == 'Q') - && (*(++strp) == 'u' || *strp == 'U') - && (*(++strp) == 'o' || *strp == 'O') - && (*(++strp) == 't' || *strp == 'T') - && (*(++strp) == 'e' || *strp == 'E') - && isspace(*(++strp))); + return ((*++strp == 'q' || *strp == 'Q') + && (*++strp == 'u' || *strp == 'U') + && (*++strp == 'o' || *strp == 'O') + && (*++strp == 't' || *strp == 'T') + && (*++strp == 'e' || *strp == 'E') + && isspace (*++strp)); } void @@ -3146,20 +4014,23 @@ L_getit () if (*dbp == '\'') /* Skip prefix quote */ dbp++; - else if (*dbp == '(' && L_isquote (dbp)) /* Skip "(quote " */ + else if (*dbp == '(') { - dbp += 7; - while (isspace(*dbp)) - dbp++; + if (L_isquote (dbp)) + dbp += 7; /* Skip "(quote " */ + else + dbp += 1; /* Skip "(" before name in (defstruct (foo)) */ + dbp = skip_spaces (dbp); } + for (cp = dbp /*+1*/; - *cp && *cp != '(' && *cp != ' ' && *cp != ')'; + *cp != '\0' && *cp != '(' && !isspace(*cp) && *cp != ')'; cp++) continue; if (cp == dbp) return; - pfnote ((CTAGS) ? savenstr (dbp, cp-dbp) : NULL, TRUE, + pfnote (savenstr (dbp, cp-dbp), TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } @@ -3167,23 +4038,14 @@ void Lisp_functions (inf) FILE *inf; { - lineno = 0; - charno = 0; - - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, dbp) { - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - dbp = lb.buffer; if (dbp[0] == '(') { if (L_isdef (dbp)) { - while (!isspace (*dbp)) - dbp++; - while (isspace (*dbp)) - dbp++; + dbp = skip_non_spaces (dbp); + dbp = skip_spaces (dbp); L_getit (); } else @@ -3191,7 +4053,7 @@ Lisp_functions (inf) /* Check for (foo::defmumble name-defined ... 
*/ do dbp++; - while (*dbp && !isspace (*dbp) + while (*dbp != '\0' && !isspace (*dbp) && *dbp != ':' && *dbp != '(' && *dbp != ')'); if (*dbp == ':') { @@ -3201,10 +4063,8 @@ Lisp_functions (inf) if (L_isdef (dbp - 1)) { - while (!isspace (*dbp)) - dbp++; - while (isspace (*dbp)) - dbp++; + dbp = skip_non_spaces (dbp); + dbp = skip_spaces (dbp); L_getit (); } } @@ -3213,6 +4073,40 @@ Lisp_functions (inf) } } +/* + * Postscript tag functions + * Just look for lines where the first character is '/' + * Richard Mlynarik + * Also look at "defineps" for PSWrap + * suggested by Masatake YAMATO + */ +void +Postscript_functions (inf) + FILE *inf; +{ + register char *bp, *ep; + + LOOP_ON_INPUT_LINES (inf, lb, bp) + { + if (bp[0] == '/') + { + for (ep = bp+1; + *ep != '\0' && *ep != ' ' && *ep != '{'; + ep++) + continue; + pfnote (savenstr (bp, ep-bp), TRUE, + lb.buffer, ep - lb.buffer + 1, lineno, linecharno); + } + else if (strneq (bp, "defineps", 8)) + { + bp = skip_non_spaces (bp); + bp = skip_spaces (bp); + get_tag (bp); + } + } +} + + /* * Scheme tag functions * look for (def... xyzzy @@ -3221,65 +4115,38 @@ Lisp_functions (inf) * look for (set! 
xyzzy */ -void get_scheme (); - void Scheme_functions (inf) FILE *inf; { - lineno = 0; - charno = 0; + register char *bp; - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, bp) { - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - dbp = lb.buffer; - if (dbp[0] == '(' && - (dbp[1] == 'D' || dbp[1] == 'd') && - (dbp[2] == 'E' || dbp[2] == 'e') && - (dbp[3] == 'F' || dbp[3] == 'f')) + if (bp[0] == '(' + && (bp[1] == 'D' || bp[1] == 'd') + && (bp[2] == 'E' || bp[2] == 'e') + && (bp[3] == 'F' || bp[3] == 'f')) { - while (!isspace (*dbp)) - dbp++; + bp = skip_non_spaces (bp); /* Skip over open parens and white space */ - while (*dbp && (isspace (*dbp) || *dbp == '(')) - dbp++; - get_scheme (); + while (isspace (*bp) || *bp == '(') + bp++; + get_tag (bp); } - if (dbp[0] == '(' && - (dbp[1] == 'S' || dbp[1] == 's') && - (dbp[2] == 'E' || dbp[2] == 'e') && - (dbp[3] == 'T' || dbp[3] == 't') && - (dbp[4] == '!' || dbp[4] == '!') && - (isspace (dbp[5]))) + if (bp[0] == '(' + && (bp[1] == 'S' || bp[1] == 's') + && (bp[2] == 'E' || bp[2] == 'e') + && (bp[3] == 'T' || bp[3] == 't') + && (bp[4] == '!' || bp[4] == '!') + && (isspace (bp[5]))) { - while (!isspace (*dbp)) - dbp++; - /* Skip over white space */ - while (isspace (*dbp)) - dbp++; - get_scheme (); + bp = skip_non_spaces (bp); + bp = skip_spaces (bp); + get_tag (bp); } } } - -void -get_scheme () -{ - register char *cp; - - if (*dbp == '\0') - return; - /* Go till you get to white space or a syntactic break */ - for (cp = dbp + 1; - *cp && *cp != '(' && *cp != ')' && !isspace (*cp); - cp++) - continue; - pfnote ((CTAGS) ? savenstr (dbp, cp-dbp) : NULL, TRUE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); -} /* Find tags in TeX and LaTeX input files. 
*/ @@ -3304,9 +4171,6 @@ char *TEX_defenv = "\ void TEX_mode (); struct TEX_tabent *TEX_decode_env (); int TEX_Token (); -#if TeX_named_tokens -void TEX_getit (); -#endif char TEX_esc = '\\'; char TEX_opgrp = '{'; @@ -3319,10 +4183,8 @@ void TeX_functions (inf) FILE *inf; { - char *lasthit; - - lineno = 0; - charno = 0; + char *cp, *lasthit; + register int i; /* Select either \ or ! as escape character. */ TEX_mode (inf); @@ -3331,30 +4193,28 @@ TeX_functions (inf) if (!TEX_toktab) TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv); - while (!feof (inf)) - { /* Scan each line in file */ - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - dbp = lb.buffer; - lasthit = dbp; - while (dbp = etags_strchr (dbp, TEX_esc)) /* Look at each esc in line */ + LOOP_ON_INPUT_LINES (inf, lb, cp) + { + lasthit = cp; + /* Look at each esc in line. */ + while ((cp = etags_strchr (cp, TEX_esc)) != NULL) { - register int i; - - if (!*(++dbp)) + if (*++cp == '\0') break; - linecharno += dbp - lasthit; - lasthit = dbp; + linecharno += cp - lasthit; + lasthit = cp; i = TEX_Token (lasthit); - if (0 <= i) + if (i >= 0) { - pfnote (NULL, TRUE, - lb.buffer, strlen (lb.buffer), lineno, linecharno); -#if TeX_named_tokens - TEX_getit (lasthit, TEX_toktab[i].len); -#endif - break; /* We only save a line once */ + /* We seem to include the TeX command in the tag name. + register char *p; + for (p = lasthit + TEX_toktab[i].len; + *p != '\0' && *p != TEX_clgrp; + p++) + continue; */ + pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE, + lb.buffer, lb.len, lineno, linecharno); + break; /* We only tag a line once */ } } } @@ -3394,6 +4254,8 @@ TEX_mode (inf) TEX_opgrp = '<'; TEX_clgrp = '>'; } + /* If the input file is compressed, inf is a pipe, and rewind may fail. + No attempt is made to correct the situation. 
*/ rewind (inf); } @@ -3414,11 +4276,14 @@ TEX_decode_env (evarname, defenv) if (!env) env = defenv; else - env = concat (env, defenv, ""); + { + char *oldenv = env; + env = concat (oldenv, defenv, ""); + } /* Allocate a token table */ for (size = 1, p = env; p;) - if ((p = etags_strchr (p, ':')) && *(++p)) + if ((p = etags_strchr (p, ':')) && *++p != '\0') size++; /* Add 1 to leave room for null terminator. */ tab = xnew (size + 1, struct TEX_tabent); @@ -3448,32 +4313,10 @@ TEX_decode_env (evarname, defenv) return tab; } -#if TeX_named_tokens -/* Record a tag defined by a TeX command of length LEN and starting at NAME. - The name being defined actually starts at (NAME + LEN + 1). - But we seem to include the TeX command in the tag name. */ -void -TEX_getit (name, len) - char *name; - int len; -{ - char *p = name + len; - - if (*name == '\0') - return; - - /* Let tag name extend to next group close (or end of line) */ - while (*p && *p != TEX_clgrp) - p++; - pfnote (savenstr (name, p-name), TRUE, - lb.buffer, strlen (lb.buffer), lineno, linecharno); -} -#endif - /* If the text at CP matches one of the tag-defining TeX command names, return the pointer to the first occurrence of that command in TEX_toktab. Otherwise return -1. - Keep the capital `T' in `Token' for dumb truncating compilers + Keep the capital `T' in `token' for dumb truncating compilers (this distinguishes it from `TEX_toktab' */ int TEX_Token (cp) @@ -3491,16 +4334,17 @@ TEX_Token (cp) * Prolog support (rewritten) by Anders Lindgren, Mar. 96 * * Assumes that the predicate starts at column 0. - * Only the first clause of a predicate is added. + * Only the first clause of a predicate is added. 
*/ +int prolog_pred (); +void prolog_skip_comment (); +int prolog_atom (); + void Prolog_functions (inf) FILE *inf; { - int prolog_pred (); - void prolog_skip_comment (); - - char * last; + char *cp, *last; int len; int allocated; @@ -3508,32 +4352,24 @@ Prolog_functions (inf) len = 0; last = NULL; - lineno = 0; - linecharno = 0; - charno = 0; - - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, cp) { - lineno++; - linecharno += charno; - charno = readline (&lb, inf); - dbp = lb.buffer; - if (dbp[0] == '\0') /* Empty line */ + if (cp[0] == '\0') /* Empty line */ continue; - else if (isspace (dbp[0])) /* Not a predicate */ + else if (isspace (cp[0])) /* Not a predicate */ continue; - else if (dbp[0] == '/' && dbp[1] == '*') /* comment. */ - prolog_skip_comment (&lb, inf, &lineno, &linecharno); - else if (len = prolog_pred (dbp, last)) + else if (cp[0] == '/' && cp[1] == '*') /* comment. */ + prolog_skip_comment (&lb, inf); + else if ((len = prolog_pred (cp, last)) > 0) { /* Predicate. Store the function name so that we only - * generates a tag for the first clause. */ + generate a tag for the first clause. */ if (last == NULL) last = xnew(len + 1, char); else if (len + 1 > allocated) - last = (char *) xrealloc(last, len + 1); + last = xrnew (last, len + 1, char); allocated = len + 1; - strncpy (last, dbp, len); + strncpy (last, cp, len); last[len] = '\0'; } } @@ -3542,7 +4378,7 @@ Prolog_functions (inf) void prolog_skip_comment (plb, inf) - struct linebuffer *plb; + linebuffer *plb; FILE *inf; { char *cp; @@ -3573,18 +4409,15 @@ prolog_pred (s, last) char *s; char *last; /* Name of last clause. */ { - int prolog_atom(); - int prolog_white(); - int pos; int len; - pos = prolog_atom(s, 0); + pos = prolog_atom (s, 0); if (pos < 1) return 0; len = pos; - pos += prolog_white(s, pos); + pos = skip_spaces (s + pos) - s; if ((s[pos] == '(') || (s[pos] == '.')) { @@ -3592,12 +4425,11 @@ prolog_pred (s, last) pos++; /* Save only the first clause. 
*/ - if ((last == NULL) || - (len != strlen(last)) || - (strncmp(s, last, len) != 0)) + if (last == NULL + || len != (int)strlen (last) + || !strneq (s, last, len)) { - pfnote ((CTAGS) ? savenstr (s, len) : NULL, TRUE, - s, pos, lineno, linecharno); + pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno); return len; } } @@ -3636,7 +4468,7 @@ prolog_atom (s, pos) { pos++; - while (1) + while (1) { if (s[pos] == '\'') { @@ -3662,38 +4494,23 @@ prolog_atom (s, pos) else return -1; } - -/* Consume whitespace. Return the number of bytes eaten. */ -int -prolog_white (s, pos) - char *s; - int pos; -{ - int origpos; - - origpos = pos; - - while (isspace(s[pos])) - pos++; - - return pos - origpos; -} -/* +/* * Support for Erlang -- Anders Lindgren, Feb 1996. * * Generates tags for functions, defines, and records. * * Assumes that Erlang functions start at column 0. */ +int erlang_func (); +void erlang_attribute (); +int erlang_atom (); + void Erlang_functions (inf) FILE *inf; { - int erlang_func (); - void erlang_attribute (); - - char * last; + char *cp, *last; int len; int allocated; @@ -3701,41 +4518,33 @@ Erlang_functions (inf) len = 0; last = NULL; - lineno = 0; - linecharno = 0; - charno = 0; - - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, cp) { - lineno++; - linecharno += charno; - charno = readline (&lb, inf); - dbp = lb.buffer; - if (dbp[0] == '\0') /* Empty line */ + if (cp[0] == '\0') /* Empty line */ continue; - else if (isspace (dbp[0])) /* Not function nor attribute */ + else if (isspace (cp[0])) /* Not function nor attribute */ continue; - else if (dbp[0] == '%') /* comment */ + else if (cp[0] == '%') /* comment */ continue; - else if (dbp[0] == '"') /* Sometimes, strings start in column one */ + else if (cp[0] == '"') /* Sometimes, strings start in column one */ continue; - else if (dbp[0] == '-') /* attribute, e.g. "-define" */ + else if (cp[0] == '-') /* attribute, e.g. 
"-define" */ { - erlang_attribute(dbp); + erlang_attribute (cp); last = NULL; } - else if (len = erlang_func (dbp, last)) + else if ((len = erlang_func (cp, last)) > 0) { - /* + /* * Function. Store the function name so that we only * generates a tag for the first clause. */ if (last == NULL) - last = xnew(len + 1, char); + last = xnew (len + 1, char); else if (len + 1 > allocated) - last = (char *) xrealloc(last, len + 1); + last = xrnew (last, len + 1, char); allocated = len + 1; - strncpy (last, dbp, len); + strncpy (last, cp, len); last[len] = '\0'; } } @@ -3757,37 +4566,32 @@ erlang_func (s, last) char *s; char *last; /* Name of last clause. */ { - int erlang_atom (); - int erlang_white (); - int pos; int len; - pos = erlang_atom(s, 0); + pos = erlang_atom (s, 0); if (pos < 1) return 0; len = pos; - pos += erlang_white(s, pos); + pos = skip_spaces (s + pos) - s; - if (s[pos++] == '(') - { - /* Save only the first clause. */ - if ((last == NULL) || - (len != strlen(last)) || - (strncmp(s, last, len) != 0)) + /* Save only the first clause. */ + if (s[pos++] == '(' + && (last == NULL + || len != (int)strlen (last) + || !strneq (s, last, len))) { - pfnote ((CTAGS) ? savenstr (s, len) : NULL, TRUE, - s, pos, lineno, linecharno); + pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno); return len; } - } + return 0; } /* - * Handle attributes. Currently, tags are generated for defines + * Handle attributes. Currently, tags are generated for defines * and records. * * They are on the form: @@ -3799,27 +4603,19 @@ void erlang_attribute (s) char *s; { - int erlang_atom (); - int erlang_white (); - int pos; int len; - if ((strncmp(s, "-define", 7) == 0) || - (strncmp(s, "-record", 7) == 0)) + if (strneq (s, "-define", 7) || strneq (s, "-record", 7)) { - pos = 7; - pos += erlang_white(s, pos); - - if (s[pos++] == '(') + pos = skip_spaces (s + 7) - s; + if (s[pos++] == '(') { - pos += erlang_white(s, pos); - - if (len = erlang_atom(s, pos)) - { - pfnote ((CTAGS) ? 
savenstr (& s[pos], len) : NULL, TRUE, - s, pos + len, lineno, linecharno); - } + pos = skip_spaces (s + pos) - s; + len = erlang_atom (s, pos); + if (len != 0) + pfnote (savenstr (& s[pos], len), TRUE, + s, pos + len, lineno, linecharno); } } return; @@ -3851,7 +4647,7 @@ erlang_atom (s, pos) { pos++; - while (1) + while (1) { if (s[pos] == '\'') { @@ -3875,24 +4671,9 @@ erlang_atom (s, pos) else return -1; } - -/* Consume whitespace. Return the number of bytes eaten */ -int -erlang_white (s, pos) - char *s; - int pos; -{ - int origpos; - - origpos = pos; - - while (isspace (s[pos])) - pos++; - - return pos - origpos; -} #ifdef ETAGS_REGEXPS + /* Take a string like "/blah/" and turn it into "blah", making sure that the first and last characters are the same, and handling quoted separator characters. Actually, stops on the occurrence of @@ -3905,7 +4686,7 @@ scan_separators (name) { char sep = name[0]; char *copyto = name; - logical quoted = FALSE; + bool quoted = FALSE; for (++name; *name != '\0'; ++name) { @@ -3936,29 +4717,90 @@ scan_separators (name) return name; } +/* Look at the argument of --regex or --no-regex and do the right + thing. Same for each line of a regexp file. */ +void +analyse_regex (regex_arg, ignore_case) + char *regex_arg; + bool ignore_case; +{ + if (regex_arg == NULL) + free_patterns (); /* --no-regex: remove existing regexps */ + + /* A real --regexp option or a line in a regexp file. */ + switch (regex_arg[0]) + { + /* Comments in regexp file or null arg to --regex. */ + case '\0': + case ' ': + case '\t': + break; + + /* Read a regex file. This is recursive and may result in a + loop, which will stop when the file descriptors are exhausted. */ + case '@': + { + FILE *regexfp; + linebuffer regexbuf; + char *regexfile = regex_arg + 1; + + /* regexfile is a file containing regexps, one per line. 
*/ + regexfp = fopen (regexfile, "r"); + if (regexfp == NULL) + { + pfatal (regexfile); + return; + } + initbuffer (&regexbuf); + while (readline_internal (&regexbuf, regexfp) > 0) + analyse_regex (regexbuf.buffer, ignore_case); + free (regexbuf.buffer); + fclose (regexfp); + } + break; + + /* Regexp to be used for a specific language only. */ + case '{': + { + language *lang; + char *lang_name = regex_arg + 1; + char *cp; + + for (cp = lang_name; *cp != '}'; cp++) + if (*cp == '\0') + { + error ("unterminated language name in regex: %s", regex_arg); + return; + } + *cp = '\0'; + lang = get_language_from_name (lang_name); + if (lang == NULL) + return; + add_regex (cp + 1, ignore_case, lang); + } + break; + + /* Regexp to be used for any language. */ + default: + add_regex (regex_arg, ignore_case, NULL); + break; + } +} + /* Turn a name, which is an ed-style (but Emacs syntax) regular expression, into a real regular expression by compiling it. */ void -add_regex (regexp_pattern) +add_regex (regexp_pattern, ignore_case, lang) char *regexp_pattern; + bool ignore_case; + language *lang; { char *name; const char *err; struct re_pattern_buffer *patbuf; + pattern *pp; - if (regexp_pattern == NULL) - { - /* Remove existing regexps. */ - num_patterns = 0; - patterns = NULL; - return; - } - if (regexp_pattern[0] == '\0') - { - error ("missing regexp", 0); - return; - } if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0]) { error ("%s: unterminated regexp", regexp_pattern); @@ -3967,13 +4809,14 @@ add_regex (regexp_pattern) name = scan_separators (regexp_pattern); if (regexp_pattern[0] == '\0') { - error ("null regexp", 0); + error ("null regexp", (char *)NULL); return; } (void) scan_separators (name); patbuf = xnew (1, struct re_pattern_buffer); - patbuf->translate = NULL; + /* Translation table to fold case if appropriate. */ + patbuf->translate = (ignore_case) ?
lc_trans : NULL; patbuf->fastmap = NULL; patbuf->buffer = NULL; patbuf->allocated = 0; @@ -3985,16 +4828,14 @@ add_regex (regexp_pattern) return; } - num_patterns += 1; - if (num_patterns == 1) - patterns = xnew (1, struct pattern); - else - patterns = ((struct pattern *) - xrealloc (patterns, - (num_patterns * sizeof (struct pattern)))); - patterns[num_patterns - 1].pattern = patbuf; - patterns[num_patterns - 1].name_pattern = savestr (name); - patterns[num_patterns - 1].error_signaled = FALSE; + pp = p_head; + p_head = xnew (1, pattern); + p_head->regex = savestr (regexp_pattern); + p_head->p_next = pp; + p_head->language = lang; + p_head->pattern = patbuf; + p_head->name_pattern = savestr (name); + p_head->error_signaled = FALSE; } /* @@ -4006,93 +4847,125 @@ substitute (in, out, regs) char *in, *out; struct re_registers *regs; { - char *result = NULL, *t; - int size = 0; - - /* Pass 1: figure out how much size to allocate. */ - for (t = out; *t; ++t) - { - if (*t == '\\') - { - ++t; - if (!*t) - { - fprintf (stderr, "%s: pattern substitution ends prematurely\n", - progname); - return NULL; - } - if (isdigit (*t)) - { - int dig = *t - '0'; - size += regs->end[dig] - regs->start[dig]; - } - } - } + char *result, *t; + int size, dig, diglen; + + result = NULL; + size = strlen (out); + + /* Pass 1: figure out how much to allocate by finding all \N strings. */ + if (out[size - 1] == '\\') + fatal ("pattern error in \"%s\"", out); + for (t = etags_strchr (out, '\\'); + t != NULL; + t = etags_strchr (t + 2, '\\')) + if (isdigit (t[1])) + { + dig = t[1] - '0'; + diglen = regs->end[dig] - regs->start[dig]; + size += diglen - 2; + } + else + size -= 1; /* Allocate space and do the substitutions. */ result = xnew (size + 1, char); - size = 0; - for (; *out; ++out) - { - if (*out == '\\') - { - ++out; - if (isdigit (*out)) - { - /* Using "dig2" satisfies my debugger. Bleah. 
*/ - int dig2 = *out - '0'; - strncpy (result + size, in + regs->start[dig2], - regs->end[dig2] - regs->start[dig2]); - size += regs->end[dig2] - regs->start[dig2]; - } - else - result[size++] = *out; - } - else - result[size++] = *out; - } - result[size] = '\0'; + + for (t = result; *out != '\0'; out++) + if (*out == '\\' && isdigit (*++out)) + { + /* Using "dig2" satisfies my debugger. Bleah. */ + dig = *out - '0'; + diglen = regs->end[dig] - regs->start[dig]; + strncpy (t, in + regs->start[dig], diglen); + t += diglen; + } + else + *t++ = *out; + *t = '\0'; + + if (DEBUG && (t > result + size || t - result != (int)strlen (result))) + abort (); return result; } + +/* Deallocate all patterns. */ +void +free_patterns () +{ + pattern *pp; + while (p_head != NULL) + { + pp = p_head->p_next; + free (p_head->regex); + free (p_head->name_pattern); + free (p_head); + p_head = pp; + } + return; +} +void +get_tag (bp) + register char *bp; +{ + register char *cp; + + if (*bp == '\0') + return; + /* Go till you get to white space or a syntactic break */ + for (cp = bp + 1; + *cp != '\0' && *cp != '(' && *cp != ')' && !isspace (*cp); + cp++) + continue; + pfnote (savenstr (bp, cp-bp), TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); +} + #endif /* ETAGS_REGEXPS */ /* Initialize a linebuffer for use */ void -initbuffer (linebuffer) - struct linebuffer *linebuffer; +initbuffer (lbp) + linebuffer *lbp; { - linebuffer->size = 200; - linebuffer->buffer = xnew (200, char); + lbp->size = 200; + lbp->buffer = xnew (200, char); } /* - * Read a line of text from `stream' into `linebuffer'. - * Return the number of characters read from `stream', - * which is the length of the line including the newline, if any. + * Read a line of text from `stream' into `lbp', excluding the + * newline or CR-NL, if any. Return the number of characters read from + * `stream', which is the length of the line including the newline. 
+ * + * On DOS or Windows we do not count the CR character, if any, before the + * NL, in the returned length; this mirrors the behavior of emacs on those + * platforms (for text files, it translates CR-NL to NL as it reads in the + * file). */ long -readline_internal (linebuffer, stream) - struct linebuffer *linebuffer; +readline_internal (lbp, stream) + linebuffer *lbp; register FILE *stream; { - char *buffer = linebuffer->buffer; - register char *p = linebuffer->buffer; + char *buffer = lbp->buffer; + register char *p = lbp->buffer; register char *pend; int chars_deleted; - pend = p + linebuffer->size; /* Separate to avoid 386/IX compiler bug. */ + pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */ while (1) { register int c = getc (stream); if (p == pend) { - linebuffer->size *= 2; - buffer = (char *) xrealloc (buffer, linebuffer->size); - p += buffer - linebuffer->buffer; - pend = buffer + linebuffer->size; - linebuffer->buffer = buffer; + /* We're at the end of linebuffer: expand it. */ + lbp->size *= 2; + buffer = xrnew (buffer, lbp->size, char); + p += buffer - lbp->buffer; + pend = buffer + lbp->size; + lbp->buffer = buffer; } if (c == EOF) { @@ -4104,99 +4977,92 @@ readline_internal (linebuffer, stream) { if (p > buffer && p[-1] == '\r') { - *--p = '\0'; + p -= 1; +#ifdef DOS_NT + /* Assume CRLF->LF translation will be performed by Emacs + when loading this file, so CRs won't appear in the buffer. + It would be cleaner to compensate within Emacs; + however, Emacs does not know how many CRs were deleted + before any given point in the file. */ + chars_deleted = 1; +#else chars_deleted = 2; +#endif } else { - *p = '\0'; chars_deleted = 1; } + *p = '\0'; break; } *p++ = c; } + lbp->len = p - buffer; - return p - buffer + chars_deleted; + return lbp->len + chars_deleted; } /* - * Like readline_internal, above, but try to match the input - * line against any existing regular expressions. 
+ * Like readline_internal, above, but in addition try to match the + * input line against relevant regular expressions. */ long -readline (linebuffer, stream) - struct linebuffer *linebuffer; +readline (lbp, stream) + linebuffer *lbp; FILE *stream; { /* Read new line. */ - long result = readline_internal (linebuffer, stream); + long result = readline_internal (lbp, stream); #ifdef ETAGS_REGEXPS - int i; + int match; + pattern *pp; - /* Match against all listed patterns. */ - for (i = 0; i < num_patterns; ++i) - { - int match = re_match (patterns[i].pattern, linebuffer->buffer, - (int)result, 0, &patterns[i].regs); - switch (match) - { - case -2: - /* Some error. */ - if (!patterns[i].error_signaled) - { - error ("error while matching pattern %d", i); - patterns[i].error_signaled = TRUE; - } - break; - case -1: - /* No match. */ - break; - default: - /* Match occurred. Construct a tag. */ - if (patterns[i].name_pattern[0] != '\0') - { - /* Make a named tag. */ - char *name = substitute (linebuffer->buffer, - patterns[i].name_pattern, - &patterns[i].regs); - if (name != NULL) - pfnote (name, TRUE, - linebuffer->buffer, match, lineno, linecharno); - } - else - { - /* Make an unnamed tag. */ - pfnote (NULL, TRUE, - linebuffer->buffer, match, lineno, linecharno); - } - break; - } - } + /* Match against relevant patterns. */ + if (lbp->len > 0) + for (pp = p_head; pp != NULL; pp = pp->p_next) + { + /* Only use generic regexps or those for the current language. */ + if (pp->language != NULL && pp->language != curlang) + continue; + + match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs); + switch (match) + { + case -2: + /* Some error. */ + if (!pp->error_signaled) + { + error ("error while matching \"%s\"", pp->regex); + pp->error_signaled = TRUE; + } + break; + case -1: + /* No match. */ + break; + default: + /* Match occurred. Construct a tag. */ + if (pp->name_pattern[0] != '\0') + { + /* Make a named tag. 
*/ + char *name = substitute (lbp->buffer, + pp->name_pattern, &pp->regs); + if (name != NULL) + pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno); + } + else + { + /* Make an unnamed tag. */ + pfnote ((char *)NULL, TRUE, + lbp->buffer, match, lineno, linecharno); + } + break; + } + } #endif /* ETAGS_REGEXPS */ return result; } - -/* - * Read a file, but do no processing. This is used to do regexp - * matching on files that have no language defined. - */ -void -just_read_file (inf) - FILE *inf; -{ - lineno = 0; - charno = 0; - - while (!feof (inf)) - { - ++lineno; - linecharno = charno; - charno += readline (&lb, inf) + 1; - } -} - /* * Return a pointer to a space of size strlen(cp)+1 allocated @@ -4266,6 +5132,26 @@ etags_strchr (sp, c) return NULL; } +/* Skip spaces, return new pointer. */ +char * +skip_spaces (cp) + char *cp; +{ + while (isspace (*cp)) /* isspace('\0')==FALSE */ + cp++; + return cp; +} + +/* Skip non spaces, return new pointer. */ +char * +skip_non_spaces (cp) + char *cp; +{ + while (!iswhite (*cp)) /* iswhite('\0')==TRUE */ + cp++; + return cp; +} + /* Print error message and exit. */ void fatal (s1, s2) @@ -4286,8 +5172,14 @@ pfatal (s1) void suggest_asking_for_help () { - fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n", - progname); + fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n", + progname, +#ifdef LONG_OPTIONS + "--help" +#else + "-h" +#endif + ); exit (BAD); } @@ -4323,19 +5215,7 @@ concat (s1, s2, s3) char * etags_getcwd () { -#ifdef MSDOS - char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. 
*/ - - getwd (path); - for (p = path; *p != '\0'; p++) - if (*p == '\\') - *p = '/'; - else - *p = lowcase (*p); - - return strdup (path); -#else /* not MSDOS */ -#if HAVE_GETCWD +#ifdef HAVE_GETCWD int bufsize = 200; char *path = xnew (bufsize, char); @@ -4344,12 +5224,28 @@ etags_getcwd () if (errno != ERANGE) pfatal ("getcwd"); bufsize *= 2; + free (path); path = xnew (bufsize, char); } + canonicalize_filename (path); return path; -#else /* not MSDOS and not HAVE_GETCWD */ - struct linebuffer path; + +#else /* not HAVE_GETCWD */ +#ifdef MSDOS + char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */ + + getwd (path); + + for (p = path; *p != '\0'; p++) + if (*p == '\\') + *p = '/'; + else + *p = lowcase (*p); + + return strdup (path); +#else /* not MSDOS */ + linebuffer path; FILE *pipe; initbuffer (&path); @@ -4359,66 +5255,68 @@ etags_getcwd () pclose (pipe); return path.buffer; -#endif /* not HAVE_GETCWD */ #endif /* not MSDOS */ +#endif /* not HAVE_GETCWD */ } -/* Return a newly allocated string containing the filename - of FILE relative to the absolute directory DIR (which - should end with a slash). */ +/* Return a newly allocated string containing the file name of FILE + relative to the absolute directory DIR (which should end with a slash). */ char * relative_filename (file, dir) char *file, *dir; { - char *fp, *dp, *abs, *res; + char *fp, *dp, *afn, *res; + int i; - /* Find the common root of file and dir. */ - abs = absolute_filename (file, cwd); - fp = abs; + /* Find the common root of file and dir (with a trailing slash). 
*/ + afn = absolute_filename (file, cwd); + fp = afn; dp = dir; while (*fp++ == *dp++) continue; - do - { - fp--; - dp--; - } + fp--, dp--; /* back to the first differing char */ +#ifdef DOS_NT + if (fp == afn && afn[0] != '/') /* cannot build a relative name */ + return afn; +#endif + do /* look at the equal chars until '/' */ + fp--, dp--; while (*fp != '/'); - /* Build a sequence of "../" strings for the resulting relative filename. */ - for (dp = etags_strchr (dp + 1, '/'), res = ""; - dp != NULL; - dp = etags_strchr (dp + 1, '/')) - { - res = concat (res, "../", ""); - } + /* Build a sequence of "../" strings for the resulting relative file name. */ + i = 0; + while ((dp = etags_strchr (dp + 1, '/')) != NULL) + i += 1; + res = xnew (3*i + strlen (fp + 1) + 1, char); + res[0] = '\0'; + while (i-- > 0) + strcat (res, "../"); - /* Add the filename relative to the common root of file and dir. */ - res = concat (res, fp + 1, ""); - free (abs); + /* Add the file name relative to the common root of file and dir. */ + strcat (res, fp + 1); + free (afn); return res; } -/* Return a newly allocated string containing the - absolute filename of FILE given CWD (which should - end with a slash). */ +/* Return a newly allocated string containing the absolute file name + of FILE given DIR (which should end with a slash). */ char * -absolute_filename (file, cwd) - char *file, *cwd; +absolute_filename (file, dir) + char *file, *dir; { char *slashp, *cp, *res; - if (absolutefn (file)) - res = concat (file, "", ""); + if (filename_is_absolute (file)) + res = savestr (file); #ifdef DOS_NT - /* We don't support non-absolute filenames with a drive + /* We don't support non-absolute file names with a drive letter, like `d:NAME' (it's too much hassle). 
*/ else if (file[1] == ':') - fatal ("%s: relative filenames with drive letters not supported", file); + fatal ("%s: relative file names with drive letters not supported", file); #endif else - res = concat (cwd, file, ""); + res = concat (dir, file, ""); /* Delete the "/dirname/.." and "/." substrings. */ slashp = etags_strchr (res, '/'); @@ -4432,25 +5330,17 @@ absolute_filename (file, cwd) cp = slashp; do cp--; - while (cp >= res && !absolutefn (cp)); - if (*cp == '/') - { - strcpy (cp, slashp + 3); - } + while (cp >= res && !filename_is_absolute (cp)); + if (cp < res) + cp = slashp; /* the absolute name begins with "/.." */ #ifdef DOS_NT /* Under MSDOS and NT we get `d:/NAME' as absolute - filename, so the luser could say `d:/../NAME'. + file name, so the luser could say `d:/../NAME'. We silently treat this as `d:/NAME'. */ - else if (cp[1] == ':') - strcpy (cp + 3, slashp + 4); + else if (cp[0] != '/') + cp = slashp; #endif - else /* else (cp == res) */ - { - if (slashp[3] != '\0') - strcpy (cp, slashp + 4); - else - return "."; - } + strcpy (cp, slashp + 3); slashp = cp; continue; } @@ -4464,37 +5354,77 @@ absolute_filename (file, cwd) slashp = etags_strchr (slashp + 1, '/'); } - return res; + if (res[0] == '\0') + return savestr ("/"); + else + return res; } /* Return a newly allocated string containing the absolute - filename of dir where FILE resides given CWD (which should + file name of dir where FILE resides given DIR (which should end with a slash). 
*/ char * -absolute_dirname (file, cwd) - char *file, *cwd; +absolute_dirname (file, dir) + char *file, *dir; { char *slashp, *res; char save; -#ifdef DOS_NT - char *p; - - for (p = file; *p != '\0'; p++) - if (*p == '\\') - *p = '/'; -#endif + canonicalize_filename (file); slashp = etags_strrchr (file, '/'); if (slashp == NULL) - return cwd; + return savestr (dir); save = slashp[1]; slashp[1] = '\0'; - res = absolute_filename (file, cwd); + res = absolute_filename (file, dir); slashp[1] = save; return res; } +/* Whether the argument string is an absolute file name. The argument + string must have been canonicalized with canonicalize_filename. */ +bool +filename_is_absolute (fn) + char *fn; +{ + return (fn[0] == '/' +#ifdef DOS_NT + || (isalpha(fn[0]) && fn[1] == ':' && fn[2] == '/') +#endif + ); +} + +/* Translate backslashes into slashes. Works in place. */ +void +canonicalize_filename (fn) + register char *fn; +{ +#ifdef DOS_NT + /* Canonicalize drive letter case. */ + if (islower (fn[0])) + fn[0] = toupper (fn[0]); + /* Convert backslashes to slashes. */ + for (; *fn != '\0'; fn++) + if (*fn == '\\') + *fn = '/'; +#else + /* No action. */ + fn = NULL; /* shut up the compiler */ +#endif +} + +/* Increase the size of a linebuffer. */ +void +grow_linebuffer (lbp, toksize) + linebuffer *lbp; + int toksize; +{ + while (lbp->size < toksize) + lbp->size *= 2; + lbp->buffer = xrnew (lbp->buffer, lbp->size, char); +} + /* Like malloc but get fatal error if memory is exhausted. */ long * xmalloc (size) @@ -4502,7 +5432,7 @@ xmalloc (size) { long *result = (long *) malloc (size); if (result == NULL) - fatal ("virtual memory exhausted", 0); + fatal ("virtual memory exhausted", (char *)NULL); return result; } @@ -4513,6 +5443,6 @@ xrealloc (ptr, size) { long *result = (long *) realloc (ptr, size); if (result == NULL) - fatal ("virtual memory exhausted"); + fatal ("virtual memory exhausted", (char *)NULL); return result; }