X-Git-Url: https://git.hcoop.net/bpt/emacs.git/blobdiff_plain/e7d3b099a25e2a895b0b9ff3e1f7100082f170a6..7630911d6162623297a052bd75d8866440b73a68:/lib-src/etags.c diff --git a/lib-src/etags.c b/lib-src/etags.c index 6b4b379f13..829fc97fbf 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -2,21 +2,21 @@ Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002 Free Software Foundation, Inc. and Ken Arnold -This file is not considered part of GNU Emacs. + This file is not considered part of GNU Emacs. -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software Foundation, -Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* * Authors: @@ -27,14 +27,15 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ * 1989 Sam Kendall added C++. * 1992 Joseph B. Wells improved C and C++ parsing. * 1993 Francesco Potortì reorganised C and C++. - * 1994 Regexp tags by Tom Tromey. + * 1994 Line-by-line regexp tags by Tom Tromey. * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba). * 2002 #line directives by Francesco Potortì. * - * Francesco Potortì has maintained it since 1993. + * Francesco Potortì has maintained and improved it since 1993. + * */ -char pot_etags_version[] = "@(#) pot revision number is 16.10"; +char pot_etags_version[] = "@(#) pot revision number is 16.56"; #define TRUE 1 #define FALSE 0 @@ -176,25 +177,18 @@ If you want regular expression support, you should delete this notice and # define CTAGS FALSE #endif -/* Exit codes for success and failure. */ -#ifdef VMS -# define GOOD 1 -# define BAD 0 -#else -# define GOOD 0 -# define BAD 1 -#endif - #define streq(s,t) (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t)) +#define strcaseeq(s,t) (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t)) #define strneq(s,t,n) (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n)) +#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n)) #define CHARS 256 /* 2^sizeof(char) */ #define CHAR(x) ((unsigned int)(x) & (CHARS - 1)) -#define iswhite(c) (_wht[CHAR(c)]) /* c is white */ -#define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */ -#define begtoken(c) (_btk[CHAR(c)]) /* c can start token */ -#define intoken(c) (_itk[CHAR(c)]) /* c can be in token */ -#define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */ +#define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */ +#define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */ +#define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */ +#define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */ +#define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */ #define ISALNUM(c) isalnum (CHAR(c)) #define ISALPHA(c) isalpha 
(CHAR(c)) @@ -236,11 +230,12 @@ typedef struct typedef struct { char *name; /* language name */ - bool metasource; /* source used to generate other sources */ + char *help; /* detailed help for the language */ Lang_function *function; /* parse function */ - char **filenames; /* names of this language's files */ char **suffixes; /* name suffixes of this language's files */ + char **filenames; /* names of this language's files */ char **interpreters; /* interpreters for this language */ + bool metasource; /* source used to generate other sources */ } language; typedef struct fdesc @@ -253,6 +248,7 @@ typedef struct fdesc language *lang; /* language of file */ char *prop; /* file properties to write in tagfile */ bool usecharno; /* etags tags shall contain char number */ + bool written; /* entry written in the tags file */ } fdesc; typedef struct node_st @@ -260,9 +256,9 @@ typedef struct node_st struct node_st *left, *right; /* left and right sons */ fdesc *fdp; /* description of file to whom tag belongs */ char *name; /* tag name */ - char *pat; /* search pattern */ + char *regex; /* search regexp */ bool valid; /* write this tag on the tag file */ - bool is_func; /* function tag: use pattern in CTAGS mode */ + bool is_func; /* function tag: use regexp in CTAGS mode */ bool been_warned; /* warning already given for duplicated tag */ int lno; /* line number tag is on */ long cno; /* character number line starts on */ @@ -288,9 +284,9 @@ typedef struct enum { at_language, /* a language specification */ at_regexp, /* a regular expression */ - at_icregexp, /* same, but with case ignored */ at_filename, /* a file name */ - at_stdin /* read from stdin here */ + at_stdin, /* read from stdin here */ + at_end /* stop parsing the list */ } arg_type; /* argument type */ language *lang; /* language associated with the argument */ char *what; /* the argument itself */ @@ -298,17 +294,19 @@ typedef struct #ifdef ETAGS_REGEXPS /* Structure defining a regular expression. 
*/ -typedef struct pattern +typedef struct regexp { - struct pattern *p_next; - language *lang; - char *regex; - struct re_pattern_buffer *pat; - struct re_registers regs; - char *name_pattern; - bool error_signaled; - bool ignore_case; -} pattern; + struct regexp *p_next; /* pointer to next in list */ + language *lang; /* if set, use only for this language */ + char *pattern; /* the regexp pattern */ + char *name; /* tag name */ + struct re_pattern_buffer *pat; /* the compiled pattern */ + struct re_registers regs; /* re registers */ + bool error_signaled; /* already signaled for this regexp */ + bool force_explicit_name; /* do not allow implict tag name */ + bool ignore_case; /* ignore case when matching */ + bool multi_line; /* do a multi-line match on the whole file */ +} regexp; #endif /* ETAGS_REGEXPS */ @@ -326,23 +324,24 @@ static void Cplusplus_entries __P((FILE *)); static void Cstar_entries __P((FILE *)); static void Erlang_functions __P((FILE *)); static void Fortran_functions __P((FILE *)); -static void Yacc_entries __P((FILE *)); +static void HTML_labels __P((FILE *)); static void Lisp_functions __P((FILE *)); static void Makefile_targets __P((FILE *)); static void Pascal_functions __P((FILE *)); static void Perl_functions __P((FILE *)); static void PHP_functions __P((FILE *)); -static void Postscript_functions __P((FILE *)); +static void PS_functions __P((FILE *)); static void Prolog_functions __P((FILE *)); static void Python_functions __P((FILE *)); static void Scheme_functions __P((FILE *)); static void TeX_commands __P((FILE *)); static void Texinfo_nodes __P((FILE *)); +static void Yacc_entries __P((FILE *)); static void just_read_file __P((FILE *)); static void print_language_names __P((void)); static void print_version __P((void)); -static void print_help __P((void)); +static void print_help __P((argument *)); int main __P((int, char **)); static compressor *get_compressor_from_suffix __P((char *, char **)); @@ -352,12 +351,12 @@ static 
language *get_language_from_filename __P((char *, bool)); static void readline __P((linebuffer *, FILE *)); static long readline_internal __P((linebuffer *, FILE *)); static bool nocase_tail __P((char *)); -static char *get_tag __P((char *)); +static void get_tag __P((char *, char **)); #ifdef ETAGS_REGEXPS -static void analyse_regex __P((char *, bool)); -static void add_regex __P((char *, bool, language *)); -static void free_patterns __P((void)); +static void analyse_regex __P((char *)); +static void free_regexps __P((void)); +static void regex_tag_multiline __P((void)); #endif /* ETAGS_REGEXPS */ static void error __P((const char *, const char *)); static void suggest_asking_for_help __P((void)); @@ -366,14 +365,13 @@ static void pfatal __P((char *)); static void add_node __P((node *, node **)); static void init __P((void)); -static void initbuffer __P((linebuffer *)); static void process_file_name __P((char *, language *)); static void process_file __P((FILE *, char *, language *)); static void find_entries __P((FILE *)); static void free_tree __P((node *)); static void free_fdesc __P((fdesc *)); static void pfnote __P((char *, bool, char *, int, int, long)); -static void new_pfnote __P((char *, int, bool, char *, int, int, long)); +static void make_tag __P((char *, int, bool, char *, int, int, long)); static void invalidate_nodes __P((fdesc *, node **)); static void put_entries __P((node *)); @@ -384,13 +382,15 @@ static char *savenstr __P((char *, int)); static char *savestr __P((char *)); static char *etags_strchr __P((const char *, int)); static char *etags_strrchr __P((const char *, int)); -static bool strcaseeq __P((const char *, const char *)); +static int etags_strcasecmp __P((const char *, const char *)); +static int etags_strncasecmp __P((const char *, const char *, int)); static char *etags_getcwd __P((void)); static char *relative_filename __P((char *, char *)); static char *absolute_filename __P((char *, char *)); static char *absolute_dirname 
__P((char *, char *)); static bool filename_is_absolute __P((char *f)); static void canonicalize_filename __P((char *)); +static void linebuffer_init __P((linebuffer *)); static void linebuffer_setlen __P((linebuffer *, int)); static PTR xmalloc __P((unsigned int)); static PTR xrealloc __P((char *, unsigned int)); @@ -417,6 +417,8 @@ static node *nodehead; /* the head of the binary tree of tags */ static node *last_node; /* the last node created */ static linebuffer lb; /* the current line */ +static linebuffer filebuf; /* a buffer containing the whole file */ +static linebuffer token_name; /* a buffer containing a tag name */ /* boolean "functions" (see init) */ static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS]; @@ -424,7 +426,7 @@ static char /* white chars */ *white = " \f\t\n\r\v", /* not in a name */ - *nonam = " \f\t\n\r()=,;", + *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */ /* token ending chars */ *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?", /* token starting chars */ @@ -450,20 +452,17 @@ static bool vgrind_style; /* -v: create vgrind style index output */ static bool no_warnings; /* -w: suppress warnings */ static bool cxref_style; /* -x: create cxref style output */ static bool cplusplus; /* .[hc] means C++, not C */ -static bool noindentypedefs; /* -I: ignore indentation in C */ +static bool ignoreindent; /* -I: ignore indentation in C */ static bool packages_only; /* --packages-only: in Ada, only tag packages*/ #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */ static bool parsing_stdin; /* --parse-stdin used */ #ifdef ETAGS_REGEXPS -/* List of all regexps. */ -static pattern *p_head; - -/* How many characters in the character set. (From regex.c.) */ -#define CHAR_SET_SIZE 256 -/* Translation table for case-insensitive matching. 
*/ -static char lc_trans[CHAR_SET_SIZE]; +static regexp *p_head; /* list of all regexps */ +static bool need_filebuf; /* some regexes are multi-line */ +#else +# define need_filebuf FALSE #endif /* ETAGS_REGEXPS */ #ifdef LONG_OPTIONS @@ -526,6 +525,22 @@ static compressor compressors[] = /* Ada code */ static char *Ada_suffixes [] = { "ads", "adb", "ada", NULL }; +static char Ada_help [] = +"In Ada code, functions, procedures, packages, tasks and types are\n\ +tags. Use the `--packages-only' option to create tags for\n\ +packages only.\n\ +Ada tag names have suffixes indicating the type of entity:\n\ + Entity type: Qualifier:\n\ + ------------ ----------\n\ + function /f\n\ + procedure /p\n\ + package spec /s\n\ + package body /b\n\ + type /t\n\ + task /k\n\ +Thus, `M-x find-tag bidule/b ' will go directly to the\n\ +body of the package `bidule', while `M-x find-tag bidule '\n\ +will just search for any tag `bidule'."; /* Assembly code */ static char *Asm_suffixes [] = @@ -539,79 +554,187 @@ static char *Asm_suffixes [] = "src", /* BSO/Tasking C compiler output */ NULL }; +static char Asm_help [] = +"In assembler code, labels appearing at the beginning of a line,\n\ +followed by a colon, are tags."; + /* Note that .c and .h can be considered C++, if the --c++ flag was - given, or if the `class' keyowrd is met inside the file. + given, or if the `class' or `template' keyowrds are met inside the file. That is why default_C_entries is called for these. */ static char *default_C_suffixes [] = { "c", "h", NULL }; +static char default_C_help [] = +"In C code, any C function or typedef is a tag, and so are\n\ +definitions of `struct', `union' and `enum'. `#define' macro\n\ +definitions and `enum' constants are tags unless you specify\n\ +`--no-defines'. Global variables are tags unless you specify\n\ +`--no-globals'. 
Use of `--no-globals' and `--no-defines'\n\ +can make the tags table file much smaller.\n\ +You can tag function declarations and external variables by\n\ +using `--declarations', and struct members by using `--members'."; static char *Cplusplus_suffixes [] = { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx", "M", /* Objective C++ */ "pdb", /* Postscript with C syntax */ NULL }; +static char Cplusplus_help [] = +"In C++ code, all the tag constructs of C code are tagged. (Use\n\ +--help --lang=c --lang=c++ for full help.)\n\ +In addition to C tags, member functions are also recognized, and\n\ +optionally member variables if you use the `--members' option.\n\ +Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\ +and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\ +`operator+'."; static char *Cjava_suffixes [] = { "java", NULL }; +static char Cjava_help [] = +"In Java code, all the tags constructs of C and C++ code are\n\ +tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)"; + static char *Cobol_suffixes [] = { "COB", "cob", NULL }; +static char Cobol_help [] = +"In Cobol code, tags are paragraph names; that is, any word\n\ +starting in column 8 and followed by a period."; static char *Cstar_suffixes [] = { "cs", "hs", NULL }; static char *Erlang_suffixes [] = { "erl", "hrl", NULL }; +static char Erlang_help [] = +"In Erlang code, the tags are the functions, records and macros\n\ +defined in the file."; static char *Fortran_suffixes [] = { "F", "f", "f90", "for", NULL }; +static char Fortran_help [] = +"In Fortran code, functions, subroutines and block data are tags."; + +static char *HTML_suffixes [] = + { "htm", "html", "shtml", NULL }; +static char HTML_help [] = +"In HTML input files, the tags are the `title' and the `h1', `h2',\n\ +`h3' headers. 
Also, tags are `name=' in anchors and all\n\ +occurrences of `id='."; static char *Lisp_suffixes [] = { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL }; +static char Lisp_help [] = +"In Lisp code, any function defined with `defun', any variable\n\ +defined with `defvar' or `defconst', and in general the first\n\ +argument of any expression that starts with `(def' in column zero\n\ +is a tag."; static char *Makefile_filenames [] = { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL}; +static char Makefile_help [] = +"In makefiles, targets are tags; additionally, variables are tags\n\ +unless you specify `--no-globals'."; + +static char *Objc_suffixes [] = + { "lm", /* Objective lex file */ + "m", /* Objective C file */ + NULL }; +static char Objc_help [] = +"In Objective C code, tags include Objective C definitions for classes,\n\ +class categories, methods and protocols. Tags for variables and\n\ +functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'."; static char *Pascal_suffixes [] = { "p", "pas", NULL }; +static char Pascal_help [] = +"In Pascal code, the tags are the functions and procedures defined\n\ +in the file."; static char *Perl_suffixes [] = { "pl", "pm", NULL }; - static char *Perl_interpreters [] = { "perl", "@PERL@", NULL }; +static char Perl_help [] = +"In Perl code, the tags are the packages, subroutines and variables\n\ +defined by the `package', `sub', `my' and `local' keywords. Use\n\ +`--globals' if you want to tag global variables. Tags for\n\ +subroutines are named `PACKAGE::SUB'. The name for subroutines\n\ +defined in the default package is `main::SUB'."; static char *PHP_suffixes [] = { "php", "php3", "php4", NULL }; +static char PHP_help [] = +"In PHP code, tags are functions, classes and defines. 
When using\n\ +the `--members' option, vars are tags too."; static char *plain_C_suffixes [] = - { "lm", /* Objective lex file */ - "m", /* Objective C file */ - "pc", /* Pro*C file */ + { "pc", /* Pro*C file */ NULL }; -static char *Postscript_suffixes [] = +static char *PS_suffixes [] = { "ps", "psw", NULL }; /* .psw is for PSWrap */ +static char PS_help [] = +"In PostScript code, the tags are the functions."; static char *Prolog_suffixes [] = { "prolog", NULL }; +static char Prolog_help [] = +"In Prolog code, tags are predicates and rules at the beginning of\n\ +line."; static char *Python_suffixes [] = { "py", NULL }; +static char Python_help [] = +"In Python code, `def' or `class' at the beginning of a line\n\ +generate a tag."; /* Can't do the `SCM' or `scm' prefix with a version number. */ static char *Scheme_suffixes [] = { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL }; +static char Scheme_help [] = +"In Scheme code, tags include anything defined with `def' or with a\n\ +construct whose name starts with `def'. They also include\n\ +variables set with `set!' 
at top level in the file."; static char *TeX_suffixes [] = { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL }; +static char TeX_help [] = +"In LaTeX text, the argument of any of the commands `\\chapter',\n\ +`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\ +`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\ +`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\ +`\\newenvironment' or `\\renewenvironment' is a tag.\n\ +\n\ +Other commands can be specified by setting the environment variable\n\ +`TEXTAGS' to a colon-separated list like, for example,\n\ + TEXTAGS=\"mycommand:myothercommand\"."; + static char *Texinfo_suffixes [] = { "texi", "texinfo", "txi", NULL }; +static char Texinfo_help [] = +"for texinfo files, lines starting with @node are tagged."; static char *Yacc_suffixes [] = { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */ +static char Yacc_help [] = +"In Bison or Yacc input files, each rule defines as a tag the\n\ +nonterminal it constructs. The portions of the file that contain\n\ +C code are parsed as C code (use --help --lang=c --lang=yacc\n\ +for full help)."; + +static char auto_help [] = +"`auto' is not a real language, it indicates to use\n\ +a default language for files base on file name suffix and file contents."; + +static char none_help [] = +"`none' is not a real language, it indicates to only do\n\ +regexp processing on files."; + +static char no_lang_help [] = +"No detailed help available for this language."; + /* * Table of languages. 
@@ -622,31 +745,33 @@ static char *Yacc_suffixes [] = static language lang_names [] = { - { "ada", FALSE, Ada_funcs, NULL, Ada_suffixes, NULL }, - { "asm", FALSE, Asm_labels, NULL, Asm_suffixes, NULL }, - { "c", FALSE, default_C_entries, NULL, default_C_suffixes, NULL }, - { "c++", FALSE, Cplusplus_entries, NULL, Cplusplus_suffixes, NULL }, - { "c*", FALSE, Cstar_entries, NULL, Cstar_suffixes, NULL }, - { "cobol", FALSE, Cobol_paragraphs, NULL, Cobol_suffixes, NULL }, - { "erlang", FALSE, Erlang_functions, NULL, Erlang_suffixes, NULL }, - { "fortran", FALSE, Fortran_functions, NULL, Fortran_suffixes, NULL }, - { "java", FALSE, Cjava_entries, NULL, Cjava_suffixes, NULL }, - { "lisp", FALSE, Lisp_functions, NULL, Lisp_suffixes, NULL }, - { "makefile", FALSE, Makefile_targets, Makefile_filenames, NULL, NULL }, - { "pascal", FALSE, Pascal_functions, NULL, Pascal_suffixes, NULL }, - { "perl", FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters }, - { "php", FALSE, PHP_functions, NULL, PHP_suffixes, NULL }, - { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL }, - { "proc", FALSE, plain_C_entries, NULL, plain_C_suffixes, NULL }, - { "prolog", FALSE, Prolog_functions, NULL, Prolog_suffixes, NULL }, - { "python", FALSE, Python_functions, NULL, Python_suffixes, NULL }, - { "scheme", FALSE, Scheme_functions, NULL, Scheme_suffixes, NULL }, - { "tex", FALSE, TeX_commands, NULL, TeX_suffixes, NULL }, - { "texinfo", FALSE, Texinfo_nodes, NULL, Texinfo_suffixes, NULL }, - { "yacc", TRUE, Yacc_entries, NULL, Yacc_suffixes, NULL }, - { "auto", FALSE, NULL }, /* default guessing scheme */ - { "none", FALSE, just_read_file }, /* regexp matching only */ - { NULL, FALSE, NULL } /* end of list */ + { "ada", Ada_help, Ada_funcs, Ada_suffixes }, + { "asm", Asm_help, Asm_labels, Asm_suffixes }, + { "c", default_C_help, default_C_entries, default_C_suffixes }, + { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes }, + { "c*", no_lang_help, 
Cstar_entries, Cstar_suffixes }, + { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes }, + { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes }, + { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes }, + { "html", HTML_help, HTML_labels, HTML_suffixes }, + { "java", Cjava_help, Cjava_entries, Cjava_suffixes }, + { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes }, + { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames}, + { "objc", Objc_help, plain_C_entries, Objc_suffixes }, + { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes }, + { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters}, + { "php", PHP_help, PHP_functions, PHP_suffixes }, + { "postscript",PS_help, PS_functions, PS_suffixes }, + { "proc", no_lang_help, plain_C_entries, plain_C_suffixes }, + { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes }, + { "python", Python_help, Python_functions, Python_suffixes }, + { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes }, + { "tex", TeX_help, TeX_commands, TeX_suffixes }, + { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes }, + { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE}, + { "auto", auto_help }, /* default guessing scheme */ + { "none", none_help, just_read_file }, /* regexp matching only */ + { NULL } /* end of list */ }; @@ -669,21 +794,25 @@ default file names and dot suffixes:"); printf (" .%s", *ext); puts (""); } - puts ("Where `auto' means use default language for files based on file\n\ + puts ("where `auto' means use default language for files based on file\n\ name suffix, and `none' means only do regexp processing on files.\n\ If no language is specified and no matching suffix is found,\n\ the first line of the file is read for a sharp-bang (#!) sequence\n\ followed by the name of an interpreter. 
If no such sequence is found,\n\ Fortran is tried first; if no tags are found, C is tried next.\n\ -When parsing any C file, a \"class\" keyword switches to C++.\n\ -Compressed files are supported using gzip and bzip2."); +When parsing any C file, a \"class\" or \"template\" keyword\n\ +switches to C++."); + puts ("Compressed files are supported using gzip and bzip2.\n\ +\n\ +For detailed help on a given language use, for example,\n\ +etags --help --lang=ada."); } #ifndef EMACS_NAME -# define EMACS_NAME "GNU Emacs" +# define EMACS_NAME "standalone" #endif #ifndef VERSION -# define VERSION "21" +# define VERSION "version" #endif static void print_version () @@ -692,12 +821,27 @@ print_version () puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold"); puts ("This program is distributed under the same terms as Emacs"); - exit (GOOD); + exit (EXIT_SUCCESS); } static void -print_help () +print_help (argbuffer) + argument *argbuffer; { + bool help_for_lang = FALSE; + + for (; argbuffer->arg_type != at_end; argbuffer++) + if (argbuffer->arg_type == at_language) + { + if (help_for_lang) + puts (""); + puts (argbuffer->lang->help); + help_for_lang = TRUE; + } + + if (help_for_lang) + exit (EXIT_SUCCESS); + printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\ \n\ These are the options accepted by %s.\n", progname, progname); @@ -726,7 +870,7 @@ Relative ones are stored relative to the output file's directory.\n"); /* This option is mostly obsolete, because etags can now automatically detect C++. Retained for backward compatibility and for debugging and experimentation. In principle, we could want to tag as C++ even - before any "class" keyword. + before any "class" or "template" keyword. puts ("-C, --c++\n\ Treat files whose name suffix defaults to C language as C++ files."); */ @@ -765,27 +909,27 @@ Relative ones are stored relative to the output file's directory.\n"); Do not create tag entries for global variables in some\n\ languages. 
This makes the tags file smaller."); puts ("--members\n\ - Create tag entries for member variables in C and derived languages."); + Create tag entries for members of structures in some languages."); #ifdef ETAGS_REGEXPS - puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\ - Make a tag for each line matching pattern REGEXP in the following\n\ - files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\ - regexfile is a file containing one REGEXP per line.\n\ - REGEXP is anchored (as if preceded by ^).\n\ - The form /REGEXP/NAME/ creates a named tag.\n\ + puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\ + Make a tag for each line matching a regular expression pattern\n\ + in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\ + files only. REGEXFILE is a file containing one REGEXP per line.\n\ + REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\ + optional. The TAGREGEXP pattern is anchored (as if preceded by ^)."); + puts (" If TAGNAME/ is present, the tags created are named.\n\ For example Tcl named tags can be created with:\n\ - --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\""); - puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\ - Like -r, --regex but ignore case when matching expressions."); + --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\ + MODS are optional one-letter modifiers: `i' means to ignore case,\n\ + `m' means to allow multi-line matches, `s' implies `m' and\n\ + causes dot to match any character, including newline."); puts ("-R, --no-regex\n\ Don't create tags from regexps for the following files."); #endif /* ETAGS_REGEXPS */ puts ("-I, --ignore-indentation\n\ - Don't rely on indentation quite as much as normal. 
Currently,\n\ - this means not to assume that a closing brace in the first\n\ - column is the final brace of a function or structure\n\ - definition in C and C++."); + In C and C++ do not assume that a closing brace in the first\n\ + column is the final brace of a function or structure definition."); puts ("-o FILE, --output=FILE\n\ Write the tags to FILE."); puts ("--parse-stdin=NAME\n\ @@ -828,14 +972,16 @@ Relative ones are stored relative to the output file's directory.\n"); puts ("-V, --version\n\ Print the version of the program.\n\ -h, --help\n\ - Print this help message."); + Print this help message.\n\ + Followed by one or more `--language' options prints detailed\n\ + help about tag generation for the specified languages."); print_language_names (); puts (""); puts ("Report bugs to bug-gnu-emacs@gnu.org"); - exit (GOOD); + exit (EXIT_SUCCESS); } @@ -975,6 +1121,7 @@ main (argc, argv) argument *argbuffer; int current_arg, file_count; linebuffer filename_lb; + bool help_asked = FALSE; #ifdef VMS bool got_err; #endif @@ -996,14 +1143,6 @@ main (argc, argv) is small. */ argbuffer = xnew (argc, argument); -#ifdef ETAGS_REGEXPS - /* Set syntax for regular expression routines. */ - re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS); - /* Translation table for case-insensitive search. */ - for (i = 0; i < CHAR_SET_SIZE; i++) - lc_trans[i] = lowcase (i); -#endif /* ETAGS_REGEXPS */ - /* * If etags, always find typedefs and structure tags. Why not? * Also default to find macro constants, enum constants and @@ -1061,12 +1200,13 @@ main (argc, argv) { error ("-o option may only be given once.", (char *)NULL); suggest_asking_for_help (); + /* NOTREACHED */ } tagfile = optarg; break; case 'I': case 'S': /* for backward compatibility */ - noindentypedefs = TRUE; + ignoreindent = TRUE; break; case 'l': { @@ -1079,6 +1219,10 @@ main (argc, argv) } } break; + case 'c': + /* Backward compatibility: support obsolete --ignore-case-regexp. 
*/ + optarg = concat (optarg, "i", ""); /* memory leak here */ + /* FALLTHRU */ case 'r': argbuffer[current_arg].arg_type = at_regexp; argbuffer[current_arg].what = optarg; @@ -1089,17 +1233,12 @@ main (argc, argv) argbuffer[current_arg].what = NULL; ++current_arg; break; - case 'c': - argbuffer[current_arg].arg_type = at_icregexp; - argbuffer[current_arg].what = optarg; - ++current_arg; - break; case 'V': print_version (); break; case 'h': case 'H': - print_help (); + help_asked = TRUE; break; /* Etags options */ @@ -1118,9 +1257,10 @@ main (argc, argv) case 'w': no_warnings = TRUE; break; default: suggest_asking_for_help (); + /* NOTREACHED */ } - for (; optind < argc; ++optind) + for (; optind < argc; optind++) { argbuffer[current_arg].arg_type = at_filename; argbuffer[current_arg].what = argv[optind]; @@ -1128,10 +1268,17 @@ main (argc, argv) ++file_count; } + argbuffer[current_arg].arg_type = at_end; + + if (help_asked) + print_help (argbuffer); + /* NOTREACHED */ + if (nincluded_files == 0 && file_count == 0) { error ("no input files specified.", (char *)NULL); suggest_asking_for_help (); + /* NOTREACHED */ } if (tagfile == NULL) @@ -1150,8 +1297,10 @@ main (argc, argv) init (); /* set up boolean "functions" */ - initbuffer (&lb); - initbuffer (&filename_lb); + linebuffer_init (&lb); + linebuffer_init (&filename_lb); + linebuffer_init (&filebuf); + linebuffer_init (&token_name); if (!CTAGS) { @@ -1174,7 +1323,7 @@ main (argc, argv) /* * Loop through files finding functions. 
*/ - for (i = 0; i < current_arg; ++i) + for (i = 0; i < current_arg; i++) { static language *lang; /* non-NULL if language is forced */ char *this_file; @@ -1186,10 +1335,7 @@ main (argc, argv) break; #ifdef ETAGS_REGEXPS case at_regexp: - analyse_regex (argbuffer[i].what, FALSE); - break; - case at_icregexp: - analyse_regex (argbuffer[i].what, TRUE); + analyse_regex (argbuffer[i].what); break; #endif case at_filename: @@ -1232,21 +1378,33 @@ main (argc, argv) } #ifdef ETAGS_REGEXPS - free_patterns (); + free_regexps (); #endif /* ETAGS_REGEXPS */ + free (lb.buffer); + free (filebuf.buffer); + free (token_name.buffer); if (!CTAGS || cxref_style) { - put_entries (nodehead); + put_entries (nodehead); /* write the remainig tags (ETAGS) */ free_tree (nodehead); nodehead = NULL; if (!CTAGS) - while (nincluded_files-- > 0) - fprintf (tagf, "\f\n%s,include\n", *included_files++); + { + fdesc *fdp; + + /* Output file entries that have no tags. */ + for (fdp = fdhead; fdp != NULL; fdp = fdp->next) + if (!fdp->written) + fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname); + + while (nincluded_files-- > 0) + fprintf (tagf, "\f\n%s,include\n", *included_files++); + } if (fclose (tagf) == EOF) pfatal (tagfile); - exit (GOOD); + exit (EXIT_SUCCESS); } if (update) @@ -1265,7 +1423,7 @@ main (argc, argv) sprintf (cmd, "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS", tagfile, argbuffer[i].what, tagfile); - if (system (cmd) != GOOD) + if (system (cmd) != EXIT_SUCCESS) fatal ("failed to execute shell command", (char *)NULL); } append_to_tagfile = TRUE; @@ -1274,7 +1432,7 @@ main (argc, argv) tagf = fopen (tagfile, append_to_tagfile ? 
"a" : "w"); if (tagf == NULL) pfatal (tagfile); - put_entries (nodehead); + put_entries (nodehead); /* write all the tags (CTAGS) */ free_tree (nodehead); nodehead = NULL; if (fclose (tagf) == EOF) @@ -1286,7 +1444,7 @@ main (argc, argv) sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile); exit (system (cmd)); } - return GOOD; + return EXIT_SUCCESS; } @@ -1571,6 +1729,7 @@ process_file (fh, fn, lang) } fdp->usecharno = TRUE; /* use char position when making tags */ fdp->prop = NULL; + fdp->written = FALSE; /* not written on tags file yet */ fdhead = fdp; curfdp = fdhead; /* the current file description */ @@ -1648,7 +1807,6 @@ find_entries (inf) FILE *inf; { char *cp; - node *old_last_node; language *lang = curfdp->lang; Lang_function *parser = NULL; @@ -1703,7 +1861,7 @@ find_entries (inf) /* We rewind here, even if inf may be a pipe. We fail if the length of the first line is longer than the pipe block size, which is unlikely. */ - rewind (inf); + rewind (inf); /* Else try to guess the language given the case insensitive file name. */ if (parser == NULL) @@ -1716,6 +1874,26 @@ find_entries (inf) } } + /* Else try Fortran or C. */ + if (parser == NULL) + { + node *old_last_node = last_node; + + curfdp->lang = get_language_from_langname ("fortran"); + find_entries (inf); + + if (old_last_node == last_node) + /* No Fortran entries found. Try C. */ + { + /* We do not tag if rewind fails. + Only the file name will be recorded in the tags file. */ + rewind (inf); + curfdp->lang = get_language_from_langname (cplusplus ? 
"c++" : "c"); + find_entries (inf); + } + return; + } + if (!no_line_directive && curfdp->lang != NULL && curfdp->lang->metasource) /* It may be that this is a bingo.y file, and we already parsed a bingo.c @@ -1733,12 +1911,8 @@ find_entries (inf) { fdesc *badfdp = *fdpp; - if (DEBUG) - fprintf (stderr, - "Removing references to \"%s\" obtained from \"%s\"\n", - badfdp->taggedfname, badfdp->infname); - - /* Delete the tags referring to badfdp. */ + /* Delete the tags referring to badfdp->taggedfname + that were obtained from badfdp->infname. */ invalidate_nodes (badfdp, &nodehead); *fdpp = badfdp->next; /* remove the bad description from the list */ @@ -1748,35 +1922,89 @@ find_entries (inf) fdpp = &(*fdpp)->next; /* advance the list pointer */ } - if (parser != NULL) - { - /* Generic initialisations before reading from file. */ - lineno = 0; /* reset global line number */ - charno = 0; /* reset global char number */ - linecharno = 0; /* reset global char number of line start */ + assert (parser != NULL); - parser (inf); - return; - } + /* Generic initialisations before reading from file. */ + linebuffer_setlen (&filebuf, 0); /* reset the file buffer */ - /* Else try Fortran. */ - old_last_node = last_node; - curfdp->lang = get_language_from_langname ("fortran"); - find_entries (inf); + /* Generic initialisations before parsing file with readline. */ + lineno = 0; /* reset global line number */ + charno = 0; /* reset global char number */ + linecharno = 0; /* reset global char number of line start */ + + parser (inf); + +#ifdef ETAGS_REGEXPS + regex_tag_multiline (); +#endif /* ETAGS_REGEXPS */ +} + + +/* + * Check whether an implicitly named tag should be created, + * then call `pfnote'. + * NAME is a string that is internally copied by this function. + * + * TAGS format specification + * Idea by Sam Kendall (1997) + * The following is explained in some more detail in etc/ETAGS.EBNF. 
+ * + * make_tag creates tags with "implicit tag names" (unnamed tags) + * if the following are all true, assuming NONAM=" \f\t\n\r()=,;": + * 1. NAME does not contain any of the characters in NONAM; + * 2. LINESTART contains name as either a rightmost, or rightmost but + * one character, substring; + * 3. the character, if any, immediately before NAME in LINESTART must + * be a character in NONAM; + * 4. the character, if any, immediately after NAME in LINESTART must + * also be a character in NONAM. + * + * The implementation uses the notinname() macro, which recognises the + * characters stored in the string `nonam'. + * etags.el needs to use the same characters that are in NONAM. + */ +static void +make_tag (name, namelen, is_func, linestart, linelen, lno, cno) + char *name; /* tag name, or NULL if unnamed */ + int namelen; /* tag length */ + bool is_func; /* tag is a function */ + char *linestart; /* start of the line where tag is */ + int linelen; /* length of the line where tag is */ + int lno; /* line number */ + long cno; /* character number */ +{ + bool named = (name != NULL && namelen > 0); - if (old_last_node == last_node) - /* No Fortran entries found. Try C. */ + if (!CTAGS && named) /* maybe set named to false */ + /* Let's try to make an implicit tag name, that is, create an unnamed tag + such that etags.el can guess a name from it. */ { - /* We do not tag if rewind fails. - Only the file name will be recorded in the tags file. */ - rewind (inf); - curfdp->lang = get_language_from_langname (cplusplus ? 
"c++" : "c"); - find_entries (inf); + int i; + register char *cp = name; + + for (i = 0; i < namelen; i++) + if (notinname (*cp++)) + break; + if (i == namelen) /* rule #1 */ + { + cp = linestart + linelen - namelen; + if (notinname (linestart[linelen-1])) + cp -= 1; /* rule #4 */ + if (cp >= linestart /* rule #2 */ + && (cp == linestart + || notinname (cp[-1])) /* rule #3 */ + && strneq (name, cp, namelen)) /* rule #2 */ + named = FALSE; /* use implicit tag name */ + } } - return; + + if (named) + name = savenstr (name, namelen); + else + name = NULL; + pfnote (name, is_func, linestart, linelen, lno, cno); } - /* Record a tag. */ static void pfnote (name, is_func, linestart, linelen, lno, cno) @@ -1789,6 +2017,7 @@ pfnote (name, is_func, linestart, linelen, lno, cno) { register node *np; + assert (name == NULL || name[0] != '\0'); if (CTAGS && name == NULL) return; @@ -1823,72 +2052,16 @@ pfnote (name, is_func, linestart, linelen, lno, cno) if (CTAGS && !cxref_style) { if (strlen (linestart) < 50) - np->pat = concat (linestart, "$", ""); + np->regex = concat (linestart, "$", ""); else - np->pat = savenstr (linestart, 50); + np->regex = savenstr (linestart, 50); } else - np->pat = savenstr (linestart, linelen); + np->regex = savenstr (linestart, linelen); add_node (np, &nodehead); } -/* - * TAGS format specification - * Idea by Sam Kendall (1997) - * - * pfnote should emit the optimized form [unnamed tag] only if: - * 1. name does not contain any of the characters " \t\r\n(),;"; - * 2. linestart contains name as either a rightmost, or rightmost but - * one character, substring; - * 3. the character, if any, immediately before name in linestart must - * be one of the characters " \t(),;"; - * 4. the character, if any, immediately after name in linestart must - * also be one of the characters " \t(),;". - * - * The real implementation uses the notinname() macro, which recognises - * characters slightly different from " \t\r\n(),;". See the variable - * `nonam'. 
- */ -#define traditional_tag_style TRUE -static void -new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno) - char *name; /* tag name, or NULL if unnamed */ - int namelen; /* tag length */ - bool is_func; /* tag is a function */ - char *linestart; /* start of the line where tag is */ - int linelen; /* length of the line where tag is */ - int lno; /* line number */ - long cno; /* character number */ -{ - register char *cp; - bool named; - - named = TRUE; - if (!CTAGS) - { - for (cp = name; !notinname (*cp); cp++) - continue; - if (*cp == '\0') /* rule #1 */ - { - cp = linestart + linelen - namelen; - if (notinname (linestart[linelen-1])) - cp -= 1; /* rule #4 */ - if (cp >= linestart /* rule #2 */ - && (cp == linestart - || notinname (cp[-1])) /* rule #3 */ - && strneq (name, cp, namelen)) /* rule #2 */ - named = FALSE; /* use unnamed tag */ - } - } - - if (named) - name = savenstr (name, namelen); - else - name = NULL; - pfnote (name, is_func, linestart, linelen, lno, cno); -} - /* * free_tree () * recurse on left children, iterate on right children. 
@@ -1903,7 +2076,7 @@ free_tree (np) free_tree (np->left); if (np->name != NULL) free (np->name); - free (np->pat); + free (np->regex); free (np); np = node_right; } @@ -2030,20 +2203,22 @@ invalidate_nodes (badfdp, npp) if (np->left != NULL) invalidate_nodes (badfdp, &np->left); if (np->fdp == badfdp) - np-> valid = FALSE; + np->valid = FALSE; if (np->right != NULL) invalidate_nodes (badfdp, &np->right); } else { - node **next = &np->left; + assert (np->fdp != NULL); if (np->fdp == badfdp) { - *npp = *next; /* detach the sublist from the list */ + *npp = np->left; /* detach the sublist from the list */ np->left = NULL; /* isolate it */ free_tree (np); /* free it */ + invalidate_nodes (badfdp, npp); } - invalidate_nodes (badfdp, next); + else + invalidate_nodes (badfdp, &np->left); } } @@ -2075,15 +2250,16 @@ total_size_of_entries (np) register int total = 0; for (; np != NULL; np = np->right) - { - total += strlen (np->pat) + 1; /* pat\177 */ - if (np->name != NULL) - total += strlen (np->name) + 1; /* name\001 */ - total += number_len ((long) np->lno) + 1; /* lno, */ - if (np->cno != invalidcharno) /* cno */ - total += number_len (np->cno); - total += 1; /* newline */ - } + if (np->valid) + { + total += strlen (np->regex) + 1; /* pat\177 */ + if (np->name != NULL) + total += strlen (np->name) + 1; /* name\001 */ + total += number_len ((long) np->lno) + 1; /* lno, */ + if (np->cno != invalidcharno) /* cno */ + total += number_len (np->cno); + total += 1; /* newline */ + } return total; } @@ -2113,8 +2289,9 @@ put_entries (np) fdp = np->fdp; fprintf (tagf, "\f\n%s,%d\n", fdp->taggedfname, total_size_of_entries (np)); + fdp->written = TRUE; } - fputs (np->pat, tagf); + fputs (np->regex, tagf); fputc ('\177', tagf); if (np->name != NULL) { @@ -2139,7 +2316,7 @@ put_entries (np) np->name, np->fdp->taggedfname, (np->lno + 63) / 64); else fprintf (stdout, "%-16s %3d %-16s %s\n", - np->name, np->lno, np->fdp->taggedfname, np->pat); + np->name, np->lno, 
np->fdp->taggedfname, np->regex); } else { @@ -2150,7 +2327,7 @@ put_entries (np) putc (searchar, tagf); putc ('^', tagf); - for (sp = np->pat; *sp; sp++) + for (sp = np->regex; *sp; sp++) { if (*sp == '\\' || *sp == searchar) putc ('\\', tagf); @@ -2534,15 +2711,22 @@ static enum */ static struct tok { - bool valid; - bool named; - int offset; - int length; - int lineno; - long linepos; - char *line; + char *line; /* string containing the token */ + int offset; /* where the token starts in LINE */ + int length; /* token length */ + /* + The previous members can be used to pass strings around for generic + purposes. The following ones specifically refer to creating tags. In this + case the token contained here is the pattern that will be used to create a + tag. + */ + bool valid; /* do not create a tag; the token should be + invalidated whenever a state machine is + reset prematurely */ + bool named; /* create a named tag */ + int lineno; /* source line number of tag */ + long linepos; /* source char number of tag */ } token; /* latest token read */ -static linebuffer token_name; /* its name */ /* * Variables and functions for dealing with nested structures. @@ -2560,7 +2744,7 @@ static struct { } cstack; /* stack for nested declaration tags */ /* Current struct nesting depth (namespace, class, struct, union, enum). */ #define nestlev (cstack.nl) -/* After struct keyword or in struct body, not inside an nested function. */ +/* After struct keyword or in struct body, not inside a nested function. 
*/ #define instruct (structdef == snone && nestlev > 0 \ && cblev == cstack.cblev[nestlev-1] + 1) @@ -2778,8 +2962,8 @@ consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var) return FALSE; case st_C_template: case st_C_class: - if (cblev == 0 - && (*c_extp & C_AUTO) /* automatic detection of C++ language */ + if ((*c_extp & C_AUTO) /* automatic detection of C++ language */ + && cblev == 0 && definedef == dnone && structdef == snone && typdef == tnone && fvdef == fvnone) *c_extp = (*c_extp | C_PLPL) & ~C_AUTO; @@ -2884,8 +3068,16 @@ consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var) fvextern = TRUE; /* FALLTHRU */ case st_C_typespec: - if (fvdef != finlist && fvdef != fignore && fvdef != vignore) - fvdef = fvnone; /* should be useless */ + switch (fvdef) + { + case finlist: + case flistseen: + case fignore: + case vignore: + break; + default: + fvdef = fvnone; + } return FALSE; case st_C_ignore: fvextern = FALSE; @@ -2915,8 +3107,10 @@ consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var) fvdef = vignore; return FALSE; } - if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10)) + if (len >= 10 && strneq (str+len-10, "::operator", 10)) { + if (*c_extp & C_AUTO) /* automatic detection of C++ */ + *c_extp = (*c_extp | C_PLPL) & ~C_AUTO; fvdef = foperator; *is_func_or_var = TRUE; return TRUE; @@ -2953,6 +3147,10 @@ static struct #define curlinepos (lbs[curndx].linepos) #define newlinepos (lbs[newndx].linepos) +#define plainc ((c_ext & C_EXT) == C_PLAIN) +#define cplpl (c_ext & C_PLPL) +#define cjava ((c_ext & C_JAVA) == C_JAVA) + #define CNL_SAVE_DEFINEDEF() \ do { \ curlinepos = charno; \ @@ -2980,31 +3178,18 @@ make_C_tag (isfun) { /* This function should never be called when token.valid is FALSE, but we must protect against invalid input or internal errors. */ - if (DEBUG || token.valid) - { - if (traditional_tag_style) - { - /* This was the original code. 
Now we call new_pfnote instead, - which uses the new method for naming tags (see new_pfnote). */ - char *name = NULL; + if (!DEBUG && !token.valid) + return; - if (CTAGS || token.named) - name = savestr (token_name.buffer); - if (DEBUG && !token.valid) - { - if (token.named) - name = concat (name, "##invalid##", ""); - else - name = savestr ("##invalid##"); - } - pfnote (name, isfun, token.line, - token.offset+token.length+1, token.lineno, token.linepos); - } - else - new_pfnote (token_name.buffer, token_name.len, isfun, token.line, - token.offset+token.length+1, token.lineno, token.linepos); - token.valid = FALSE; - } + if (token.valid) + make_tag (token_name.buffer, token_name.len, isfun, token.line, + token.offset+token.length+1, token.lineno, token.linepos); + else /* this case is optimised away if !DEBUG */ + make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""), + token_name.len + 17, isfun, token.line, + token.offset+token.length+1, token.lineno, token.linepos); + + token.valid = FALSE; } @@ -3030,14 +3215,12 @@ C_entries (c_ext, inf) int parlev; /* current parenthesis level */ int typdefcblev; /* cblev where a typedef struct body begun */ bool incomm, inquote, inchar, quotednl, midtoken; - bool cplpl, cjava; bool yacc_rules; /* in the rules part of a yacc file */ struct tok savetoken; /* token saved during preprocessor handling */ - initbuffer (&token_name); - initbuffer (&lbs[0].lb); - initbuffer (&lbs[1].lb); + linebuffer_init (&lbs[0].lb); + linebuffer_init (&lbs[1].lb); if (cstack.size == 0) { cstack.size = (DEBUG) ? 
1 : 4; @@ -3058,8 +3241,6 @@ C_entries (c_ext, inf) token.valid = savetoken.valid = FALSE; cblev = 0; parlev = 0; - cplpl = (c_ext & C_PLPL) == C_PLPL; - cjava = (c_ext & C_JAVA) == C_JAVA; if (cjava) { qualifier = "."; qlen = 1; } else @@ -3225,13 +3406,13 @@ C_entries (c_ext, inf) { if (endtoken (c)) { - if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1])) + if (c == ':' && *lp == ':' && begtoken (lp[1])) + /* This handles :: in the middle, + but not at the beginning of an identifier. + Also, space-separated :: is not recognised. */ { - /* - * This handles :: in the middle, but not at the - * beginning of an identifier. Also, space-separated - * :: is not recognised. - */ + if (c_ext & C_AUTO) /* automatic detection of C++ */ + c_ext = (c_ext | C_PLPL) & ~C_AUTO; lp += 2; toklen += 2; c = lp[-1]; @@ -3258,7 +3439,7 @@ C_entries (c_ext, inf) toklen += lp - oldlp; } token.named = FALSE; - if ((c_ext & C_EXT) /* not pure C */ + if (!plainc && nestlev > 0 && definedef == dnone) /* in struct body */ { @@ -3374,8 +3555,11 @@ C_entries (c_ext, inf) fvdef = finlist; continue; case flistseen: - make_C_tag (TRUE); /* a function */ - fvdef = fignore; + if (plainc || declarations) + { + make_C_tag (TRUE); /* a function */ + fvdef = fignore; + } break; case fvnameseen: fvdef = fvnone; @@ -3428,7 +3612,17 @@ C_entries (c_ext, inf) break; } if (structdef == stagseen) - structdef = scolonseen; + { + structdef = scolonseen; + break; + } + /* Should be useless, but may be work as a safety net. 
*/ + if (cplpl && fvdef == flistseen) + { + make_C_tag (TRUE); /* a function */ + fvdef = fignore; + break; + } break; case ';': if (definedef != dnone) @@ -3447,7 +3641,7 @@ C_entries (c_ext, inf) switch (fvdef) { case fignore: - if (typdef == tignore) + if (typdef == tignore || cplpl) fvdef = fvnone; break; case fvnameseen: @@ -3459,20 +3653,17 @@ C_entries (c_ext, inf) token.valid = FALSE; break; case flistseen: - if ((declarations && typdef == tnone && !instruct) - || (members && typdef != tignore && instruct)) + if (declarations + && (typdef == tnone || (typdef != tignore && instruct))) make_C_tag (TRUE); /* a function declaration */ /* FALLTHRU */ default: fvextern = FALSE; fvdef = fvnone; if (declarations - && structdef == stagseen && (c_ext & C_PLPL)) + && cplpl && structdef == stagseen) make_C_tag (FALSE); /* forward declaration */ else - /* The following instruction invalidates the token. - Probably the token should be invalidated in all other - cases where some state machine is reset prematurely. */ token.valid = FALSE; } /* switch (fvdef) */ /* FALLTHRU */ @@ -3675,18 +3866,25 @@ C_entries (c_ext, inf) if (definedef != dnone) break; if (fvdef == fstartlist) - fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */ + { + fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */ + token.valid = FALSE; + } break; case '}': if (definedef != dnone) break; - if (!noindentypedefs && lp == newlb.buffer + 1) + if (!ignoreindent && lp == newlb.buffer + 1) { + if (cblev != 0) + token.valid = FALSE; cblev = 0; /* reset curly brace level if first column */ parlev = 0; /* also reset paren level, just in case... */ } else if (cblev > 0) cblev--; + else + token.valid = FALSE; /* something gone amiss, token unreliable */ popclass_above (cblev); structdef = snone; /* Only if typdef == tinbody is typdefcblev significant. 
*/ @@ -3770,7 +3968,6 @@ C_entries (c_ext, inf) } /* while not eof */ - free (token_name.buffer); free (lbs[0].lb.buffer); free (lbs[1].lb.buffer); } @@ -3831,8 +4028,9 @@ Yacc_entries (inf) #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \ for (; /* loop initialization */ \ !feof (file_pointer) /* loop test */ \ - && (char_pointer = lb.buffer, /* instructions at start of loop */ \ - readline (&line_buffer, file_pointer), \ + && /* instructions at start of loop */ \ + (readline (&line_buffer, file_pointer), \ + char_pointer = line_buffer.buffer, \ TRUE); \ ) #define LOOKING_AT(cp, keyword) /* keyword is a constant string */ \ @@ -3903,8 +4101,8 @@ F_getit (inf) return; for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++) continue; - pfnote (savenstr (dbp, cp-dbp), TRUE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + make_tag (dbp, cp-dbp, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } @@ -3971,8 +4169,8 @@ Fortran_functions (inf) { dbp = skip_spaces (dbp); if (*dbp == '\0') /* assume un-named */ - pfnote (savestr ("blockdata"), TRUE, - lb.buffer, dbp - lb.buffer, lineno, linecharno); + make_tag ("blockdata", 9, TRUE, + lb.buffer, dbp - lb.buffer, lineno, linecharno); else F_getit (inf); /* look for name */ } @@ -3985,7 +4183,7 @@ Fortran_functions (inf) /* * Ada parsing * Original code by - * Philippe Waroquiers (1998) + * Philippe Waroquiers (1998) */ static void Ada_getit __P((FILE *, char *)); @@ -4048,7 +4246,9 @@ Ada_getit (inf, name_qualifier) *cp = '\0'; name = concat (dbp, name_qualifier, ""); *cp = c; - pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + make_tag (name, strlen (name), TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + free (name); if (c == '"') dbp = cp + 1; return; @@ -4060,6 +4260,7 @@ Ada_funcs (inf) FILE *inf; { bool inquote = FALSE; + bool skip_till_semicolumn = FALSE; LOOP_ON_INPUT_LINES (inf, lb, dbp) { @@ -4096,6 +4297,14 @@ Ada_funcs (inf) continue; } 
+ if (skip_till_semicolumn) + { + if (*dbp == ';') + skip_till_semicolumn = FALSE; + dbp++; + continue; /* advance char */ + } + /* Search for beginning of a token. */ if (!begtoken (*dbp)) { @@ -4122,6 +4331,16 @@ Ada_funcs (inf) else break; /* from switch */ continue; /* advance char */ + + case 'u': + if (typedefs && !packages_only && nocase_tail ("use")) + { + /* when tagging types, avoid tagging use type Pack.Typename; + for this, we will skip everything till a ; */ + skip_till_semicolumn = TRUE; + continue; /* advance char */ + } + case 't': if (!packages_only && nocase_tail ("task")) Ada_getit (inf, "/k"); @@ -4167,11 +4386,9 @@ Asm_labels (inf) while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$') cp++; if (*cp == ':' || iswhite (*cp)) - { - /* Found end of label, so copy it and add it to the table. */ - pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE, + /* Found end of label, so copy it and add it to the table. */ + make_tag (lb.buffer, cp - lb.buffer, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); - } } } } @@ -4199,56 +4416,63 @@ Perl_functions (inf) if (LOOKING_AT (cp, "package")) { free (package); - package = get_tag (cp); - if (package == NULL) /* can't parse package name */ - package = savestr (""); - else - package = savestr(package); /* make a copy */ + get_tag (cp, &package); } else if (LOOKING_AT (cp, "sub")) { - char *name, *fullname, *pos; + char *pos; char *sp = cp; while (!notinname (*cp)) cp++; if (cp == sp) - continue; - name = savenstr (sp, cp-sp); - if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':') - fullname = name; + continue; /* nothing found */ + if ((pos = etags_strchr (sp, ':')) != NULL + && pos < cp && pos[1] == ':') + /* The name is already qualified. 
*/ + make_tag (sp, cp - sp, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); else - fullname = concat (package, "::", name); - pfnote (fullname, TRUE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); - if (name != fullname) - free (name); + /* Qualify it. */ + { + char savechar, *name; + + savechar = *cp; + *cp = '\0'; + name = concat (package, "::", sp); + *cp = savechar; + make_tag (name, strlen(name), TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + free (name); + } } - else if (globals /* only if tagging global vars is enabled */ - && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local"))) + else if (globals) /* only if we are tagging global vars */ { + /* Skip a qualifier, if any. */ + bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local"); /* After "my" or "local", but before any following paren or space. */ - char *varname = NULL; + char *varstart = cp; - if (*cp == '$' || *cp == '@' || *cp == '%') + if (qual /* should this be removed? If yes, how? */ + && (*cp == '$' || *cp == '@' || *cp == '%')) { - char* varstart = ++cp; - while (ISALNUM (*cp) || *cp == '_') + varstart += 1; + do cp++; - varname = savenstr (varstart, cp-varstart); + while (ISALNUM (*cp) || *cp == '_'); } - else + else if (qual) { /* Should be examining a variable list at this point; could insist on seeing an open parenthesis. */ while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')') cp++; } + else + continue; - /* Perhaps I should back cp up one character, so the TAGS table - doesn't mention (and so depend upon) the following char. 
*/ - pfnote (varname, FALSE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + make_tag (varstart, cp - varstart, FALSE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } } } @@ -4274,8 +4498,8 @@ Python_functions (inf) char *name = cp; while (!notinname (*cp) && *cp != ':') cp++; - pfnote (savenstr (name, cp-name), TRUE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + make_tag (name, cp - name, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } } } @@ -4307,8 +4531,8 @@ PHP_functions (inf) { while (!notinname (*cp)) cp++; - pfnote (savenstr (name, cp-name), TRUE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + make_tag (name, cp - name, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); search_identifier = FALSE; } else if (LOOKING_AT (cp, "function")) @@ -4320,8 +4544,8 @@ PHP_functions (inf) name = cp; while (!notinname (*cp)) cp++; - pfnote (savenstr (name, cp-name), TRUE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + make_tag (name, cp - name, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } else search_identifier = TRUE; @@ -4333,8 +4557,8 @@ PHP_functions (inf) name = cp; while (*cp != '\0' && !iswhite (*cp)) cp++; - pfnote (savenstr (name, cp-name), FALSE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + make_tag (name, cp - name, FALSE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } else search_identifier = TRUE; @@ -4348,8 +4572,8 @@ PHP_functions (inf) name = cp; while (*cp != quote && *cp != '\0') cp++; - pfnote (savenstr (name, cp-name), FALSE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + make_tag (name, cp - name, FALSE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } else if (members && LOOKING_AT (cp, "var") @@ -4358,8 +4582,8 @@ PHP_functions (inf) name = cp; while (!notinname(*cp)) cp++; - pfnote (savenstr (name, cp-name), FALSE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + make_tag (name, cp - name, FALSE, + lb.buffer, cp - 
lb.buffer + 1, lineno, linecharno); } } } @@ -4390,15 +4614,15 @@ Cobol_paragraphs (inf) for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++) continue; if (*ep++ == '.') - pfnote (savenstr (bp, ep-bp), TRUE, - lb.buffer, ep - lb.buffer + 1, lineno, linecharno); + make_tag (bp, ep - bp, TRUE, + lb.buffer, ep - lb.buffer + 1, lineno, linecharno); } } /* * Makefile support - * Idea by Assar Westerlund (2001) + * Ideas by Assar Westerlund (2001) */ static void Makefile_targets (inf) @@ -4412,9 +4636,9 @@ Makefile_targets (inf) continue; while (*bp != '\0' && *bp != '=' && *bp != ':') bp++; - if (*bp == ':') - pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE, - lb.buffer, bp - lb.buffer + 1, lineno, linecharno); + if (*bp == ':' || (globals && *bp == '=')) + make_tag (lb.buffer, bp - lb.buffer, TRUE, + lb.buffer, bp - lb.buffer + 1, lineno, linecharno); } } @@ -4434,8 +4658,8 @@ Pascal_functions (inf) { linebuffer tline; /* mostly copied from C_entries */ long save_lcno; - int save_lineno, save_len; - char c, *cp, *namebuf; + int save_lineno, namelen, taglen; + char c, *name; bool /* each of these flags is TRUE iff: */ incomment, /* point is inside a comment */ @@ -4449,15 +4673,15 @@ Pascal_functions (inf) is a FORWARD/EXTERN to be ignored, or whether it is a real tag */ - save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */ - namebuf = NULL; /* keep compiler quiet */ + save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */ + name = NULL; /* keep compiler quiet */ dbp = lb.buffer; *dbp = '\0'; - initbuffer (&tline); + linebuffer_init (&tline); incomment = inquote = FALSE; found_tag = FALSE; /* have a proc name; check if extern */ - get_tagname = FALSE; /* have found "procedure" keyword */ + get_tagname = FALSE; /* found "procedure" keyword */ inparms = FALSE; /* found '(' after "proc" */ verify_tag = FALSE; /* check if "extern" is ahead */ @@ -4526,7 +4750,7 @@ Pascal_functions (inf) } if (found_tag && verify_tag && (*dbp != ' ')) { - /* 
check if this is an "extern" declaration */ + /* Check if this is an "extern" declaration. */ if (*dbp == '\0') continue; if (lowcase (*dbp == 'e')) @@ -4539,7 +4763,7 @@ Pascal_functions (inf) } else if (lowcase (*dbp) == 'f') { - if (nocase_tail ("forward")) /* check for forward reference */ + if (nocase_tail ("forward")) /* check for forward reference */ { found_tag = FALSE; verify_tag = FALSE; @@ -4549,37 +4773,41 @@ Pascal_functions (inf) { found_tag = FALSE; verify_tag = FALSE; - pfnote (namebuf, TRUE, - tline.buffer, save_len, save_lineno, save_lcno); + make_tag (name, namelen, TRUE, + tline.buffer, taglen, save_lineno, save_lcno); continue; } } if (get_tagname) /* grab name of proc or fn */ { + char *cp; + if (*dbp == '\0') continue; - /* save all values for later tagging */ + /* Find block name. */ + for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++) + continue; + + /* Save all values for later tagging. */ linebuffer_setlen (&tline, lb.len); strcpy (tline.buffer, lb.buffer); save_lineno = lineno; save_lcno = linecharno; + name = tline.buffer + (dbp - lb.buffer); + namelen = cp - dbp; + taglen = cp - lb.buffer + 1; - /* grab block name */ - for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++) - continue; - namebuf = savenstr (dbp, cp-dbp); dbp = cp; /* set dbp to e-o-token */ - save_len = dbp - lb.buffer + 1; get_tagname = FALSE; found_tag = TRUE; continue; - /* and proceed to check for "extern" */ + /* And proceed to check for "extern". */ } else if (!incomment && !inquote && !found_tag) { - /* check for proc/fn keywords */ + /* Check for proc/fn keywords. 
*/ switch (lowcase (c)) { case 'p': @@ -4592,7 +4820,7 @@ Pascal_functions (inf) continue; } } - } /* while not eof */ + } /* while not eof */ free (tline.buffer); } @@ -4618,7 +4846,7 @@ L_getit () /* Ok, then skip "(" before name in (defstruct (foo)) */ dbp = skip_spaces (dbp); } - get_tag (dbp); + get_tag (dbp, NULL); } static void @@ -4669,7 +4897,7 @@ Lisp_functions (inf) * Masatake Yamato (1999) */ static void -Postscript_functions (inf) +PS_functions (inf) FILE *inf; { register char *bp, *ep; @@ -4682,11 +4910,11 @@ Postscript_functions (inf) *ep != '\0' && *ep != ' ' && *ep != '{'; ep++) continue; - pfnote (savenstr (bp, ep-bp), TRUE, - lb.buffer, ep - lb.buffer + 1, lineno, linecharno); + make_tag (bp, ep - bp, TRUE, + lb.buffer, ep - lb.buffer + 1, lineno, linecharno); } else if (LOOKING_AT (bp, "defineps")) - get_tag (bp); + get_tag (bp, NULL); } } @@ -4714,10 +4942,10 @@ Scheme_functions (inf) /* Skip over open parens and white space */ while (notinname (*bp)) bp++; - get_tag (bp); + get_tag (bp, NULL); } if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!")) - get_tag (bp); + get_tag (bp, NULL); } } @@ -4779,8 +5007,7 @@ TeX_commands (inf) if (strneq (cp, key->buffer, key->len)) { register char *p; - char *name; - int linelen; + int namelen, linelen; bool opgrp = FALSE; cp = skip_spaces (cp + key->len); @@ -4794,7 +5021,7 @@ TeX_commands (inf) *p != TEX_opgrp && *p != TEX_clgrp); p++) continue; - name = savenstr (cp, p-cp); + namelen = p - cp; linelen = lb.len; if (!opgrp || *p == TEX_clgrp) { @@ -4802,7 +5029,8 @@ TeX_commands (inf) *p++; linelen = p - lb.buffer + 1; } - pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno); + make_tag (cp, namelen, TRUE, + lb.buffer, linelen, lineno, linecharno); goto tex_next_line; /* We only tag a line once */ } } @@ -4912,8 +5140,134 @@ Texinfo_nodes (inf) start = cp; while (*cp != '\0' && *cp != ',') cp++; - pfnote (savenstr (start, cp - start), TRUE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 
+ make_tag (start, cp - start, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } +} + + +/* Similar to LOOKING_AT but does not use notinname, does not skip */ +#define LOOKING_AT_NOCASE(cp, kw) /* kw is a constant string */ \ + (strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \ + && ((cp) += sizeof(kw)-1)) /* skip spaces */ + +/* + * HTML support. + * Contents of <title>, <h1>, <h2>, <h3> are tags. + * Contents of <a name=xxx> are tags with name xxx. + * + * Francesco Potortì, 2002. + */ +static void +HTML_labels (inf) + FILE * inf; +{ + bool getnext = FALSE; /* next text outside of HTML tags is a tag */ + bool skiptag = FALSE; /* skip to the end of the current HTML tag */ + bool intag = FALSE; /* inside an html tag, looking for ID= */ + bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */ + char *end; + + + linebuffer_setlen (&token_name, 0); /* no name in buffer */ + + LOOP_ON_INPUT_LINES (inf, lb, dbp) + for (;;) /* loop on the same line */ + { + if (skiptag) /* skip HTML tag */ + { + while (*dbp != '\0' && *dbp != '>') + dbp++; + if (*dbp == '>') + { + dbp += 1; + skiptag = FALSE; + continue; /* look on the same line */ + } + break; /* go to next line */ + } + + else if (intag) /* look for "name=" or "id=" */ + { + while (*dbp != '\0' && *dbp != '>' + && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i') + dbp++; + if (*dbp == '\0') + break; /* go to next line */ + if (*dbp == '>') + { + dbp += 1; + intag = FALSE; + continue; /* look on the same line */ + } + if ((inanchor && LOOKING_AT_NOCASE (dbp, "name=")) + || LOOKING_AT_NOCASE (dbp, "id=")) + { + bool quoted = (dbp[0] == '"'); + + if (quoted) + for (end = ++dbp; *end != '\0' && *end != '"'; end++) + continue; + else + for (end = dbp; *end != '\0' && intoken (*end); end++) + continue; + linebuffer_setlen (&token_name, end - dbp); + strncpy (token_name.buffer, dbp, end - dbp); + token_name.buffer[end - dbp] = '\0'; + + dbp = end; + intag = FALSE; /* we found what we looked for
*/ + skiptag = TRUE; /* skip to the end of the tag */ + getnext = TRUE; /* then grab the text */ + continue; /* look on the same line */ + } + dbp += 1; + } + + else if (getnext) /* grab next tokens and tag them */ + { + dbp = skip_spaces (dbp); + if (*dbp == '\0') + break; /* go to next line */ + if (*dbp == '<') + { + intag = TRUE; + inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2])); + continue; /* look on the same line */ + } + + for (end = dbp + 1; *end != '\0' && *end != '<'; end++) + continue; + make_tag (token_name.buffer, token_name.len, TRUE, + dbp, end - dbp, lineno, linecharno); + linebuffer_setlen (&token_name, 0); /* no name in buffer */ + getnext = FALSE; + break; /* go to next line */ + } + + else /* look for an interesting HTML tag */ + { + while (*dbp != '\0' && *dbp != '<') + dbp++; + if (*dbp == '\0') + break; /* go to next line */ + intag = TRUE; + if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2])) + { + inanchor = TRUE; + continue; /* look on the same line */ + } + else if (LOOKING_AT_NOCASE (dbp, "<title>") + || LOOKING_AT_NOCASE (dbp, "<h1>") + || LOOKING_AT_NOCASE (dbp, "<h2>") + || LOOKING_AT_NOCASE (dbp, "<h3>")) + { + intag = FALSE; + getnext = TRUE; + continue; /* look on the same line */ + } + dbp += 1; + } } } @@ -5016,7 +5370,7 @@ prolog_pr (s, last) || len != strlen (last) || !strneq (s, last, len))) { - pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno); + make_tag (s, len, TRUE, s, pos, lineno, linecharno); return len; } else @@ -5092,7 +5446,7 @@ prolog_atom (s, pos) */ static int erlang_func __P((char *, char *)); static void erlang_attribute __P((char *)); -static int erlang_atom __P((char *, int)); +static int erlang_atom __P((char *)); static void Erlang_functions (inf) @@ -5157,7 +5511,7 @@ erlang_func (s, last) int pos; int len; - pos = erlang_atom (s, 0); + pos = erlang_atom (s); if (pos < 1) return 0; @@ -5170,7 +5524,7 @@ erlang_func (s, last) || len != (int)strlen (last) || !strneq (s, last, len))) { - 
pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno); + make_tag (s, len, TRUE, s, pos, lineno, linecharno); return len; } @@ -5191,19 +5545,14 @@ static void erlang_attribute (s) char *s; { - int pos; - int len; + char *cp = s; - if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record")) + if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record")) + && *cp++ == '(') { - if (s[pos++] == '(') - { - pos = skip_spaces (s + pos) - s; - len = erlang_atom (s, pos); - if (len != 0) - pfnote (savenstr (& s[pos], len), TRUE, - s, pos + len, lineno, linecharno); - } + int len = erlang_atom (skip_spaces (cp)); + if (len > 0) + make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno); } return; } @@ -5214,66 +5563,46 @@ erlang_attribute (s) * Return the number of bytes consumed, or -1 if there was an error. */ static int -erlang_atom (s, pos) +erlang_atom (s) char *s; - int pos; { - int origpos; - - origpos = pos; + int pos = 0; if (ISALPHA (s[pos]) || s[pos] == '_') { /* The atom is unquoted. */ - pos++; - while (ISALNUM (s[pos]) || s[pos] == '_') + do pos++; - return pos - origpos; + while (ISALNUM (s[pos]) || s[pos] == '_'); } else if (s[pos] == '\'') { + for (pos++; s[pos] != '\''; pos++) + if (s[pos] == '\0' /* multiline quoted atoms are ignored */ + || (s[pos] == '\\' && s[++pos] == '\0')) + return 0; pos++; - - for (;;) - { - if (s[pos] == '\'') - { - pos++; - break; - } - else if (s[pos] == '\0') - /* Multiline quoted atoms are ignored. 
*/ - return -1; - else if (s[pos] == '\\') - { - if (s[pos+1] == '\0') - return -1; - pos += 2; - } - else - pos++; - } - return pos - origpos; } - else - return -1; + + return pos; } #ifdef ETAGS_REGEXPS static char *scan_separators __P((char *)); -static void analyse_regex __P((char *, bool)); -static void add_regex __P((char *, bool, language *)); +static void add_regex __P((char *, language *)); static char *substitute __P((char *, char *, struct re_registers *)); -/* Take a string like "/blah/" and turn it into "blah", making sure - that the first and last characters are the same, and handling - quoted separator characters. Actually, stops on the occurrence of - an unquoted separator. Also turns "\t" into a Tab character, and - similarly for all character escape sequences supported by Gcc. - Returns pointer to terminating separator. Works in place. Null - terminates name string. */ +/* + * Take a string like "/blah/" and turn it into "blah", verifying + * that the first and last characters are the same, and handling + * quoted separator characters. Actually, stops on the occurrence of + * an unquoted separator. Also process \t, \n, etc. and turn into + * appropriate characters. Works in place. Null terminates name string. + * Returns pointer to terminating separator, or NULL for + * unterminated regexps. 
+ */ static char * scan_separators (name) char *name; @@ -5288,15 +5617,15 @@ scan_separators (name) { switch (*name) { - case 'a': *copyto++ = '\007'; break; - case 'b': *copyto++ = '\b'; break; - case 'd': *copyto++ = 0177; break; - case 'e': *copyto++ = 033; break; - case 'f': *copyto++ = '\f'; break; - case 'n': *copyto++ = '\n'; break; - case 'r': *copyto++ = '\r'; break; - case 't': *copyto++ = '\t'; break; - case 'v': *copyto++ = '\v'; break; + case 'a': *copyto++ = '\007'; break; /* BEL (bell) */ + case 'b': *copyto++ = '\b'; break; /* BS (back space) */ + case 'd': *copyto++ = 0177; break; /* DEL (delete) */ + case 'e': *copyto++ = 033; break; /* ESC (delete) */ + case 'f': *copyto++ = '\f'; break; /* FF (form feed) */ + case 'n': *copyto++ = '\n'; break; /* NL (new line) */ + case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */ + case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */ + case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */ default: if (*name == sep) *copyto++ = sep; @@ -5317,6 +5646,8 @@ scan_separators (name) else *copyto++ = *name; } + if (*name != sep) + name = NULL; /* signal unterminated regexp */ /* Terminate copied string. */ *copyto = '\0'; @@ -5326,13 +5657,12 @@ scan_separators (name) /* Look at the argument of --regex or --no-regex and do the right thing. Same for each line of a regexp file. 
*/ static void -analyse_regex (regex_arg, ignore_case) +analyse_regex (regex_arg) char *regex_arg; - bool ignore_case; { if (regex_arg == NULL) { - free_patterns (); /* --no-regex: remove existing regexps */ + free_regexps (); /* --no-regex: remove existing regexps */ return; } @@ -5360,9 +5690,9 @@ analyse_regex (regex_arg, ignore_case) pfatal (regexfile); return; } - initbuffer (&regexbuf); + linebuffer_init (&regexbuf); while (readline_internal (&regexbuf, regexfp) > 0) - analyse_regex (regexbuf.buffer, ignore_case); + analyse_regex (regexbuf.buffer); free (regexbuf.buffer); fclose (regexfp); } @@ -5381,70 +5711,136 @@ analyse_regex (regex_arg, ignore_case) error ("unterminated language name in regex: %s", regex_arg); return; } - *cp = '\0'; + *cp++ = '\0'; lang = get_language_from_langname (lang_name); if (lang == NULL) return; - add_regex (cp + 1, ignore_case, lang); + add_regex (cp, lang); } break; /* Regexp to be used for any language. */ default: - add_regex (regex_arg, ignore_case, NULL); + add_regex (regex_arg, NULL); break; } } -/* Turn a name, which is an ed-style (but Emacs syntax) regular - expression, into a real regular expression by compiling it. */ +/* Separate the regexp pattern, compile it, + and care for optional name and modifiers. 
*/ static void -add_regex (regexp_pattern, ignore_case, lang) +add_regex (regexp_pattern, lang) char *regexp_pattern; - bool ignore_case; language *lang; { static struct re_pattern_buffer zeropattern; - char *name; + char sep, *pat, *name, *modifiers; const char *err; struct re_pattern_buffer *patbuf; - pattern *pp; + regexp *rp; + bool + force_explicit_name = TRUE, /* do not use implicit tag names */ + ignore_case = FALSE, /* case is significant */ + multi_line = FALSE, /* matches are done one line at a time */ + single_line = FALSE; /* dot does not match newline */ - if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0]) + if (strlen(regexp_pattern) < 3) { - error ("%s: unterminated regexp", regexp_pattern); + error ("null regexp", (char *)NULL); return; } + sep = regexp_pattern[0]; name = scan_separators (regexp_pattern); - if (regexp_pattern[0] == '\0') + if (name == NULL) { - error ("null regexp", (char *)NULL); + error ("%s: unterminated regexp", regexp_pattern); return; } - (void) scan_separators (name); + if (name[1] == sep) + { + error ("null name for regexp \"%s\"", regexp_pattern); + return; + } + modifiers = scan_separators (name); + if (modifiers == NULL) /* no terminating separator --> no name */ + { + modifiers = name; + name = ""; + } + else + modifiers += 1; /* skip separator */ + + /* Parse regex modifiers. 
*/ + for (; modifiers[0] != '\0'; modifiers++) + switch (modifiers[0]) + { + case 'N': + if (modifiers == name) + error ("forcing explicit tag name but no name, ignoring", NULL); + force_explicit_name = TRUE; + break; + case 'i': + ignore_case = TRUE; + break; + case 's': + single_line = TRUE; + /* FALLTHRU */ + case 'm': + multi_line = TRUE; + need_filebuf = TRUE; + break; + default: + { + char wrongmod [2]; + wrongmod[0] = modifiers[0]; + wrongmod[1] = '\0'; + error ("invalid regexp modifier `%s', ignoring", wrongmod); + } + break; + } patbuf = xnew (1, struct re_pattern_buffer); *patbuf = zeropattern; if (ignore_case) - patbuf->translate = lc_trans; /* translation table to fold case */ + { + static char lc_trans[CHARS]; + int i; + for (i = 0; i < CHARS; i++) + lc_trans[i] = lowcase (i); + patbuf->translate = lc_trans; /* translation table to fold case */ + } + + if (multi_line) + pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */ + else + pat = regexp_pattern; - err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf); + if (single_line) + re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE); + else + re_set_syntax (RE_SYNTAX_EMACS); + + err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf); + if (multi_line) + free (pat); if (err != NULL) { error ("%s while compiling pattern", err); return; } - pp = p_head; - p_head = xnew (1, pattern); - p_head->regex = savestr (regexp_pattern); - p_head->p_next = pp; + rp = p_head; + p_head = xnew (1, regexp); + p_head->pattern = savestr (regexp_pattern); + p_head->p_next = rp; p_head->lang = lang; p_head->pat = patbuf; - p_head->name_pattern = savestr (name); + p_head->name = savestr (name); p_head->error_signaled = FALSE; + p_head->force_explicit_name = force_explicit_name; p_head->ignore_case = ignore_case; + p_head->multi_line = multi_line; } /* @@ -5478,6 +5874,7 @@ substitute (in, out, regs) size -= 1; /* Allocate space and do the substitutions. 
*/ + assert (size >= 0); result = xnew (size + 1, char); for (t = result; *out != '\0'; out++) @@ -5492,26 +5889,111 @@ substitute (in, out, regs) *t++ = *out; *t = '\0'; - assert (t <= result + size && t - result == (int)strlen (result)); + assert (t <= result + size); + assert (t - result == (int)strlen (result)); return result; } -/* Deallocate all patterns. */ +/* Deallocate all regexps. */ static void -free_patterns () +free_regexps () { - pattern *pp; + regexp *rp; while (p_head != NULL) { - pp = p_head->p_next; - free (p_head->regex); - free (p_head->name_pattern); + rp = p_head->p_next; + free (p_head->pattern); + free (p_head->name); free (p_head); - p_head = pp; + p_head = rp; } return; } + +/* + * Reads the whole file as a single string from `filebuf' and looks for + * multi-line regular expressions, creating tags on matches. + * readline already dealt with normal regexps. + * + * Idea by Ben Wing <ben@666.com> (2002). + */ +static void +regex_tag_multiline () +{ + char *buffer = filebuf.buffer; + regexp *rp; + char *name; + + for (rp = p_head; rp != NULL; rp = rp->p_next) + { + int match = 0; + + if (!rp->multi_line) + continue; /* skip normal regexps */ + + /* Generic initialisations before parsing file from memory. */ + lineno = 1; /* reset global line number */ + charno = 0; /* reset global char number */ + linecharno = 0; /* reset global char number of line start */ + + /* Only use generic regexps or those for the current language. */ + if (rp->lang != NULL && rp->lang != curfdp->lang) + continue; + + while (match >= 0 && match < filebuf.len) + { + match = re_search (rp->pat, buffer, filebuf.len, charno, + filebuf.len - match, &rp->regs); + switch (match) + { + case -2: + /* Some error. */ + if (!rp->error_signaled) + { + error ("regexp stack overflow while matching \"%s\"", + rp->pattern); + rp->error_signaled = TRUE; + } + break; + case -1: + /* No match. 
*/ + break; + default: + if (match == rp->regs.end[0]) + { + if (!rp->error_signaled) + { + error ("regexp matches the empty string: \"%s\"", + rp->pattern); + rp->error_signaled = TRUE; + } + match = -3; /* exit from while loop */ + break; + } + + /* Match occurred. Construct a tag. */ + while (charno < rp->regs.end[0]) + if (buffer[charno++] == '\n') + lineno++, linecharno = charno; + name = rp->name; + if (name[0] == '\0') + name = NULL; + else /* make a named tag */ + name = substitute (buffer, rp->name, &rp->regs); + if (rp->force_explicit_name) + /* Force explicit tag name, if a name is there. */ + pfnote (name, TRUE, buffer + linecharno, + charno - linecharno + 1, lineno, linecharno); + else + make_tag (name, strlen (name), TRUE, buffer + linecharno, + charno - linecharno + 1, lineno, linecharno); + break; + } + } + } +} + #endif /* ETAGS_REGEXPS */ @@ -5531,32 +6013,24 @@ nocase_tail (cp) return FALSE; } -static char * -get_tag (bp) +static void +get_tag (bp, namepp) register char *bp; + char **namepp; { - register char *cp, *name; + register char *cp = bp; - if (*bp == '\0') - return NULL; - /* Go till you get to white space or a syntactic break */ - for (cp = bp + 1; !notinname (*cp); cp++) - continue; - name = savenstr (bp, cp-bp); - pfnote (name, TRUE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); - return name; -} + if (*bp != '\0') + { + /* Go till you get to white space or a syntactic break */ + for (cp = bp + 1; !notinname (*cp); cp++) + continue; + make_tag (bp, cp - bp, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } -/* Initialize a linebuffer for use */ -static void -initbuffer (lbp) - linebuffer *lbp; -{ - lbp->size = (DEBUG) ? 3 : 200; - lbp->buffer = xnew (lbp->size, char); - lbp->buffer[0] = '\0'; - lbp->len = 0; + if (namepp != NULL) + *namepp = savenstr (bp, cp - bp); } /* @@ -5564,10 +6038,13 @@ initbuffer (lbp) * newline or CR-NL, if any. 
Return the number of characters read from * `stream', which is the length of the line including the newline. * - * On DOS or Windows we do not count the CR character, if any, before the - * NL, in the returned length; this mirrors the behavior of emacs on those + * On DOS or Windows we do not count the CR character, if any before the + * NL, in the returned length; this mirrors the behavior of Emacs on those * platforms (for text files, it translates CR-NL to NL as it reads in the * file). + * + * If multi-line regular expressions are requested, each line read is + * appended to `filebuf'. */ static long readline_internal (lbp, stream) @@ -5626,12 +6103,28 @@ readline_internal (lbp, stream) } lbp->len = p - buffer; + if (need_filebuf /* we need filebuf for multi-line regexps */ + && chars_deleted > 0) /* not at EOF */ + { + while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */ + { + /* Expand filebuf. */ + filebuf.size *= 2; + xrnew (filebuf.buffer, filebuf.size, char); + } + strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len); + filebuf.len += lbp->len; + filebuf.buffer[filebuf.len++] = '\n'; + filebuf.buffer[filebuf.len] = '\0'; + } + return lbp->len + chars_deleted; } /* * Like readline_internal, above, but in addition try to match the - * input line against relevant regular expressions. + * input line against relevant regular expressions and manage #line + * directives. */ static void readline (lbp, stream) @@ -5736,6 +6229,8 @@ readline (lbp, stream) fdhead->infabsdir = savestr (curfdp->infabsdir); fdhead->taggedfname = taggedfname; fdhead->usecharno = FALSE; + fdhead->prop = NULL; + fdhead->written = FALSE; curfdp = fdhead; } } @@ -5752,8 +6247,8 @@ readline (lbp, stream) { if (result > 0) { - /* Do a tail recursion on ourselves, thus discarding the contents - of the line buffer. */ + /* Do a tail recursion on ourselves, thus discarding the contents + of the line buffer. 
*/ readline (lbp, stream); return; } @@ -5766,46 +6261,56 @@ readline (lbp, stream) #ifdef ETAGS_REGEXPS { int match; - pattern *pp; + regexp *rp; + char *name; - /* Match against relevant patterns. */ + /* Match against relevant regexps. */ if (lbp->len > 0) - for (pp = p_head; pp != NULL; pp = pp->p_next) + for (rp = p_head; rp != NULL; rp = rp->p_next) { - /* Only use generic regexps or those for the current language. */ - if (pp->lang != NULL && pp->lang != fdhead->lang) + /* Only use generic regexps or those for the current language. + Also do not use multiline regexps, which is the job of + regex_tag_multiline. */ + if ((rp->lang != NULL && rp->lang != fdhead->lang) + || rp->multi_line) continue; - match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs); + match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs); switch (match) { case -2: /* Some error. */ - if (!pp->error_signaled) + if (!rp->error_signaled) { - error ("error while matching \"%s\"", pp->regex); - pp->error_signaled = TRUE; + error ("regexp stack overflow while matching \"%s\"", + rp->pattern); + rp->error_signaled = TRUE; } break; case -1: /* No match. */ break; - default: - /* Match occurred. Construct a tag. */ - if (pp->name_pattern[0] != '\0') + case 0: + /* Empty string matched. */ + if (!rp->error_signaled) { - /* Make a named tag. */ - char *name = substitute (lbp->buffer, - pp->name_pattern, &pp->regs); - if (name != NULL) - pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno); + error ("regexp matches the empty string: \"%s\"", rp->pattern); + rp->error_signaled = TRUE; } + break; + default: + /* Match occurred. Construct a tag. */ + name = rp->name; + if (name[0] == '\0') + name = NULL; + else /* make a named tag */ + name = substitute (lbp->buffer, rp->name, &rp->regs); + if (rp->force_explicit_name) + /* Force explicit tag name, if a name is there. */ + pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno); else - { - /* Make an unnamed tag. 
*/ - pfnote ((char *)NULL, TRUE, + make_tag (name, strlen (name), TRUE, lbp->buffer, match, lineno, linecharno); - } break; } } @@ -5884,13 +6389,12 @@ etags_strchr (sp, c) } /* - * Return TRUE if the two strings are equal, ignoring case for alphabetic - * characters. + * Compare two strings, ignoring case for alphabetic characters. * - * Analogous to BSD's strcasecmp, included for portability. + * Same as BSD's strcasecmp, included for portability. */ -static bool -strcaseeq (s1, s2) +static int +etags_strcasecmp (s1, s2) register const char *s1; register const char *s2; { @@ -5900,7 +6404,35 @@ strcaseeq (s1, s2) : *s1 == *s2)) s1++, s2++; - return (*s1 == *s2); + return (ISALPHA (*s1) && ISALPHA (*s2) + ? lowcase (*s1) - lowcase (*s2) + : *s1 - *s2); +} + +/* + * Compare two strings, ignoring case for alphabetic characters. + * Stop after a given number of characters + * + * Same as BSD's strncasecmp, included for portability. + */ +static int +etags_strncasecmp (s1, s2, n) + register const char *s1; + register const char *s2; + register int n; +{ + while (*s1 != '\0' && n-- > 0 + && (ISALPHA (*s1) && ISALPHA (*s2) + ? lowcase (*s1) == lowcase (*s2) + : *s1 == *s2)) + s1++, s2++; + + if (n < 0) + return 0; + else + return (ISALPHA (*s1) && ISALPHA (*s2) + ? lowcase (*s1) - lowcase (*s2) + : *s1 - *s2); } /* Skip spaces, return new pointer. 
*/ @@ -5929,7 +6461,7 @@ fatal (s1, s2) char *s1, *s2; { error (s1, s2); - exit (BAD); + exit (EXIT_FAILURE); } static void @@ -5937,21 +6469,21 @@ pfatal (s1) char *s1; { perror (s1); - exit (BAD); + exit (EXIT_FAILURE); } static void suggest_asking_for_help () { - fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n", - progname, + #ifdef LONG_OPTIONS - "--help" +fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n", + progname, "--help"); #else - "-h" +fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n", + progname, "-h"); #endif - ); - exit (BAD); + exit (EXIT_FAILURE); } /* Print error message. `s1' is printf control string, `s2' is arg for it. */ @@ -6021,7 +6553,7 @@ etags_getcwd () linebuffer path; FILE *pipe; - initbuffer (&path); + linebuffer_init (&path); pipe = (FILE *) popen ("pwd 2>/dev/null", "r"); if (pipe == NULL || readline_internal (&path, pipe) == 0) pfatal ("pwd"); @@ -6187,6 +6719,18 @@ canonicalize_filename (fn) #endif } + +/* Initialize a linebuffer for use */ +static void +linebuffer_init (lbp) + linebuffer *lbp; +{ + lbp->size = (DEBUG) ? 3 : 200; + lbp->buffer = xnew (lbp->size, char); + lbp->buffer[0] = '\0'; + lbp->len = 0; +} + /* Set the minimum size of a string contained in a linebuffer. */ static void linebuffer_setlen (lbp, toksize) @@ -6201,7 +6745,7 @@ linebuffer_setlen (lbp, toksize) lbp->len = toksize; } -/* Like malloc but get fatal error if memory is exhausted. */ +/* Like malloc but get fatal error if memory is exhausted. */ static PTR xmalloc (size) unsigned int size; @@ -6229,6 +6773,11 @@ xrealloc (ptr, size) * indent-tabs-mode: t * tab-width: 8 * fill-column: 79 - * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node") + * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp") * End: */ + +/* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051 + (do not change this comment) */ + +/* etags.c ends here */