Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
Free Software Foundation, Inc. and Ken Arnold
-This file is not considered part of GNU Emacs.
+ This file is not considered part of GNU Emacs.
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software Foundation,
-Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/*
* Authors:
* 1989 Sam Kendall added C++.
* 1992 Joseph B. Wells improved C and C++ parsing.
* 1993 Francesco Potortì reorganised C and C++.
- * 1994 Regexp tags by Tom Tromey.
+ * 1994 Line-by-line regexp tags by Tom Tromey.
* 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
* 2002 #line directives by Francesco Potortì.
*
- * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
+ * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
+ *
*/
-char pot_etags_version[] = "@(#) pot revision number is 16.10";
+char pot_etags_version[] = "@(#) pot revision number is 16.56";
#define TRUE 1
#define FALSE 0
# define CTAGS FALSE
#endif
-/* Exit codes for success and failure. */
-#ifdef VMS
-# define GOOD 1
-# define BAD 0
-#else
-# define GOOD 0
-# define BAD 1
-#endif
-
#define streq(s,t) (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
+#define strcaseeq(s,t) (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n) (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
+#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
#define CHARS 256 /* 2^sizeof(char) */
#define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
-#define iswhite(c) (_wht[CHAR(c)]) /* c is white */
-#define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
-#define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
-#define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
-#define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */
+#define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
+#define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
+#define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
+#define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
+#define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
#define ISALNUM(c) isalnum (CHAR(c))
#define ISALPHA(c) isalpha (CHAR(c))
typedef struct
{
char *name; /* language name */
- bool metasource; /* source used to generate other sources */
+ char *help; /* detailed help for the language */
Lang_function *function; /* parse function */
- char **filenames; /* names of this language's files */
char **suffixes; /* name suffixes of this language's files */
+ char **filenames; /* names of this language's files */
char **interpreters; /* interpreters for this language */
+ bool metasource; /* source used to generate other sources */
} language;
typedef struct fdesc
language *lang; /* language of file */
char *prop; /* file properties to write in tagfile */
bool usecharno; /* etags tags shall contain char number */
+ bool written; /* entry written in the tags file */
} fdesc;
typedef struct node_st
struct node_st *left, *right; /* left and right sons */
fdesc *fdp; /* description of file to whom tag belongs */
char *name; /* tag name */
- char *pat; /* search pattern */
+ char *regex; /* search regexp */
bool valid; /* write this tag on the tag file */
- bool is_func; /* function tag: use pattern in CTAGS mode */
+ bool is_func; /* function tag: use regexp in CTAGS mode */
bool been_warned; /* warning already given for duplicated tag */
int lno; /* line number tag is on */
long cno; /* character number line starts on */
enum {
at_language, /* a language specification */
at_regexp, /* a regular expression */
- at_icregexp, /* same, but with case ignored */
at_filename, /* a file name */
- at_stdin /* read from stdin here */
+ at_stdin, /* read from stdin here */
+ at_end /* stop parsing the list */
} arg_type; /* argument type */
language *lang; /* language associated with the argument */
char *what; /* the argument itself */
#ifdef ETAGS_REGEXPS
/* Structure defining a regular expression. */
-typedef struct pattern
+typedef struct regexp
{
- struct pattern *p_next;
- language *lang;
- char *regex;
- struct re_pattern_buffer *pat;
- struct re_registers regs;
- char *name_pattern;
- bool error_signaled;
- bool ignore_case;
-} pattern;
+ struct regexp *p_next; /* pointer to next in list */
+ language *lang; /* if set, use only for this language */
+ char *pattern; /* the regexp pattern */
+ char *name; /* tag name */
+ struct re_pattern_buffer *pat; /* the compiled pattern */
+ struct re_registers regs; /* re registers */
+ bool error_signaled; /* already signaled for this regexp */
+ bool force_explicit_name; /* do not allow implict tag name */
+ bool ignore_case; /* ignore case when matching */
+ bool multi_line; /* do a multi-line match on the whole file */
+} regexp;
#endif /* ETAGS_REGEXPS */
static void Cstar_entries __P((FILE *));
static void Erlang_functions __P((FILE *));
static void Fortran_functions __P((FILE *));
-static void Yacc_entries __P((FILE *));
+static void HTML_labels __P((FILE *));
static void Lisp_functions __P((FILE *));
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
-static void Postscript_functions __P((FILE *));
+static void PS_functions __P((FILE *));
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));
+static void Yacc_entries __P((FILE *));
static void just_read_file __P((FILE *));
static void print_language_names __P((void));
static void print_version __P((void));
-static void print_help __P((void));
+static void print_help __P((argument *));
int main __P((int, char **));
static compressor *get_compressor_from_suffix __P((char *, char **));
static void readline __P((linebuffer *, FILE *));
static long readline_internal __P((linebuffer *, FILE *));
static bool nocase_tail __P((char *));
-static char *get_tag __P((char *));
+static void get_tag __P((char *, char **));
#ifdef ETAGS_REGEXPS
-static void analyse_regex __P((char *, bool));
-static void add_regex __P((char *, bool, language *));
-static void free_patterns __P((void));
+static void analyse_regex __P((char *));
+static void free_regexps __P((void));
+static void regex_tag_multiline __P((void));
#endif /* ETAGS_REGEXPS */
static void error __P((const char *, const char *));
static void suggest_asking_for_help __P((void));
static void add_node __P((node *, node **));
static void init __P((void));
-static void initbuffer __P((linebuffer *));
static void process_file_name __P((char *, language *));
static void process_file __P((FILE *, char *, language *));
static void find_entries __P((FILE *));
static void free_tree __P((node *));
static void free_fdesc __P((fdesc *));
static void pfnote __P((char *, bool, char *, int, int, long));
-static void new_pfnote __P((char *, int, bool, char *, int, int, long));
+static void make_tag __P((char *, int, bool, char *, int, int, long));
static void invalidate_nodes __P((fdesc *, node **));
static void put_entries __P((node *));
static char *savestr __P((char *));
static char *etags_strchr __P((const char *, int));
static char *etags_strrchr __P((const char *, int));
-static bool strcaseeq __P((const char *, const char *));
+static int etags_strcasecmp __P((const char *, const char *));
+static int etags_strncasecmp __P((const char *, const char *, int));
static char *etags_getcwd __P((void));
static char *relative_filename __P((char *, char *));
static char *absolute_filename __P((char *, char *));
static char *absolute_dirname __P((char *, char *));
static bool filename_is_absolute __P((char *f));
static void canonicalize_filename __P((char *));
+static void linebuffer_init __P((linebuffer *));
static void linebuffer_setlen __P((linebuffer *, int));
static PTR xmalloc __P((unsigned int));
static PTR xrealloc __P((char *, unsigned int));
static node *last_node; /* the last node created */
static linebuffer lb; /* the current line */
+static linebuffer filebuf; /* a buffer containing the whole file */
+static linebuffer token_name; /* a buffer containing a tag name */
/* boolean "functions" (see init) */
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* white chars */
*white = " \f\t\n\r\v",
/* not in a name */
- *nonam = " \f\t\n\r()=,;",
+ *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
/* token ending chars */
*endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
/* token starting chars */
static bool no_warnings; /* -w: suppress warnings */
static bool cxref_style; /* -x: create cxref style output */
static bool cplusplus; /* .[hc] means C++, not C */
-static bool noindentypedefs; /* -I: ignore indentation in C */
+static bool ignoreindent; /* -I: ignore indentation in C */
static bool packages_only; /* --packages-only: in Ada, only tag packages*/
#define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin; /* --parse-stdin used */
#ifdef ETAGS_REGEXPS
-/* List of all regexps. */
-static pattern *p_head;
-
-/* How many characters in the character set. (From regex.c.) */
-#define CHAR_SET_SIZE 256
-/* Translation table for case-insensitive matching. */
-static char lc_trans[CHAR_SET_SIZE];
+static regexp *p_head; /* list of all regexps */
+static bool need_filebuf; /* some regexes are multi-line */
+#else
+# define need_filebuf FALSE
#endif /* ETAGS_REGEXPS */
#ifdef LONG_OPTIONS
/* Ada code */
static char *Ada_suffixes [] =
{ "ads", "adb", "ada", NULL };
+static char Ada_help [] =
+"In Ada code, functions, procedures, packages, tasks and types are\n\
+tags. Use the `--packages-only' option to create tags for\n\
+packages only.\n\
+Ada tag names have suffixes indicating the type of entity:\n\
+ Entity type: Qualifier:\n\
+ ------------ ----------\n\
+ function /f\n\
+ procedure /p\n\
+ package spec /s\n\
+ package body /b\n\
+ type /t\n\
+ task /k\n\
+Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
+body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
+will just search for any tag `bidule'.";
/* Assembly code */
static char *Asm_suffixes [] =
"src", /* BSO/Tasking C compiler output */
NULL
};
+static char Asm_help [] =
+"In assembler code, labels appearing at the beginning of a line,\n\
+followed by a colon, are tags.";
+
/* Note that .c and .h can be considered C++, if the --c++ flag was
- given, or if the `class' keyowrd is met inside the file.
+ given, or if the `class' or `template' keyowrds are met inside the file.
That is why default_C_entries is called for these. */
static char *default_C_suffixes [] =
{ "c", "h", NULL };
+static char default_C_help [] =
+"In C code, any C function or typedef is a tag, and so are\n\
+definitions of `struct', `union' and `enum'. `#define' macro\n\
+definitions and `enum' constants are tags unless you specify\n\
+`--no-defines'. Global variables are tags unless you specify\n\
+`--no-globals'. Use of `--no-globals' and `--no-defines'\n\
+can make the tags table file much smaller.\n\
+You can tag function declarations and external variables by\n\
+using `--declarations', and struct members by using `--members'.";
static char *Cplusplus_suffixes [] =
{ "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
"M", /* Objective C++ */
"pdb", /* Postscript with C syntax */
NULL };
+static char Cplusplus_help [] =
+"In C++ code, all the tag constructs of C code are tagged. (Use\n\
+--help --lang=c --lang=c++ for full help.)\n\
+In addition to C tags, member functions are also recognized, and\n\
+optionally member variables if you use the `--members' option.\n\
+Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
+and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
+`operator+'.";
static char *Cjava_suffixes [] =
{ "java", NULL };
+static char Cjava_help [] =
+"In Java code, all the tags constructs of C and C++ code are\n\
+tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
+
static char *Cobol_suffixes [] =
{ "COB", "cob", NULL };
+static char Cobol_help [] =
+"In Cobol code, tags are paragraph names; that is, any word\n\
+starting in column 8 and followed by a period.";
static char *Cstar_suffixes [] =
{ "cs", "hs", NULL };
static char *Erlang_suffixes [] =
{ "erl", "hrl", NULL };
+static char Erlang_help [] =
+"In Erlang code, the tags are the functions, records and macros\n\
+defined in the file.";
static char *Fortran_suffixes [] =
{ "F", "f", "f90", "for", NULL };
+static char Fortran_help [] =
+"In Fortran code, functions, subroutines and block data are tags.";
+
+static char *HTML_suffixes [] =
+ { "htm", "html", "shtml", NULL };
+static char HTML_help [] =
+"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
+`h3' headers. Also, tags are `name=' in anchors and all\n\
+occurrences of `id='.";
static char *Lisp_suffixes [] =
{ "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
+static char Lisp_help [] =
+"In Lisp code, any function defined with `defun', any variable\n\
+defined with `defvar' or `defconst', and in general the first\n\
+argument of any expression that starts with `(def' in column zero\n\
+is a tag.";
static char *Makefile_filenames [] =
{ "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
+static char Makefile_help [] =
+"In makefiles, targets are tags; additionally, variables are tags\n\
+unless you specify `--no-globals'.";
+
+static char *Objc_suffixes [] =
+ { "lm", /* Objective lex file */
+ "m", /* Objective C file */
+ NULL };
+static char Objc_help [] =
+"In Objective C code, tags include Objective C definitions for classes,\n\
+class categories, methods and protocols. Tags for variables and\n\
+functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
static char *Pascal_suffixes [] =
{ "p", "pas", NULL };
+static char Pascal_help [] =
+"In Pascal code, the tags are the functions and procedures defined\n\
+in the file.";
static char *Perl_suffixes [] =
{ "pl", "pm", NULL };
-
static char *Perl_interpreters [] =
{ "perl", "@PERL@", NULL };
+static char Perl_help [] =
+"In Perl code, the tags are the packages, subroutines and variables\n\
+defined by the `package', `sub', `my' and `local' keywords. Use\n\
+`--globals' if you want to tag global variables. Tags for\n\
+subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
+defined in the default package is `main::SUB'.";
static char *PHP_suffixes [] =
{ "php", "php3", "php4", NULL };
+static char PHP_help [] =
+"In PHP code, tags are functions, classes and defines. When using\n\
+the `--members' option, vars are tags too.";
static char *plain_C_suffixes [] =
- { "lm", /* Objective lex file */
- "m", /* Objective C file */
- "pc", /* Pro*C file */
+ { "pc", /* Pro*C file */
NULL };
-static char *Postscript_suffixes [] =
+static char *PS_suffixes [] =
{ "ps", "psw", NULL }; /* .psw is for PSWrap */
+static char PS_help [] =
+"In PostScript code, the tags are the functions.";
static char *Prolog_suffixes [] =
{ "prolog", NULL };
+static char Prolog_help [] =
+"In Prolog code, tags are predicates and rules at the beginning of\n\
+line.";
static char *Python_suffixes [] =
{ "py", NULL };
+static char Python_help [] =
+"In Python code, `def' or `class' at the beginning of a line\n\
+generate a tag.";
/* Can't do the `SCM' or `scm' prefix with a version number. */
static char *Scheme_suffixes [] =
{ "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
+static char Scheme_help [] =
+"In Scheme code, tags include anything defined with `def' or with a\n\
+construct whose name starts with `def'. They also include\n\
+variables set with `set!' at top level in the file.";
static char *TeX_suffixes [] =
{ "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
+static char TeX_help [] =
+"In LaTeX text, the argument of any of the commands `\\chapter',\n\
+`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
+`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
+`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
+`\\newenvironment' or `\\renewenvironment' is a tag.\n\
+\n\
+Other commands can be specified by setting the environment variable\n\
+`TEXTAGS' to a colon-separated list like, for example,\n\
+ TEXTAGS=\"mycommand:myothercommand\".";
+
static char *Texinfo_suffixes [] =
{ "texi", "texinfo", "txi", NULL };
+static char Texinfo_help [] =
+"for texinfo files, lines starting with @node are tagged.";
static char *Yacc_suffixes [] =
{ "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
+static char Yacc_help [] =
+"In Bison or Yacc input files, each rule defines as a tag the\n\
+nonterminal it constructs. The portions of the file that contain\n\
+C code are parsed as C code (use --help --lang=c --lang=yacc\n\
+for full help).";
+
+static char auto_help [] =
+"`auto' is not a real language, it indicates to use\n\
+a default language for files base on file name suffix and file contents.";
+
+static char none_help [] =
+"`none' is not a real language, it indicates to only do\n\
+regexp processing on files.";
+
+static char no_lang_help [] =
+"No detailed help available for this language.";
+
/*
* Table of languages.
static language lang_names [] =
{
- { "ada", FALSE, Ada_funcs, NULL, Ada_suffixes, NULL },
- { "asm", FALSE, Asm_labels, NULL, Asm_suffixes, NULL },
- { "c", FALSE, default_C_entries, NULL, default_C_suffixes, NULL },
- { "c++", FALSE, Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
- { "c*", FALSE, Cstar_entries, NULL, Cstar_suffixes, NULL },
- { "cobol", FALSE, Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
- { "erlang", FALSE, Erlang_functions, NULL, Erlang_suffixes, NULL },
- { "fortran", FALSE, Fortran_functions, NULL, Fortran_suffixes, NULL },
- { "java", FALSE, Cjava_entries, NULL, Cjava_suffixes, NULL },
- { "lisp", FALSE, Lisp_functions, NULL, Lisp_suffixes, NULL },
- { "makefile", FALSE, Makefile_targets, Makefile_filenames, NULL, NULL },
- { "pascal", FALSE, Pascal_functions, NULL, Pascal_suffixes, NULL },
- { "perl", FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
- { "php", FALSE, PHP_functions, NULL, PHP_suffixes, NULL },
- { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
- { "proc", FALSE, plain_C_entries, NULL, plain_C_suffixes, NULL },
- { "prolog", FALSE, Prolog_functions, NULL, Prolog_suffixes, NULL },
- { "python", FALSE, Python_functions, NULL, Python_suffixes, NULL },
- { "scheme", FALSE, Scheme_functions, NULL, Scheme_suffixes, NULL },
- { "tex", FALSE, TeX_commands, NULL, TeX_suffixes, NULL },
- { "texinfo", FALSE, Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
- { "yacc", TRUE, Yacc_entries, NULL, Yacc_suffixes, NULL },
- { "auto", FALSE, NULL }, /* default guessing scheme */
- { "none", FALSE, just_read_file }, /* regexp matching only */
- { NULL, FALSE, NULL } /* end of list */
+ { "ada", Ada_help, Ada_funcs, Ada_suffixes },
+ { "asm", Asm_help, Asm_labels, Asm_suffixes },
+ { "c", default_C_help, default_C_entries, default_C_suffixes },
+ { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
+ { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
+ { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
+ { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
+ { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
+ { "html", HTML_help, HTML_labels, HTML_suffixes },
+ { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
+ { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
+ { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
+ { "objc", Objc_help, plain_C_entries, Objc_suffixes },
+ { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
+ { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
+ { "php", PHP_help, PHP_functions, PHP_suffixes },
+ { "postscript",PS_help, PS_functions, PS_suffixes },
+ { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
+ { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
+ { "python", Python_help, Python_functions, Python_suffixes },
+ { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
+ { "tex", TeX_help, TeX_commands, TeX_suffixes },
+ { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
+ { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
+ { "auto", auto_help }, /* default guessing scheme */
+ { "none", none_help, just_read_file }, /* regexp matching only */
+ { NULL } /* end of list */
};
\f
printf (" .%s", *ext);
puts ("");
}
- puts ("Where `auto' means use default language for files based on file\n\
+ puts ("where `auto' means use default language for files based on file\n\
name suffix, and `none' means only do regexp processing on files.\n\
If no language is specified and no matching suffix is found,\n\
the first line of the file is read for a sharp-bang (#!) sequence\n\
followed by the name of an interpreter. If no such sequence is found,\n\
Fortran is tried first; if no tags are found, C is tried next.\n\
-When parsing any C file, a \"class\" keyword switches to C++.\n\
-Compressed files are supported using gzip and bzip2.");
+When parsing any C file, a \"class\" or \"template\" keyword\n\
+switches to C++.");
+ puts ("Compressed files are supported using gzip and bzip2.\n\
+\n\
+For detailed help on a given language use, for example,\n\
+etags --help --lang=ada.");
}
#ifndef EMACS_NAME
-# define EMACS_NAME "GNU Emacs"
+# define EMACS_NAME "standalone"
#endif
#ifndef VERSION
-# define VERSION "21"
+# define VERSION "version"
#endif
static void
print_version ()
puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
puts ("This program is distributed under the same terms as Emacs");
- exit (GOOD);
+ exit (EXIT_SUCCESS);
}
static void
-print_help ()
+print_help (argbuffer)
+ argument *argbuffer;
{
+ bool help_for_lang = FALSE;
+
+ for (; argbuffer->arg_type != at_end; argbuffer++)
+ if (argbuffer->arg_type == at_language)
+ {
+ if (help_for_lang)
+ puts ("");
+ puts (argbuffer->lang->help);
+ help_for_lang = TRUE;
+ }
+
+ if (help_for_lang)
+ exit (EXIT_SUCCESS);
+
printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
/* This option is mostly obsolete, because etags can now automatically
detect C++. Retained for backward compatibility and for debugging and
experimentation. In principle, we could want to tag as C++ even
- before any "class" keyword.
+ before any "class" or "template" keyword.
puts ("-C, --c++\n\
Treat files whose name suffix defaults to C language as C++ files.");
*/
Do not create tag entries for global variables in some\n\
languages. This makes the tags file smaller.");
puts ("--members\n\
- Create tag entries for member variables in C and derived languages.");
+ Create tag entries for members of structures in some languages.");
#ifdef ETAGS_REGEXPS
- puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
- Make a tag for each line matching pattern REGEXP in the following\n\
- files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
- regexfile is a file containing one REGEXP per line.\n\
- REGEXP is anchored (as if preceded by ^).\n\
- The form /REGEXP/NAME/ creates a named tag.\n\
+ puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
+ Make a tag for each line matching a regular expression pattern\n\
+ in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
+ files only. REGEXFILE is a file containing one REGEXP per line.\n\
+ REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
+ optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
+ puts (" If TAGNAME/ is present, the tags created are named.\n\
For example Tcl named tags can be created with:\n\
- --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
- puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
- Like -r, --regex but ignore case when matching expressions.");
+ --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
+ MODS are optional one-letter modifiers: `i' means to ignore case,\n\
+ `m' means to allow multi-line matches, `s' implies `m' and\n\
+ causes dot to match any character, including newline.");
puts ("-R, --no-regex\n\
Don't create tags from regexps for the following files.");
#endif /* ETAGS_REGEXPS */
puts ("-I, --ignore-indentation\n\
- Don't rely on indentation quite as much as normal. Currently,\n\
- this means not to assume that a closing brace in the first\n\
- column is the final brace of a function or structure\n\
- definition in C and C++.");
+ In C and C++ do not assume that a closing brace in the first\n\
+ column is the final brace of a function or structure definition.");
puts ("-o FILE, --output=FILE\n\
Write the tags to FILE.");
puts ("--parse-stdin=NAME\n\
puts ("-V, --version\n\
Print the version of the program.\n\
-h, --help\n\
- Print this help message.");
+ Print this help message.\n\
+ Followed by one or more `--language' options prints detailed\n\
+ help about tag generation for the specified languages.");
print_language_names ();
puts ("");
puts ("Report bugs to bug-gnu-emacs@gnu.org");
- exit (GOOD);
+ exit (EXIT_SUCCESS);
}
\f
argument *argbuffer;
int current_arg, file_count;
linebuffer filename_lb;
+ bool help_asked = FALSE;
#ifdef VMS
bool got_err;
#endif
is small. */
argbuffer = xnew (argc, argument);
-#ifdef ETAGS_REGEXPS
- /* Set syntax for regular expression routines. */
- re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
- /* Translation table for case-insensitive search. */
- for (i = 0; i < CHAR_SET_SIZE; i++)
- lc_trans[i] = lowcase (i);
-#endif /* ETAGS_REGEXPS */
-
/*
* If etags, always find typedefs and structure tags. Why not?
* Also default to find macro constants, enum constants and
{
error ("-o option may only be given once.", (char *)NULL);
suggest_asking_for_help ();
+ /* NOTREACHED */
}
tagfile = optarg;
break;
case 'I':
case 'S': /* for backward compatibility */
- noindentypedefs = TRUE;
+ ignoreindent = TRUE;
break;
case 'l':
{
}
}
break;
+ case 'c':
+ /* Backward compatibility: support obsolete --ignore-case-regexp. */
+ optarg = concat (optarg, "i", ""); /* memory leak here */
+ /* FALLTHRU */
case 'r':
argbuffer[current_arg].arg_type = at_regexp;
argbuffer[current_arg].what = optarg;
argbuffer[current_arg].what = NULL;
++current_arg;
break;
- case 'c':
- argbuffer[current_arg].arg_type = at_icregexp;
- argbuffer[current_arg].what = optarg;
- ++current_arg;
- break;
case 'V':
print_version ();
break;
case 'h':
case 'H':
- print_help ();
+ help_asked = TRUE;
break;
/* Etags options */
case 'w': no_warnings = TRUE; break;
default:
suggest_asking_for_help ();
+ /* NOTREACHED */
}
- for (; optind < argc; ++optind)
+ for (; optind < argc; optind++)
{
argbuffer[current_arg].arg_type = at_filename;
argbuffer[current_arg].what = argv[optind];
++file_count;
}
+ argbuffer[current_arg].arg_type = at_end;
+
+ if (help_asked)
+ print_help (argbuffer);
+ /* NOTREACHED */
+
if (nincluded_files == 0 && file_count == 0)
{
error ("no input files specified.", (char *)NULL);
suggest_asking_for_help ();
+ /* NOTREACHED */
}
if (tagfile == NULL)
init (); /* set up boolean "functions" */
- initbuffer (&lb);
- initbuffer (&filename_lb);
+ linebuffer_init (&lb);
+ linebuffer_init (&filename_lb);
+ linebuffer_init (&filebuf);
+ linebuffer_init (&token_name);
if (!CTAGS)
{
/*
* Loop through files finding functions.
*/
- for (i = 0; i < current_arg; ++i)
+ for (i = 0; i < current_arg; i++)
{
static language *lang; /* non-NULL if language is forced */
char *this_file;
break;
#ifdef ETAGS_REGEXPS
case at_regexp:
- analyse_regex (argbuffer[i].what, FALSE);
- break;
- case at_icregexp:
- analyse_regex (argbuffer[i].what, TRUE);
+ analyse_regex (argbuffer[i].what);
break;
#endif
case at_filename:
}
#ifdef ETAGS_REGEXPS
- free_patterns ();
+ free_regexps ();
#endif /* ETAGS_REGEXPS */
+ free (lb.buffer);
+ free (filebuf.buffer);
+ free (token_name.buffer);
if (!CTAGS || cxref_style)
{
- put_entries (nodehead);
+ put_entries (nodehead); /* write the remainig tags (ETAGS) */
free_tree (nodehead);
nodehead = NULL;
if (!CTAGS)
- while (nincluded_files-- > 0)
- fprintf (tagf, "\f\n%s,include\n", *included_files++);
+ {
+ fdesc *fdp;
+
+ /* Output file entries that have no tags. */
+ for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
+ if (!fdp->written)
+ fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
+
+ while (nincluded_files-- > 0)
+ fprintf (tagf, "\f\n%s,include\n", *included_files++);
+ }
if (fclose (tagf) == EOF)
pfatal (tagfile);
- exit (GOOD);
+ exit (EXIT_SUCCESS);
}
if (update)
sprintf (cmd,
"mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
tagfile, argbuffer[i].what, tagfile);
- if (system (cmd) != GOOD)
+ if (system (cmd) != EXIT_SUCCESS)
fatal ("failed to execute shell command", (char *)NULL);
}
append_to_tagfile = TRUE;
tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
if (tagf == NULL)
pfatal (tagfile);
- put_entries (nodehead);
+ put_entries (nodehead); /* write all the tags (CTAGS) */
free_tree (nodehead);
nodehead = NULL;
if (fclose (tagf) == EOF)
sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
exit (system (cmd));
}
- return GOOD;
+ return EXIT_SUCCESS;
}
}
fdp->usecharno = TRUE; /* use char position when making tags */
fdp->prop = NULL;
+ fdp->written = FALSE; /* not written on tags file yet */
fdhead = fdp;
curfdp = fdhead; /* the current file description */
FILE *inf;
{
char *cp;
- node *old_last_node;
language *lang = curfdp->lang;
Lang_function *parser = NULL;
/* We rewind here, even if inf may be a pipe. We fail if the
length of the first line is longer than the pipe block size,
which is unlikely. */
- rewind (inf);
+ rewind (inf);
/* Else try to guess the language given the case insensitive file name. */
if (parser == NULL)
}
}
+ /* Else try Fortran or C. */
+ if (parser == NULL)
+ {
+ node *old_last_node = last_node;
+
+ curfdp->lang = get_language_from_langname ("fortran");
+ find_entries (inf);
+
+ if (old_last_node == last_node)
+ /* No Fortran entries found. Try C. */
+ {
+ /* We do not tag if rewind fails.
+ Only the file name will be recorded in the tags file. */
+ rewind (inf);
+ curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
+ find_entries (inf);
+ }
+ return;
+ }
+
if (!no_line_directive
&& curfdp->lang != NULL && curfdp->lang->metasource)
/* It may be that this is a bingo.y file, and we already parsed a bingo.c
{
fdesc *badfdp = *fdpp;
- if (DEBUG)
- fprintf (stderr,
- "Removing references to \"%s\" obtained from \"%s\"\n",
- badfdp->taggedfname, badfdp->infname);
-
- /* Delete the tags referring to badfdp. */
+ /* Delete the tags referring to badfdp->taggedfname
+ that were obtained from badfdp->infname. */
invalidate_nodes (badfdp, &nodehead);
*fdpp = badfdp->next; /* remove the bad description from the list */
fdpp = &(*fdpp)->next; /* advance the list pointer */
}
- if (parser != NULL)
- {
- /* Generic initialisations before reading from file. */
- lineno = 0; /* reset global line number */
- charno = 0; /* reset global char number */
- linecharno = 0; /* reset global char number of line start */
+ assert (parser != NULL);
- parser (inf);
- return;
- }
+ /* Generic initialisations before reading from file. */
+ linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
- /* Else try Fortran. */
- old_last_node = last_node;
- curfdp->lang = get_language_from_langname ("fortran");
- find_entries (inf);
+ /* Generic initialisations before parsing file with readline. */
+ lineno = 0; /* reset global line number */
+ charno = 0; /* reset global char number */
+ linecharno = 0; /* reset global char number of line start */
+
+ parser (inf);
+
+#ifdef ETAGS_REGEXPS
+ regex_tag_multiline ();
+#endif /* ETAGS_REGEXPS */
+}
+
+\f
+/*
+ * Check whether an implicitly named tag should be created,
+ * then call `pfnote'.
+ * NAME is a string that is internally copied by this function.
+ *
+ * TAGS format specification
+ * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
+ * The following is explained in some more detail in etc/ETAGS.EBNF.
+ *
+ * make_tag creates tags with "implicit tag names" (unnamed tags)
+ * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
+ * 1. NAME does not contain any of the characters in NONAM;
+ * 2. LINESTART contains name as either a rightmost, or rightmost but
+ * one character, substring;
+ * 3. the character, if any, immediately before NAME in LINESTART must
+ * be a character in NONAM;
+ * 4. the character, if any, immediately after NAME in LINESTART must
+ * also be a character in NONAM.
+ *
+ * The implementation uses the notinname() macro, which recognises the
+ * characters stored in the string `nonam'.
+ * etags.el needs to use the same characters that are in NONAM.
+ */
+static void
+make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
+ char *name; /* tag name, or NULL if unnamed */
+ int namelen; /* tag length */
+ bool is_func; /* tag is a function */
+ char *linestart; /* start of the line where tag is */
+ int linelen; /* length of the line where tag is */
+ int lno; /* line number */
+ long cno; /* character number */
+{
+ bool named = (name != NULL && namelen > 0);
- if (old_last_node == last_node)
- /* No Fortran entries found. Try C. */
+ if (!CTAGS && named) /* maybe set named to false */
+ /* Let's try to make an implicit tag name, that is, create an unnamed tag
+ such that etags.el can guess a name from it. */
{
- /* We do not tag if rewind fails.
- Only the file name will be recorded in the tags file. */
- rewind (inf);
- curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
- find_entries (inf);
+ int i;
+ register char *cp = name;
+
+ for (i = 0; i < namelen; i++)
+ if (notinname (*cp++))
+ break;
+ if (i == namelen) /* rule #1 */
+ {
+ cp = linestart + linelen - namelen;
+ if (notinname (linestart[linelen-1]))
+ cp -= 1; /* rule #4 */
+ if (cp >= linestart /* rule #2 */
+ && (cp == linestart
+ || notinname (cp[-1])) /* rule #3 */
+ && strneq (name, cp, namelen)) /* rule #2 */
+ named = FALSE; /* use implicit tag name */
+ }
}
- return;
+
+ if (named)
+ name = savenstr (name, namelen);
+ else
+ name = NULL;
+ pfnote (name, is_func, linestart, linelen, lno, cno);
}
-\f
/* Record a tag. */
static void
pfnote (name, is_func, linestart, linelen, lno, cno)
{
register node *np;
+ assert (name == NULL || name[0] != '\0');
if (CTAGS && name == NULL)
return;
if (CTAGS && !cxref_style)
{
if (strlen (linestart) < 50)
- np->pat = concat (linestart, "$", "");
+ np->regex = concat (linestart, "$", "");
else
- np->pat = savenstr (linestart, 50);
+ np->regex = savenstr (linestart, 50);
}
else
- np->pat = savenstr (linestart, linelen);
+ np->regex = savenstr (linestart, linelen);
add_node (np, &nodehead);
}
-/*
- * TAGS format specification
- * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
- *
- * pfnote should emit the optimized form [unnamed tag] only if:
- * 1. name does not contain any of the characters " \t\r\n(),;";
- * 2. linestart contains name as either a rightmost, or rightmost but
- * one character, substring;
- * 3. the character, if any, immediately before name in linestart must
- * be one of the characters " \t(),;";
- * 4. the character, if any, immediately after name in linestart must
- * also be one of the characters " \t(),;".
- *
- * The real implementation uses the notinname() macro, which recognises
- * characters slightly different from " \t\r\n(),;". See the variable
- * `nonam'.
- */
-#define traditional_tag_style TRUE
-static void
-new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
- char *name; /* tag name, or NULL if unnamed */
- int namelen; /* tag length */
- bool is_func; /* tag is a function */
- char *linestart; /* start of the line where tag is */
- int linelen; /* length of the line where tag is */
- int lno; /* line number */
- long cno; /* character number */
-{
- register char *cp;
- bool named;
-
- named = TRUE;
- if (!CTAGS)
- {
- for (cp = name; !notinname (*cp); cp++)
- continue;
- if (*cp == '\0') /* rule #1 */
- {
- cp = linestart + linelen - namelen;
- if (notinname (linestart[linelen-1]))
- cp -= 1; /* rule #4 */
- if (cp >= linestart /* rule #2 */
- && (cp == linestart
- || notinname (cp[-1])) /* rule #3 */
- && strneq (name, cp, namelen)) /* rule #2 */
- named = FALSE; /* use unnamed tag */
- }
- }
-
- if (named)
- name = savenstr (name, namelen);
- else
- name = NULL;
- pfnote (name, is_func, linestart, linelen, lno, cno);
-}
-
/*
* free_tree ()
* recurse on left children, iterate on right children.
free_tree (np->left);
if (np->name != NULL)
free (np->name);
- free (np->pat);
+ free (np->regex);
free (np);
np = node_right;
}
if (np->left != NULL)
invalidate_nodes (badfdp, &np->left);
if (np->fdp == badfdp)
- np-> valid = FALSE;
+ np->valid = FALSE;
if (np->right != NULL)
invalidate_nodes (badfdp, &np->right);
}
else
{
- node **next = &np->left;
+ assert (np->fdp != NULL);
if (np->fdp == badfdp)
{
- *npp = *next; /* detach the sublist from the list */
+ *npp = np->left; /* detach the sublist from the list */
np->left = NULL; /* isolate it */
free_tree (np); /* free it */
+ invalidate_nodes (badfdp, npp);
}
- invalidate_nodes (badfdp, next);
+ else
+ invalidate_nodes (badfdp, &np->left);
}
}
register int total = 0;
for (; np != NULL; np = np->right)
- {
- total += strlen (np->pat) + 1; /* pat\177 */
- if (np->name != NULL)
- total += strlen (np->name) + 1; /* name\001 */
- total += number_len ((long) np->lno) + 1; /* lno, */
- if (np->cno != invalidcharno) /* cno */
- total += number_len (np->cno);
- total += 1; /* newline */
- }
+ if (np->valid)
+ {
+ total += strlen (np->regex) + 1; /* pat\177 */
+ if (np->name != NULL)
+ total += strlen (np->name) + 1; /* name\001 */
+ total += number_len ((long) np->lno) + 1; /* lno, */
+ if (np->cno != invalidcharno) /* cno */
+ total += number_len (np->cno);
+ total += 1; /* newline */
+ }
return total;
}
fdp = np->fdp;
fprintf (tagf, "\f\n%s,%d\n",
fdp->taggedfname, total_size_of_entries (np));
+ fdp->written = TRUE;
}
- fputs (np->pat, tagf);
+ fputs (np->regex, tagf);
fputc ('\177', tagf);
if (np->name != NULL)
{
np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
else
fprintf (stdout, "%-16s %3d %-16s %s\n",
- np->name, np->lno, np->fdp->taggedfname, np->pat);
+ np->name, np->lno, np->fdp->taggedfname, np->regex);
}
else
{
putc (searchar, tagf);
putc ('^', tagf);
- for (sp = np->pat; *sp; sp++)
+ for (sp = np->regex; *sp; sp++)
{
if (*sp == '\\' || *sp == searchar)
putc ('\\', tagf);
*/
static struct tok
{
- bool valid;
- bool named;
- int offset;
- int length;
- int lineno;
- long linepos;
- char *line;
+ char *line; /* string containing the token */
+ int offset; /* where the token starts in LINE */
+ int length; /* token length */
+ /*
+ The previous members can be used to pass strings around for generic
+ purposes. The following ones specifically refer to creating tags. In this
+ case the token contained here is the pattern that will be used to create a
+ tag.
+ */
+ bool valid; /* do not create a tag; the token should be
+ invalidated whenever a state machine is
+ reset prematurely */
+ bool named; /* create a named tag */
+ int lineno; /* source line number of tag */
+ long linepos; /* source char number of tag */
} token; /* latest token read */
-static linebuffer token_name; /* its name */
/*
* Variables and functions for dealing with nested structures.
} cstack; /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev (cstack.nl)
-/* After struct keyword or in struct body, not inside an nested function. */
+/* After struct keyword or in struct body, not inside a nested function. */
#define instruct (structdef == snone && nestlev > 0 \
&& cblev == cstack.cblev[nestlev-1] + 1)
return FALSE;
case st_C_template:
case st_C_class:
- if (cblev == 0
- && (*c_extp & C_AUTO) /* automatic detection of C++ language */
+ if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
+ && cblev == 0
&& definedef == dnone && structdef == snone
&& typdef == tnone && fvdef == fvnone)
*c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
fvextern = TRUE;
/* FALLTHRU */
case st_C_typespec:
- if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
- fvdef = fvnone; /* should be useless */
+ switch (fvdef)
+ {
+ case finlist:
+ case flistseen:
+ case fignore:
+ case vignore:
+ break;
+ default:
+ fvdef = fvnone;
+ }
return FALSE;
case st_C_ignore:
fvextern = FALSE;
fvdef = vignore;
return FALSE;
}
- if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
+ if (len >= 10 && strneq (str+len-10, "::operator", 10))
{
+ if (*c_extp & C_AUTO) /* automatic detection of C++ */
+ *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
fvdef = foperator;
*is_func_or_var = TRUE;
return TRUE;
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)
+#define plainc ((c_ext & C_EXT) == C_PLAIN)
+#define cplpl (c_ext & C_PLPL)
+#define cjava ((c_ext & C_JAVA) == C_JAVA)
+
#define CNL_SAVE_DEFINEDEF() \
do { \
curlinepos = charno; \
{
/* This function should never be called when token.valid is FALSE, but
we must protect against invalid input or internal errors. */
- if (DEBUG || token.valid)
- {
- if (traditional_tag_style)
- {
- /* This was the original code. Now we call new_pfnote instead,
- which uses the new method for naming tags (see new_pfnote). */
- char *name = NULL;
+ if (!DEBUG && !token.valid)
+ return;
- if (CTAGS || token.named)
- name = savestr (token_name.buffer);
- if (DEBUG && !token.valid)
- {
- if (token.named)
- name = concat (name, "##invalid##", "");
- else
- name = savestr ("##invalid##");
- }
- pfnote (name, isfun, token.line,
- token.offset+token.length+1, token.lineno, token.linepos);
- }
- else
- new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
- token.offset+token.length+1, token.lineno, token.linepos);
- token.valid = FALSE;
- }
+ if (token.valid)
+ make_tag (token_name.buffer, token_name.len, isfun, token.line,
+ token.offset+token.length+1, token.lineno, token.linepos);
+ else /* this case is optimised away if !DEBUG */
+ make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
+ token_name.len + 17, isfun, token.line,
+ token.offset+token.length+1, token.lineno, token.linepos);
+
+ token.valid = FALSE;
}
int parlev; /* current parenthesis level */
int typdefcblev; /* cblev where a typedef struct body begun */
bool incomm, inquote, inchar, quotednl, midtoken;
- bool cplpl, cjava;
bool yacc_rules; /* in the rules part of a yacc file */
struct tok savetoken; /* token saved during preprocessor handling */
- initbuffer (&token_name);
- initbuffer (&lbs[0].lb);
- initbuffer (&lbs[1].lb);
+ linebuffer_init (&lbs[0].lb);
+ linebuffer_init (&lbs[1].lb);
if (cstack.size == 0)
{
cstack.size = (DEBUG) ? 1 : 4;
token.valid = savetoken.valid = FALSE;
cblev = 0;
parlev = 0;
- cplpl = (c_ext & C_PLPL) == C_PLPL;
- cjava = (c_ext & C_JAVA) == C_JAVA;
if (cjava)
{ qualifier = "."; qlen = 1; }
else
{
if (endtoken (c))
{
- if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
+ if (c == ':' && *lp == ':' && begtoken (lp[1]))
+ /* This handles :: in the middle,
+ but not at the beginning of an identifier.
+ Also, space-separated :: is not recognised. */
{
- /*
- * This handles :: in the middle, but not at the
- * beginning of an identifier. Also, space-separated
- * :: is not recognised.
- */
+ if (c_ext & C_AUTO) /* automatic detection of C++ */
+ c_ext = (c_ext | C_PLPL) & ~C_AUTO;
lp += 2;
toklen += 2;
c = lp[-1];
toklen += lp - oldlp;
}
token.named = FALSE;
- if ((c_ext & C_EXT) /* not pure C */
+ if (!plainc
&& nestlev > 0 && definedef == dnone)
/* in struct body */
{
fvdef = finlist;
continue;
case flistseen:
- make_C_tag (TRUE); /* a function */
- fvdef = fignore;
+ if (plainc || declarations)
+ {
+ make_C_tag (TRUE); /* a function */
+ fvdef = fignore;
+ }
break;
case fvnameseen:
fvdef = fvnone;
break;
}
if (structdef == stagseen)
- structdef = scolonseen;
+ {
+ structdef = scolonseen;
+ break;
+ }
+ /* Should be useless, but may be work as a safety net. */
+ if (cplpl && fvdef == flistseen)
+ {
+ make_C_tag (TRUE); /* a function */
+ fvdef = fignore;
+ break;
+ }
break;
case ';':
if (definedef != dnone)
switch (fvdef)
{
case fignore:
- if (typdef == tignore)
+ if (typdef == tignore || cplpl)
fvdef = fvnone;
break;
case fvnameseen:
token.valid = FALSE;
break;
case flistseen:
- if ((declarations && typdef == tnone && !instruct)
- || (members && typdef != tignore && instruct))
+ if (declarations
+ && (typdef == tnone || (typdef != tignore && instruct)))
make_C_tag (TRUE); /* a function declaration */
/* FALLTHRU */
default:
fvextern = FALSE;
fvdef = fvnone;
if (declarations
- && structdef == stagseen && (c_ext & C_PLPL))
+ && cplpl && structdef == stagseen)
make_C_tag (FALSE); /* forward declaration */
else
- /* The following instruction invalidates the token.
- Probably the token should be invalidated in all other
- cases where some state machine is reset prematurely. */
token.valid = FALSE;
} /* switch (fvdef) */
/* FALLTHRU */
if (definedef != dnone)
break;
if (fvdef == fstartlist)
- fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
+ {
+ fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
+ token.valid = FALSE;
+ }
break;
case '}':
if (definedef != dnone)
break;
- if (!noindentypedefs && lp == newlb.buffer + 1)
+ if (!ignoreindent && lp == newlb.buffer + 1)
{
+ if (cblev != 0)
+ token.valid = FALSE;
cblev = 0; /* reset curly brace level if first column */
parlev = 0; /* also reset paren level, just in case... */
}
else if (cblev > 0)
cblev--;
+ else
+ token.valid = FALSE; /* something gone amiss, token unreliable */
popclass_above (cblev);
structdef = snone;
/* Only if typdef == tinbody is typdefcblev significant. */
} /* while not eof */
- free (token_name.buffer);
free (lbs[0].lb.buffer);
free (lbs[1].lb.buffer);
}
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
for (; /* loop initialization */ \
!feof (file_pointer) /* loop test */ \
- && (char_pointer = lb.buffer, /* instructions at start of loop */ \
- readline (&line_buffer, file_pointer), \
+ && /* instructions at start of loop */ \
+ (readline (&line_buffer, file_pointer), \
+ char_pointer = line_buffer.buffer, \
TRUE); \
)
#define LOOKING_AT(cp, keyword) /* keyword is a constant string */ \
return;
for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
continue;
- pfnote (savenstr (dbp, cp-dbp), TRUE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (dbp, cp-dbp, TRUE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
}
{
dbp = skip_spaces (dbp);
if (*dbp == '\0') /* assume un-named */
- pfnote (savestr ("blockdata"), TRUE,
- lb.buffer, dbp - lb.buffer, lineno, linecharno);
+ make_tag ("blockdata", 9, TRUE,
+ lb.buffer, dbp - lb.buffer, lineno, linecharno);
else
F_getit (inf); /* look for name */
}
/*
* Ada parsing
* Original code by
- * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
+ * Philippe Waroquiers (1998)
*/
static void Ada_getit __P((FILE *, char *));
*cp = '\0';
name = concat (dbp, name_qualifier, "");
*cp = c;
- pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (name, strlen (name), TRUE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ free (name);
if (c == '"')
dbp = cp + 1;
return;
FILE *inf;
{
bool inquote = FALSE;
+ bool skip_till_semicolumn = FALSE;
LOOP_ON_INPUT_LINES (inf, lb, dbp)
{
continue;
}
+ if (skip_till_semicolumn)
+ {
+ if (*dbp == ';')
+ skip_till_semicolumn = FALSE;
+ dbp++;
+ continue; /* advance char */
+ }
+
/* Search for beginning of a token. */
if (!begtoken (*dbp))
{
else
break; /* from switch */
continue; /* advance char */
+
+ case 'u':
+ if (typedefs && !packages_only && nocase_tail ("use"))
+ {
+ /* when tagging types, avoid tagging use type Pack.Typename;
+ for this, we will skip everything till a ; */
+ skip_till_semicolumn = TRUE;
+ continue; /* advance char */
+ }
+
case 't':
if (!packages_only && nocase_tail ("task"))
Ada_getit (inf, "/k");
while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
cp++;
if (*cp == ':' || iswhite (*cp))
- {
- /* Found end of label, so copy it and add it to the table. */
- pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
+ /* Found end of label, so copy it and add it to the table. */
+ make_tag (lb.buffer, cp - lb.buffer, TRUE,
lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
- }
}
}
}
if (LOOKING_AT (cp, "package"))
{
free (package);
- package = get_tag (cp);
- if (package == NULL) /* can't parse package name */
- package = savestr ("");
- else
- package = savestr(package); /* make a copy */
+ get_tag (cp, &package);
}
else if (LOOKING_AT (cp, "sub"))
{
- char *name, *fullname, *pos;
+ char *pos;
char *sp = cp;
while (!notinname (*cp))
cp++;
if (cp == sp)
- continue;
- name = savenstr (sp, cp-sp);
- if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
- fullname = name;
+ continue; /* nothing found */
+ if ((pos = etags_strchr (sp, ':')) != NULL
+ && pos < cp && pos[1] == ':')
+ /* The name is already qualified. */
+ make_tag (sp, cp - sp, TRUE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
else
- fullname = concat (package, "::", name);
- pfnote (fullname, TRUE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
- if (name != fullname)
- free (name);
+ /* Qualify it. */
+ {
+ char savechar, *name;
+
+ savechar = *cp;
+ *cp = '\0';
+ name = concat (package, "::", sp);
+ *cp = savechar;
+ make_tag (name, strlen(name), TRUE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ free (name);
+ }
}
- else if (globals /* only if tagging global vars is enabled */
- && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
+ else if (globals) /* only if we are tagging global vars */
{
+ /* Skip a qualifier, if any. */
+ bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
/* After "my" or "local", but before any following paren or space. */
- char *varname = NULL;
+ char *varstart = cp;
- if (*cp == '$' || *cp == '@' || *cp == '%')
+ if (qual /* should this be removed? If yes, how? */
+ && (*cp == '$' || *cp == '@' || *cp == '%'))
{
- char* varstart = ++cp;
- while (ISALNUM (*cp) || *cp == '_')
+ varstart += 1;
+ do
cp++;
- varname = savenstr (varstart, cp-varstart);
+ while (ISALNUM (*cp) || *cp == '_');
}
- else
+ else if (qual)
{
/* Should be examining a variable list at this point;
could insist on seeing an open parenthesis. */
while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
cp++;
}
+ else
+ continue;
- /* Perhaps I should back cp up one character, so the TAGS table
- doesn't mention (and so depend upon) the following char. */
- pfnote (varname, FALSE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (varstart, cp - varstart, FALSE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
}
}
}
char *name = cp;
while (!notinname (*cp) && *cp != ':')
cp++;
- pfnote (savenstr (name, cp-name), TRUE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (name, cp - name, TRUE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
}
}
}
{
while (!notinname (*cp))
cp++;
- pfnote (savenstr (name, cp-name), TRUE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (name, cp - name, TRUE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
search_identifier = FALSE;
}
else if (LOOKING_AT (cp, "function"))
name = cp;
while (!notinname (*cp))
cp++;
- pfnote (savenstr (name, cp-name), TRUE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (name, cp - name, TRUE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
}
else
search_identifier = TRUE;
name = cp;
while (*cp != '\0' && !iswhite (*cp))
cp++;
- pfnote (savenstr (name, cp-name), FALSE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (name, cp - name, FALSE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
}
else
search_identifier = TRUE;
name = cp;
while (*cp != quote && *cp != '\0')
cp++;
- pfnote (savenstr (name, cp-name), FALSE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (name, cp - name, FALSE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
}
else if (members
&& LOOKING_AT (cp, "var")
name = cp;
while (!notinname(*cp))
cp++;
- pfnote (savenstr (name, cp-name), FALSE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (name, cp - name, FALSE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
}
}
}
for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
continue;
if (*ep++ == '.')
- pfnote (savenstr (bp, ep-bp), TRUE,
- lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
+ make_tag (bp, ep - bp, TRUE,
+ lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
}
}
\f
/*
* Makefile support
- * Idea by Assar Westerlund <assar@sics.se> (2001)
+ * Ideas by Assar Westerlund <assar@sics.se> (2001)
*/
static void
Makefile_targets (inf)
continue;
while (*bp != '\0' && *bp != '=' && *bp != ':')
bp++;
- if (*bp == ':')
- pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
- lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
+ if (*bp == ':' || (globals && *bp == '='))
+ make_tag (lb.buffer, bp - lb.buffer, TRUE,
+ lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
}
}
{
linebuffer tline; /* mostly copied from C_entries */
long save_lcno;
- int save_lineno, save_len;
- char c, *cp, *namebuf;
+ int save_lineno, namelen, taglen;
+ char c, *name;
bool /* each of these flags is TRUE iff: */
incomment, /* point is inside a comment */
is a FORWARD/EXTERN to be ignored, or
whether it is a real tag */
- save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
- namebuf = NULL; /* keep compiler quiet */
+ save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
+ name = NULL; /* keep compiler quiet */
dbp = lb.buffer;
*dbp = '\0';
- initbuffer (&tline);
+ linebuffer_init (&tline);
incomment = inquote = FALSE;
found_tag = FALSE; /* have a proc name; check if extern */
- get_tagname = FALSE; /* have found "procedure" keyword */
+ get_tagname = FALSE; /* found "procedure" keyword */
inparms = FALSE; /* found '(' after "proc" */
verify_tag = FALSE; /* check if "extern" is ahead */
}
if (found_tag && verify_tag && (*dbp != ' '))
{
- /* check if this is an "extern" declaration */
+ /* Check if this is an "extern" declaration. */
if (*dbp == '\0')
continue;
if (lowcase (*dbp == 'e'))
}
else if (lowcase (*dbp) == 'f')
{
- if (nocase_tail ("forward")) /* check for forward reference */
+ if (nocase_tail ("forward")) /* check for forward reference */
{
found_tag = FALSE;
verify_tag = FALSE;
{
found_tag = FALSE;
verify_tag = FALSE;
- pfnote (namebuf, TRUE,
- tline.buffer, save_len, save_lineno, save_lcno);
+ make_tag (name, namelen, TRUE,
+ tline.buffer, taglen, save_lineno, save_lcno);
continue;
}
}
if (get_tagname) /* grab name of proc or fn */
{
+ char *cp;
+
if (*dbp == '\0')
continue;
- /* save all values for later tagging */
+ /* Find block name. */
+ for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
+ continue;
+
+ /* Save all values for later tagging. */
linebuffer_setlen (&tline, lb.len);
strcpy (tline.buffer, lb.buffer);
save_lineno = lineno;
save_lcno = linecharno;
+ name = tline.buffer + (dbp - lb.buffer);
+ namelen = cp - dbp;
+ taglen = cp - lb.buffer + 1;
- /* grab block name */
- for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
- continue;
- namebuf = savenstr (dbp, cp-dbp);
dbp = cp; /* set dbp to e-o-token */
- save_len = dbp - lb.buffer + 1;
get_tagname = FALSE;
found_tag = TRUE;
continue;
- /* and proceed to check for "extern" */
+ /* And proceed to check for "extern". */
}
else if (!incomment && !inquote && !found_tag)
{
- /* check for proc/fn keywords */
+ /* Check for proc/fn keywords. */
switch (lowcase (c))
{
case 'p':
continue;
}
}
- } /* while not eof */
+ } /* while not eof */
free (tline.buffer);
}
/* Ok, then skip "(" before name in (defstruct (foo)) */
dbp = skip_spaces (dbp);
}
- get_tag (dbp);
+ get_tag (dbp, NULL);
}
static void
* Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
*/
static void
-Postscript_functions (inf)
+PS_functions (inf)
FILE *inf;
{
register char *bp, *ep;
*ep != '\0' && *ep != ' ' && *ep != '{';
ep++)
continue;
- pfnote (savenstr (bp, ep-bp), TRUE,
- lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
+ make_tag (bp, ep - bp, TRUE,
+ lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
}
else if (LOOKING_AT (bp, "defineps"))
- get_tag (bp);
+ get_tag (bp, NULL);
}
}
/* Skip over open parens and white space */
while (notinname (*bp))
bp++;
- get_tag (bp);
+ get_tag (bp, NULL);
}
if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
- get_tag (bp);
+ get_tag (bp, NULL);
}
}
if (strneq (cp, key->buffer, key->len))
{
register char *p;
- char *name;
- int linelen;
+ int namelen, linelen;
bool opgrp = FALSE;
cp = skip_spaces (cp + key->len);
*p != TEX_opgrp && *p != TEX_clgrp);
p++)
continue;
- name = savenstr (cp, p-cp);
+ namelen = p - cp;
linelen = lb.len;
if (!opgrp || *p == TEX_clgrp)
{
*p++;
linelen = p - lb.buffer + 1;
}
- pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno);
+ make_tag (cp, namelen, TRUE,
+ lb.buffer, linelen, lineno, linecharno);
goto tex_next_line; /* We only tag a line once */
}
}
start = cp;
while (*cp != '\0' && *cp != ',')
cp++;
- pfnote (savenstr (start, cp - start), TRUE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ make_tag (start, cp - start, TRUE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ }
+}
+
+\f
+/* Similar to LOOKING_AT but does not use notinname, does not skip */
+#define LOOKING_AT_NOCASE(cp, kw) /* kw is a constant string */ \
+ (strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
+ && ((cp) += sizeof(kw)-1)) /* skip spaces */
+
+/*
+ * HTML support.
+ * Contents of <title>, <h1>, <h2>, <h3> are tags.
+ * Contents of <a name=xxx> are tags with name xxx.
+ *
+ * Francesco Potortì, 2002.
+ */
+static void
+HTML_labels (inf)
+ FILE * inf;
+{
+ bool getnext = FALSE; /* next text outside of HTML tags is a tag */
+ bool skiptag = FALSE; /* skip to the end of the current HTML tag */
+ bool intag = FALSE; /* inside an html tag, looking for ID= */
+ bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
+ char *end;
+
+
+ linebuffer_setlen (&token_name, 0); /* no name in buffer */
+
+ LOOP_ON_INPUT_LINES (inf, lb, dbp)
+ for (;;) /* loop on the same line */
+ {
+ if (skiptag) /* skip HTML tag */
+ {
+ while (*dbp != '\0' && *dbp != '>')
+ dbp++;
+ if (*dbp == '>')
+ {
+ dbp += 1;
+ skiptag = FALSE;
+ continue; /* look on the same line */
+ }
+ break; /* go to next line */
+ }
+
+ else if (intag) /* look for "name=" or "id=" */
+ {
+ while (*dbp != '\0' && *dbp != '>'
+ && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
+ dbp++;
+ if (*dbp == '\0')
+ break; /* go to next line */
+ if (*dbp == '>')
+ {
+ dbp += 1;
+ intag = FALSE;
+ continue; /* look on the same line */
+ }
+ if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
+ || LOOKING_AT_NOCASE (dbp, "id="))
+ {
+ bool quoted = (dbp[0] == '"');
+
+ if (quoted)
+ for (end = ++dbp; *end != '\0' && *end != '"'; end++)
+ continue;
+ else
+ for (end = dbp; *end != '\0' && intoken (*end); end++)
+ continue;
+ linebuffer_setlen (&token_name, end - dbp);
+ strncpy (token_name.buffer, dbp, end - dbp);
+ token_name.buffer[end - dbp] = '\0';
+
+ dbp = end;
+ intag = FALSE; /* we found what we looked for */
+ skiptag = TRUE; /* skip to the end of the tag */
+ getnext = TRUE; /* then grab the text */
+ continue; /* look on the same line */
+ }
+ dbp += 1;
+ }
+
+ else if (getnext) /* grab next tokens and tag them */
+ {
+ dbp = skip_spaces (dbp);
+ if (*dbp == '\0')
+ break; /* go to next line */
+ if (*dbp == '<')
+ {
+ intag = TRUE;
+ inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
+ continue; /* look on the same line */
+ }
+
+ for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
+ continue;
+ make_tag (token_name.buffer, token_name.len, TRUE,
+ dbp, end - dbp, lineno, linecharno);
+ linebuffer_setlen (&token_name, 0); /* no name in buffer */
+ getnext = FALSE;
+ break; /* go to next line */
+ }
+
+ else /* look for an interesting HTML tag */
+ {
+ while (*dbp != '\0' && *dbp != '<')
+ dbp++;
+ if (*dbp == '\0')
+ break; /* go to next line */
+ intag = TRUE;
+ if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
+ {
+ inanchor = TRUE;
+ continue; /* look on the same line */
+ }
+ else if (LOOKING_AT_NOCASE (dbp, "<title>")
+ || LOOKING_AT_NOCASE (dbp, "<h1>")
+ || LOOKING_AT_NOCASE (dbp, "<h2>")
+ || LOOKING_AT_NOCASE (dbp, "<h3>"))
+ {
+ intag = FALSE;
+ getnext = TRUE;
+ continue; /* look on the same line */
+ }
+ dbp += 1;
+ }
}
}
|| len != strlen (last)
|| !strneq (s, last, len)))
{
- pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
+ make_tag (s, len, TRUE, s, pos, lineno, linecharno);
return len;
}
else
*/
static int erlang_func __P((char *, char *));
static void erlang_attribute __P((char *));
-static int erlang_atom __P((char *, int));
+static int erlang_atom __P((char *));
static void
Erlang_functions (inf)
int pos;
int len;
- pos = erlang_atom (s, 0);
+ pos = erlang_atom (s);
if (pos < 1)
return 0;
|| len != (int)strlen (last)
|| !strneq (s, last, len)))
{
- pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
+ make_tag (s, len, TRUE, s, pos, lineno, linecharno);
return len;
}
erlang_attribute (s)
char *s;
{
- int pos;
- int len;
+ char *cp = s;
- if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record"))
+ if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
+ && *cp++ == '(')
{
- if (s[pos++] == '(')
- {
- pos = skip_spaces (s + pos) - s;
- len = erlang_atom (s, pos);
- if (len != 0)
- pfnote (savenstr (& s[pos], len), TRUE,
- s, pos + len, lineno, linecharno);
- }
+ int len = erlang_atom (skip_spaces (cp));
+ if (len > 0)
+ make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
}
return;
}
* Return the number of bytes consumed, or -1 if there was an error.
*/
static int
-erlang_atom (s, pos)
+erlang_atom (s)
char *s;
- int pos;
{
- int origpos;
-
- origpos = pos;
+ int pos = 0;
if (ISALPHA (s[pos]) || s[pos] == '_')
{
/* The atom is unquoted. */
- pos++;
- while (ISALNUM (s[pos]) || s[pos] == '_')
+ do
pos++;
- return pos - origpos;
+ while (ISALNUM (s[pos]) || s[pos] == '_');
}
else if (s[pos] == '\'')
{
+ for (pos++; s[pos] != '\''; pos++)
+ if (s[pos] == '\0' /* multiline quoted atoms are ignored */
+ || (s[pos] == '\\' && s[++pos] == '\0'))
+ return 0;
pos++;
-
- for (;;)
- {
- if (s[pos] == '\'')
- {
- pos++;
- break;
- }
- else if (s[pos] == '\0')
- /* Multiline quoted atoms are ignored. */
- return -1;
- else if (s[pos] == '\\')
- {
- if (s[pos+1] == '\0')
- return -1;
- pos += 2;
- }
- else
- pos++;
- }
- return pos - origpos;
}
- else
- return -1;
+
+ return pos;
}
\f
#ifdef ETAGS_REGEXPS
static char *scan_separators __P((char *));
-static void analyse_regex __P((char *, bool));
-static void add_regex __P((char *, bool, language *));
+static void add_regex __P((char *, language *));
static char *substitute __P((char *, char *, struct re_registers *));
-/* Take a string like "/blah/" and turn it into "blah", making sure
- that the first and last characters are the same, and handling
- quoted separator characters. Actually, stops on the occurrence of
- an unquoted separator. Also turns "\t" into a Tab character, and
- similarly for all character escape sequences supported by Gcc.
- Returns pointer to terminating separator. Works in place. Null
- terminates name string. */
+/*
+ * Take a string like "/blah/" and turn it into "blah", verifying
+ * that the first and last characters are the same, and handling
+ * quoted separator characters. Actually, stops on the occurrence of
+ * an unquoted separator. Also process \t, \n, etc. and turn into
+ * appropriate characters. Works in place. Null terminates name string.
+ * Returns pointer to terminating separator, or NULL for
+ * unterminated regexps.
+ */
static char *
scan_separators (name)
char *name;
{
switch (*name)
{
- case 'a': *copyto++ = '\007'; break;
- case 'b': *copyto++ = '\b'; break;
- case 'd': *copyto++ = 0177; break;
- case 'e': *copyto++ = 033; break;
- case 'f': *copyto++ = '\f'; break;
- case 'n': *copyto++ = '\n'; break;
- case 'r': *copyto++ = '\r'; break;
- case 't': *copyto++ = '\t'; break;
- case 'v': *copyto++ = '\v'; break;
+ case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
+ case 'b': *copyto++ = '\b'; break; /* BS (back space) */
+ case 'd': *copyto++ = 0177; break; /* DEL (delete) */
+ case 'e': *copyto++ = 033; break; /* ESC (delete) */
+ case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
+ case 'n': *copyto++ = '\n'; break; /* NL (new line) */
+ case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
+ case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
+ case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
default:
if (*name == sep)
*copyto++ = sep;
else
*copyto++ = *name;
}
+ if (*name != sep)
+ name = NULL; /* signal unterminated regexp */
/* Terminate copied string. */
*copyto = '\0';
/* Look at the argument of --regex or --no-regex and do the right
thing. Same for each line of a regexp file. */
static void
-analyse_regex (regex_arg, ignore_case)
+analyse_regex (regex_arg)
char *regex_arg;
- bool ignore_case;
{
if (regex_arg == NULL)
{
- free_patterns (); /* --no-regex: remove existing regexps */
+ free_regexps (); /* --no-regex: remove existing regexps */
return;
}
pfatal (regexfile);
return;
}
- initbuffer (®exbuf);
+ linebuffer_init (®exbuf);
while (readline_internal (®exbuf, regexfp) > 0)
- analyse_regex (regexbuf.buffer, ignore_case);
+ analyse_regex (regexbuf.buffer);
free (regexbuf.buffer);
fclose (regexfp);
}
error ("unterminated language name in regex: %s", regex_arg);
return;
}
- *cp = '\0';
+ *cp++ = '\0';
lang = get_language_from_langname (lang_name);
if (lang == NULL)
return;
- add_regex (cp + 1, ignore_case, lang);
+ add_regex (cp, lang);
}
break;
/* Regexp to be used for any language. */
default:
- add_regex (regex_arg, ignore_case, NULL);
+ add_regex (regex_arg, NULL);
break;
}
}
-/* Turn a name, which is an ed-style (but Emacs syntax) regular
- expression, into a real regular expression by compiling it. */
+/* Separate the regexp pattern, compile it,
+ and care for optional name and modifiers. */
static void
-add_regex (regexp_pattern, ignore_case, lang)
+add_regex (regexp_pattern, lang)
char *regexp_pattern;
- bool ignore_case;
language *lang;
{
static struct re_pattern_buffer zeropattern;
- char *name;
+ char sep, *pat, *name, *modifiers;
const char *err;
struct re_pattern_buffer *patbuf;
- pattern *pp;
+ regexp *rp;
+ bool
+ force_explicit_name = TRUE, /* do not use implicit tag names */
+ ignore_case = FALSE, /* case is significant */
+ multi_line = FALSE, /* matches are done one line at a time */
+ single_line = FALSE; /* dot does not match newline */
- if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
+ if (strlen(regexp_pattern) < 3)
{
- error ("%s: unterminated regexp", regexp_pattern);
+ error ("null regexp", (char *)NULL);
return;
}
+ sep = regexp_pattern[0];
name = scan_separators (regexp_pattern);
- if (regexp_pattern[0] == '\0')
+ if (name == NULL)
{
- error ("null regexp", (char *)NULL);
+ error ("%s: unterminated regexp", regexp_pattern);
return;
}
- (void) scan_separators (name);
+ if (name[1] == sep)
+ {
+ error ("null name for regexp \"%s\"", regexp_pattern);
+ return;
+ }
+ modifiers = scan_separators (name);
+ if (modifiers == NULL) /* no terminating separator --> no name */
+ {
+ modifiers = name;
+ name = "";
+ }
+ else
+ modifiers += 1; /* skip separator */
+
+ /* Parse regex modifiers. */
+ for (; modifiers[0] != '\0'; modifiers++)
+ switch (modifiers[0])
+ {
+ case 'N':
+ if (modifiers == name)
+ error ("forcing explicit tag name but no name, ignoring", NULL);
+ force_explicit_name = TRUE;
+ break;
+ case 'i':
+ ignore_case = TRUE;
+ break;
+ case 's':
+ single_line = TRUE;
+ /* FALLTHRU */
+ case 'm':
+ multi_line = TRUE;
+ need_filebuf = TRUE;
+ break;
+ default:
+ {
+ char wrongmod [2];
+ wrongmod[0] = modifiers[0];
+ wrongmod[1] = '\0';
+ error ("invalid regexp modifier `%s', ignoring", wrongmod);
+ }
+ break;
+ }
patbuf = xnew (1, struct re_pattern_buffer);
*patbuf = zeropattern;
if (ignore_case)
- patbuf->translate = lc_trans; /* translation table to fold case */
+ {
+ static char lc_trans[CHARS];
+ int i;
+ for (i = 0; i < CHARS; i++)
+ lc_trans[i] = lowcase (i);
+ patbuf->translate = lc_trans; /* translation table to fold case */
+ }
+
+ if (multi_line)
+ pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
+ else
+ pat = regexp_pattern;
- err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
+ if (single_line)
+ re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
+ else
+ re_set_syntax (RE_SYNTAX_EMACS);
+
+ err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
+ if (multi_line)
+ free (pat);
if (err != NULL)
{
error ("%s while compiling pattern", err);
return;
}
- pp = p_head;
- p_head = xnew (1, pattern);
- p_head->regex = savestr (regexp_pattern);
- p_head->p_next = pp;
+ rp = p_head;
+ p_head = xnew (1, regexp);
+ p_head->pattern = savestr (regexp_pattern);
+ p_head->p_next = rp;
p_head->lang = lang;
p_head->pat = patbuf;
- p_head->name_pattern = savestr (name);
+ p_head->name = savestr (name);
p_head->error_signaled = FALSE;
+ p_head->force_explicit_name = force_explicit_name;
p_head->ignore_case = ignore_case;
+ p_head->multi_line = multi_line;
}
/*
size -= 1;
/* Allocate space and do the substitutions. */
+ assert (size >= 0);
result = xnew (size + 1, char);
for (t = result; *out != '\0'; out++)
*t++ = *out;
*t = '\0';
- assert (t <= result + size && t - result == (int)strlen (result));
+ assert (t <= result + size);
+ assert (t - result == (int)strlen (result));
return result;
}
-/* Deallocate all patterns. */
+/* Deallocate all regexps. */
static void
-free_patterns ()
+free_regexps ()
{
- pattern *pp;
+ regexp *rp;
while (p_head != NULL)
{
- pp = p_head->p_next;
- free (p_head->regex);
- free (p_head->name_pattern);
+ rp = p_head->p_next;
+ free (p_head->pattern);
+ free (p_head->name);
free (p_head);
- p_head = pp;
+ p_head = rp;
}
return;
}
+
+/*
+ * Reads the whole file as a single string from `filebuf' and looks for
+ * multi-line regular expressions, creating tags on matches.
+ * readline already dealt with normal regexps.
+ *
+ * Idea by Ben Wing <ben@666.com> (2002).
+ */
+static void
+regex_tag_multiline ()
+{
+ char *buffer = filebuf.buffer;
+ regexp *rp;
+ char *name;
+
+ for (rp = p_head; rp != NULL; rp = rp->p_next)
+ {
+ int match = 0;
+
+ if (!rp->multi_line)
+ continue; /* skip normal regexps */
+
+ /* Generic initialisations before parsing file from memory. */
+ lineno = 1; /* reset global line number */
+ charno = 0; /* reset global char number */
+ linecharno = 0; /* reset global char number of line start */
+
+ /* Only use generic regexps or those for the current language. */
+ if (rp->lang != NULL && rp->lang != curfdp->lang)
+ continue;
+
+ while (match >= 0 && match < filebuf.len)
+ {
+ match = re_search (rp->pat, buffer, filebuf.len, charno,
+ filebuf.len - match, &rp->regs);
+ switch (match)
+ {
+ case -2:
+ /* Some error. */
+ if (!rp->error_signaled)
+ {
+ error ("regexp stack overflow while matching \"%s\"",
+ rp->pattern);
+ rp->error_signaled = TRUE;
+ }
+ break;
+ case -1:
+ /* No match. */
+ break;
+ default:
+ if (match == rp->regs.end[0])
+ {
+ if (!rp->error_signaled)
+ {
+ error ("regexp matches the empty string: \"%s\"",
+ rp->pattern);
+ rp->error_signaled = TRUE;
+ }
+ match = -3; /* exit from while loop */
+ break;
+ }
+
+ /* Match occurred. Construct a tag. */
+ while (charno < rp->regs.end[0])
+ if (buffer[charno++] == '\n')
+ lineno++, linecharno = charno;
+ name = rp->name;
+ if (name[0] == '\0')
+ name = NULL;
+ else /* make a named tag */
+ name = substitute (buffer, rp->name, &rp->regs);
+ if (rp->force_explicit_name)
+ /* Force explicit tag name, if a name is there. */
+ pfnote (name, TRUE, buffer + linecharno,
+ charno - linecharno + 1, lineno, linecharno);
+ else
+ make_tag (name, strlen (name), TRUE, buffer + linecharno,
+ charno - linecharno + 1, lineno, linecharno);
+ break;
+ }
+ }
+ }
+}
+
#endif /* ETAGS_REGEXPS */
\f
return FALSE;
}
-static char *
-get_tag (bp)
+static void
+get_tag (bp, namepp)
register char *bp;
+ char **namepp;
{
- register char *cp, *name;
+ register char *cp = bp;
- if (*bp == '\0')
- return NULL;
- /* Go till you get to white space or a syntactic break */
- for (cp = bp + 1; !notinname (*cp); cp++)
- continue;
- name = savenstr (bp, cp-bp);
- pfnote (name, TRUE,
- lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
- return name;
-}
+ if (*bp != '\0')
+ {
+ /* Go till you get to white space or a syntactic break */
+ for (cp = bp + 1; !notinname (*cp); cp++)
+ continue;
+ make_tag (bp, cp - bp, TRUE,
+ lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ }
-/* Initialize a linebuffer for use */
-static void
-initbuffer (lbp)
- linebuffer *lbp;
-{
- lbp->size = (DEBUG) ? 3 : 200;
- lbp->buffer = xnew (lbp->size, char);
- lbp->buffer[0] = '\0';
- lbp->len = 0;
+ if (namepp != NULL)
+ *namepp = savenstr (bp, cp - bp);
}
/*
* newline or CR-NL, if any. Return the number of characters read from
* `stream', which is the length of the line including the newline.
*
- * On DOS or Windows we do not count the CR character, if any, before the
- * NL, in the returned length; this mirrors the behavior of emacs on those
+ * On DOS or Windows we do not count the CR character, if any before the
+ * NL, in the returned length; this mirrors the behavior of Emacs on those
* platforms (for text files, it translates CR-NL to NL as it reads in the
* file).
+ *
+ * If multi-line regular expressions are requested, each line read is
+ * appended to `filebuf'.
*/
static long
readline_internal (lbp, stream)
}
lbp->len = p - buffer;
+ if (need_filebuf /* we need filebuf for multi-line regexps */
+ && chars_deleted > 0) /* not at EOF */
+ {
+ while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
+ {
+ /* Expand filebuf. */
+ filebuf.size *= 2;
+ xrnew (filebuf.buffer, filebuf.size, char);
+ }
+ strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
+ filebuf.len += lbp->len;
+ filebuf.buffer[filebuf.len++] = '\n';
+ filebuf.buffer[filebuf.len] = '\0';
+ }
+
return lbp->len + chars_deleted;
}
/*
* Like readline_internal, above, but in addition try to match the
- * input line against relevant regular expressions.
+ * input line against relevant regular expressions and manage #line
+ * directives.
*/
static void
readline (lbp, stream)
fdhead->infabsdir = savestr (curfdp->infabsdir);
fdhead->taggedfname = taggedfname;
fdhead->usecharno = FALSE;
+ fdhead->prop = NULL;
+ fdhead->written = FALSE;
curfdp = fdhead;
}
}
{
if (result > 0)
{
- /* Do a tail recursion on ourselves, thus discarding the contents
- of the line buffer. */
+ /* Do a tail recursion on ourselves, thus discarding the contents
+ of the line buffer. */
readline (lbp, stream);
return;
}
#ifdef ETAGS_REGEXPS
{
int match;
- pattern *pp;
+ regexp *rp;
+ char *name;
- /* Match against relevant patterns. */
+ /* Match against relevant regexps. */
if (lbp->len > 0)
- for (pp = p_head; pp != NULL; pp = pp->p_next)
+ for (rp = p_head; rp != NULL; rp = rp->p_next)
{
- /* Only use generic regexps or those for the current language. */
- if (pp->lang != NULL && pp->lang != fdhead->lang)
+ /* Only use generic regexps or those for the current language.
+ Also do not use multiline regexps, which is the job of
+ regex_tag_multiline. */
+ if ((rp->lang != NULL && rp->lang != fdhead->lang)
+ || rp->multi_line)
continue;
- match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
+ match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
switch (match)
{
case -2:
/* Some error. */
- if (!pp->error_signaled)
+ if (!rp->error_signaled)
{
- error ("error while matching \"%s\"", pp->regex);
- pp->error_signaled = TRUE;
+ error ("regexp stack overflow while matching \"%s\"",
+ rp->pattern);
+ rp->error_signaled = TRUE;
}
break;
case -1:
/* No match. */
break;
- default:
- /* Match occurred. Construct a tag. */
- if (pp->name_pattern[0] != '\0')
+ case 0:
+ /* Empty string matched. */
+ if (!rp->error_signaled)
{
- /* Make a named tag. */
- char *name = substitute (lbp->buffer,
- pp->name_pattern, &pp->regs);
- if (name != NULL)
- pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
+ error ("regexp matches the empty string: \"%s\"", rp->pattern);
+ rp->error_signaled = TRUE;
}
+ break;
+ default:
+ /* Match occurred. Construct a tag. */
+ name = rp->name;
+ if (name[0] == '\0')
+ name = NULL;
+ else /* make a named tag */
+ name = substitute (lbp->buffer, rp->name, &rp->regs);
+ if (rp->force_explicit_name)
+ /* Force explicit tag name, if a name is there. */
+ pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
else
- {
- /* Make an unnamed tag. */
- pfnote ((char *)NULL, TRUE,
+ make_tag (name, strlen (name), TRUE,
lbp->buffer, match, lineno, linecharno);
- }
break;
}
}
}
/*
- * Return TRUE if the two strings are equal, ignoring case for alphabetic
- * characters.
+ * Compare two strings, ignoring case for alphabetic characters.
*
- * Analogous to BSD's strcasecmp, included for portability.
+ * Same as BSD's strcasecmp, included for portability.
*/
-static bool
-strcaseeq (s1, s2)
+static int
+etags_strcasecmp (s1, s2)
register const char *s1;
register const char *s2;
{
: *s1 == *s2))
s1++, s2++;
- return (*s1 == *s2);
+ return (ISALPHA (*s1) && ISALPHA (*s2)
+ ? lowcase (*s1) - lowcase (*s2)
+ : *s1 - *s2);
+}
+
+/*
+ * Compare two strings, ignoring case for alphabetic characters.
+ * Stop after a given number of characters
+ *
+ * Same as BSD's strncasecmp, included for portability.
+ */
+static int
+etags_strncasecmp (s1, s2, n)
+ register const char *s1;
+ register const char *s2;
+ register int n;
+{
+ while (*s1 != '\0' && n-- > 0
+ && (ISALPHA (*s1) && ISALPHA (*s2)
+ ? lowcase (*s1) == lowcase (*s2)
+ : *s1 == *s2))
+ s1++, s2++;
+
+ if (n < 0)
+ return 0;
+ else
+ return (ISALPHA (*s1) && ISALPHA (*s2)
+ ? lowcase (*s1) - lowcase (*s2)
+ : *s1 - *s2);
}
/* Skip spaces, return new pointer. */
char *s1, *s2;
{
error (s1, s2);
- exit (BAD);
+ exit (EXIT_FAILURE);
}
static void
char *s1;
{
perror (s1);
- exit (BAD);
+ exit (EXIT_FAILURE);
}
static void
suggest_asking_for_help ()
{
- fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
- progname,
+
#ifdef LONG_OPTIONS
- "--help"
+fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
+ progname, "--help");
#else
- "-h"
+fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
+ progname, "-h");
#endif
- );
- exit (BAD);
+ exit (EXIT_FAILURE);
}
/* Print error message. `s1' is printf control string, `s2' is arg for it. */
linebuffer path;
FILE *pipe;
- initbuffer (&path);
+ linebuffer_init (&path);
pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
if (pipe == NULL || readline_internal (&path, pipe) == 0)
pfatal ("pwd");
#endif
}
+\f
+/* Initialize a linebuffer for use */
+static void
+linebuffer_init (lbp)
+ linebuffer *lbp;
+{
+ lbp->size = (DEBUG) ? 3 : 200;
+ lbp->buffer = xnew (lbp->size, char);
+ lbp->buffer[0] = '\0';
+ lbp->len = 0;
+}
+
/* Set the minimum size of a string contained in a linebuffer. */
static void
linebuffer_setlen (lbp, toksize)
lbp->len = toksize;
}
-/* Like malloc but get fatal error if memory is exhausted. */
+/* Like malloc but get fatal error if memory is exhausted. */
static PTR
xmalloc (size)
unsigned int size;
* indent-tabs-mode: t
* tab-width: 8
* fill-column: 79
- * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node")
+ * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
* End:
*/
+
+/* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
+ (do not change this comment) */
+
+/* etags.c ends here */