f69879bf9608de818f93dbcfef0a6d855bccc194
[bpt/coccinelle.git] / commons / glimpse.ml
1 open Common
2
3 (*****************************************************************************)
4 (* Types *)
5 (*****************************************************************************)
6 (* was first used for LFS, then a little for cocci, and then for aComment *)
7
(* Search flags understood by the glimpse command-line tool. Each
 * constructor stands for one flag; see s_of_glimpse_search for the
 * textual form. *)
type glimpse_search =
  | GlimpseCaseInsensitive
      (* -i: case-insensitive search *)
  | GlimpseWholeWord
      (* -w: match on complete words. Not always a good idea: for instance
       * if a file contains chazarain_j then it does not work with -w. *)
15
(* Options used by a search when the caller does not pass any: whole-word
 * matching only. *)
let default_glimpse_search : glimpse_search list = [ GlimpseWholeWord ]
17
(* Translate one search option into the command-line flag it stands for. *)
let s_of_glimpse_search opt =
  match opt with
  | GlimpseWholeWord -> "-w"
  | GlimpseCaseInsensitive -> "-i"
21
22
(* A directory name, as represented by Common.dirname.
 * NOTE(review): presumably meant for the directory holding a glimpse
 * index; nothing in this file uses the alias, so confirm with callers. *)
type glimpsedir = Common.dirname
24
25 (*****************************************************************************)
26 (* Helpers *)
27 (*****************************************************************************)
28
(* Check that a known glimpse binary can be run.
 *
 * Runs the command "glimpse -V", filters its output lines through
 * Common.exclude Common.null_string (presumably dropping empty lines),
 * and accepts only the case where exactly one line remains and it is one
 * of the two version banners listed below.
 *
 * Raises Failure "glimpse not found or bad version" in every other case. *)
let check_have_glimpse () =
  let raw_lines = Common.cmd_to_list "glimpse -V" in
  let banner = Common.exclude Common.null_string raw_lines in
  match banner with
  | ["This is glimpse version 4.18.2, 2006."]
  | ["This is glimpse version 4.18.5, 2006."] -> ()
  | _ -> failwith "glimpse not found or bad version"
37
(* Render a list of search options as one string of command-line flags:
 * each option is converted with s_of_glimpse_search, and the results are
 * combined in list order by Common.join with a single space. *)
let s_of_glimpse_options xs =
  let flags = List.map s_of_glimpse_search xs in
  Common.join " " flags
40
41
42 (*****************************************************************************)
43 (* Indexing *)
44 (*****************************************************************************)
45
(*
 * Notes on the glimpseindex options:
 *  - -o or -b => bigger index, faster search.
 *  - there is no need to use -b with the way we use glimpse, because we
 *    use -l and so do not need to know where the word is located in the
 *    file.
 *  - -f is for incremental indexing. Does it handle deleted files?
 *    Probably not a big problem: the index will certainly contain some
 *    no-longer-valid pointers, but as glimpse then does a real search on
 *    the file, it will see that the file does not exist anymore. So using
 *    -f is slower, but only very slightly.
 *  - for -z the order is important in .glimpse_filters => put the case of
 *    compressed files first.
 *  - -F: receive the list of files to index from stdin.
 *  - -H: target index directory.
 *  - -n: index numbers too, as some glimpse requests look for a number.
 *
 * Note that glimpseindex does not necessarily index every text file.
 * If a text file is too small, for example if it contains a single word,
 * it is not indexed. To index such files anyway, the -E option would have
 * to be added.
 *
 *   command2 "echo '*_backup' > glimpse/.glimpse_exclude";
 *   command2 "echo '*_backup,v' >> glimpse/.glimpse_exclude";
 *
 * ex: glimpseindex -o -H . home
 *)
(* Build the glimpseindex shell command whose index directory (-H) is
 * [indexdir] and which reads the list of files to index from stdin (-F).
 * [indexdir] is inserted verbatim: no shell quoting is applied here. *)
let glimpse_cmd indexdir =
  spf "glimpseindex -o -H %s -n -F" indexdir
77
(* Index with glimpseindex every file found under [dir] whose name matches
 * the pattern star-dot-[ext], storing the index in [indexdir].
 *
 *  - ext: file extension, without the leading dot.
 *  - dir: root directory handed to find.
 *  - indexdir: index directory; created with mkdir -p beforehand.
 *
 * Calls check_have_glimpse first, so its Failure propagates when no
 * recognized glimpse is available. Returns unit.
 *
 * Every caller-supplied value is passed through Filename.quote before
 * being spliced into a shell command, so that paths containing spaces or
 * shell metacharacters are handed to the commands as single literal
 * arguments instead of being split or interpreted by the shell. *)
let glimpseindex ext dir indexdir =
  check_have_glimpse ();
  let quoted_indexdir = Filename.quote indexdir in
  let quoted_dir = Filename.quote dir in
  (* The name pattern must reach find unexpanded, hence it is quoted too. *)
  let quoted_pattern = Filename.quote ("*." ^ ext) in
  Common.command2 (spf "mkdir -p %s" quoted_indexdir);
  Common.command2
    (spf "find %s -name %s | %s"
       quoted_dir quoted_pattern (glimpse_cmd quoted_indexdir));
  ()
86
(* Fixed path of the scratch file in which glimpseindex_files stores the
 * list of files to index before feeding it to glimpseindex. *)
let _tmpfile : string = "/tmp/pad_glimpseindex_files.list"
88
(* Index with glimpseindex an explicit list of files, storing the index in
 * [indexdir].
 *
 *  - files: passed to Common.uncat together with the scratch file
 *    _tmpfile (presumably one file name written per line -- confirm
 *    against Common.uncat).
 *  - indexdir: index directory; created with mkdir -p beforehand.
 *
 * Calls check_have_glimpse first, so its Failure propagates when no
 * recognized glimpse is available. Returns unit.
 *
 * [indexdir] is passed through Filename.quote so that a path containing
 * spaces or shell metacharacters is not split or interpreted by the
 * shell. The scratch file is fed to glimpseindex with a plain input
 * redirection rather than through cat.
 *
 * NOTE(review): the scratch file has a fixed name and is not removed
 * afterwards, so concurrent runs would share it. *)
let glimpseindex_files files indexdir =
  check_have_glimpse ();
  let quoted_indexdir = Filename.quote indexdir in
  Common.command2 (spf "mkdir -p %s" quoted_indexdir);
  Common.uncat files _tmpfile;
  Common.command2
    (spf "%s < %s" (glimpse_cmd quoted_indexdir) (Filename.quote _tmpfile));
  ()
97
98
99 (*****************************************************************************)
100 (* Searching *)
101 (*****************************************************************************)
102
103
(* Notes on the glimpse options:
 *  - -y: do not prompt.
 *  - -N: allows a far faster search as it does not actually search the
 *    files => for pdf/ps files no filtering is done on them => far
 *    faster.
 *    -N does not run a grep, so if a file was deleted or modified in the
 *    meantime, glimpse does not notice. This also means that without -N,
 *    glimpse greps a file that was modified in the meantime, so that it
 *    always returns something valid (no false positives, although there
 *    may still be misses). Does it use the date of the file to avoid
 *    useless greps?
 *    -N can actually return wrong results, because a file may contain
 *    "peter norvig" => better not to use -N at first.
 *    NOTE(review): the command built below nevertheless passes -N.
 *  - -N also shows just the file names on output.
 *  - -l shows just the file names too, but the files are still searched,
 *    so at least there are no false positives.
 *  - if -z is used for glimpseindex, do not forget -z for glimpse too.
 *  - -W: boolean AND queries are evaluated at the file level rather than
 *    at the line level.
 *
 * Query language: good;bad for conjunction, good,bad for disjunction.
 *
 * ex: glimpse -y -H . -N -W -w pattern;pattern2
 *)
(* Run a glimpse search and return the output lines of the command as
 * collected by Common.cmd_to_list.
 *
 *  - query: the glimpse query string.
 *  - options: search flags, default_glimpse_search when omitted.
 *  - dir: index directory, passed to glimpse with -H.
 *
 * Both [dir] and [query] go through Filename.quote. The query used to be
 * wrapped in hand-written single quotes, so a query that itself contained
 * a single quote produced a broken (or injected) shell command; for
 * queries without one, the generated command is unchanged. *)
let glimpse query ?(options=default_glimpse_search) dir =
  let str_options = s_of_glimpse_options options in
  Common.cmd_to_list
    (spf "glimpse -y -H %s -N -W %s %s"
       (Filename.quote dir) str_options (Filename.quote query))
135
(* Placeholder for a grep-based search; the original note mentions the
 * flags grep -i -l -I. Not implemented: the argument is ignored and the
 * function always raises Todo. *)
let grep _query =
  raise Todo
139
140
141 (*
142 check_have_position_index
143
144 let glimpseindex_position: string -> ... (filename * int) list
145 let glimpse_position: string -> ... (filename * int) list
146 *)