Release coccinelle-0.1.8
[bpt/coccinelle.git] / commons / glimpse.ml
1 open Common
2
3 (*****************************************************************************)
4 (* Types *)
5 (*****************************************************************************)
6 (* was first used for LFS, then a little for cocci, and then for aComment *)
7
(* Options that modify how glimpse matches a query. Each constructor
 * corresponds to one glimpse command-line flag (see s_of_glimpse_search).
 *)
type glimpse_search =
  (* -i: case-insensitive search *)
  | GlimpseCaseInsensitive
  (* -w: match on complete words only. Not always a good idea: for
   * instance, if a file contains chazarain_j then the search does not
   * work with -w.
   *)
  | GlimpseWholeWord
15
(* Options used by [glimpse] when the caller does not pass ?options:
 * whole-word matching only. *)
let default_glimpse_search : glimpse_search list =
  [ GlimpseWholeWord ]
17
(* Translate one search option into the glimpse command-line flag that
 * enables it. *)
let s_of_glimpse_search search =
  match search with
  | GlimpseWholeWord -> "-w"
  | GlimpseCaseInsensitive -> "-i"
21
22
(* Alias of Common.dirname. Presumably names the directory that holds a
 * glimpse index -- it is not used by the code visible in this file, so
 * confirm against callers. *)
type glimpsedir = Common.dirname
24
25 (*****************************************************************************)
26 (* Helpers *)
27 (*****************************************************************************)
28
(* Check that a supported glimpse binary can be run.
 *
 * Runs "glimpse -V", drops the lines rejected by Common.null_string and
 * compares what is left with the banners of the two accepted versions
 * (4.18.2 and 4.18.5).
 *
 * Raises Failure "glimpse not found or bad version" for any other output.
 *)
let check_have_glimpse () =
  let banner =
    Common.cmd_to_list "glimpse -V" +> Common.exclude Common.null_string
  in
  match banner with
  | ["This is glimpse version 4.18.2, 2006."]
  | ["This is glimpse version 4.18.5, 2006."] -> ()
  | _ -> failwith "glimpse not found or bad version"
37
(* Render a list of search options as the space-separated flag string
 * expected on the glimpse command line, e.g. "-i -w". *)
let s_of_glimpse_options xs =
  let flags = List.map s_of_glimpse_search xs in
  Common.join " " flags
40
41
42 (*****************************************************************************)
43 (* Indexing *)
44 (*****************************************************************************)
45
46 (*
47 * note:
48 * - -o or -b for glimpseindex => bigger index, faster search
49 * - no need to use -b with our way to use glimpse
50 * cos we use -l so dont need to know what is the place of the word
51 * in the file
 * - -f is for incremental indexing. Does it handle deleted files?
 *   Probably well enough: the index will then contain some stale
 *   pointers, but since glimpse afterwards does a real search on the
 *   file, it will see that the file no longer exists; so using -f is
 *   slower, but only very slightly
57 * - for -z the order is important in .glimpse_filters => put
58 * the case of compressed file first
59 * - -F receive the list of files to index from stdin
60 * - -H target index dir
61 * - -n for indexing numbers as sometimes some glimpse request are looking
62 * for a number
63 *
64 *
 * Note that glimpseindex does not necessarily index every text file.
 * If a text file is too small (for example, it contains a single word),
 * it is not indexed. To index such files anyway, the -E option would
 * have to be added.
69 *
70 * command2 "echo '*_backup' > glimpse/.glimpse_exclude";
71 * command2 "echo '*_backup,v' >> glimpse/.glimpse_exclude";
72 *
73 * ex: glimpseindex -o -H . home
74 *
75 *)
(* Build the glimpseindex command line that (re)creates an index in
 * directory [s]:
 *   -o   index-size trade-off discussed in the note above
 *   -H   target index directory
 *   -n   also index numbers
 *   -F   read the list of files to index from stdin
 *
 * The directory is shell-quoted so that a path containing spaces or shell
 * metacharacters is passed to glimpseindex as a single argument.
 *)
let glimpse_cmd s =
  Printf.sprintf "glimpseindex -o -H %s -n -F" (Filename.quote s)
77
(* Index every file under [dir] whose name ends in ".[ext]" into the
 * glimpse index directory [indexdir].
 *
 * [indexdir] is created first (mkdir -p); the file list produced by find
 * is then piped into the glimpseindex command built by glimpse_cmd.
 *
 * [dir], [indexdir] (for mkdir) and the find name pattern are shell-quoted
 * so that paths or extensions containing spaces or shell metacharacters
 * cannot break, or inject into, the commands.
 *
 * Raises Failure (from check_have_glimpse) when no supported glimpse is
 * available.
 *)
let glimpseindex ext dir indexdir =
  check_have_glimpse ();
  Common.command2 (spf "mkdir -p %s" (Filename.quote indexdir));
  Common.command2
    (spf "find %s -name %s | %s"
       (Filename.quote dir)
       (Filename.quote ("*." ^ ext))
       (glimpse_cmd indexdir));
  ()
86
87
(* Index an explicit list of [files] into the glimpse index directory
 * [indexdir].
 *
 * [indexdir] is created first (mkdir -p). The file list is written with
 * Common.uncat to a fresh temporary file obtained from
 * Common.new_temp_file (this replaced a fixed path,
 * "/tmp/pad_glimpseindex_files.list"), and that file is then fed on stdin
 * to the glimpseindex command built by glimpse_cmd.
 *
 * [indexdir] (for mkdir) and the temporary file path are shell-quoted so
 * that paths containing spaces or shell metacharacters do not break the
 * commands.
 *
 * Raises Failure (from check_have_glimpse) when no supported glimpse is
 * available.
 *)
let glimpseindex_files files indexdir =
  check_have_glimpse ();
  Common.command2 (spf "mkdir -p %s" (Filename.quote indexdir));

  let tmpfile = Common.new_temp_file "glimpse" "list" in

  Common.uncat files tmpfile;
  Common.command2
    (spf "cat %s | %s" (Filename.quote tmpfile) (glimpse_cmd indexdir));
  ()
99
100
101 (*****************************************************************************)
102 (* Searching *)
103 (*****************************************************************************)
104
105
106 (* note:
107 * - -y dont ask for prompt
108 * - -N allow far faster search as it does not actually search the file
109 * => when pdf/ps files no filtering done of them => far faster.
 *   -N does not grep the files, so if a file was deleted or modified in
 *   the meantime, glimpse does not notice. It also means that without -N
 *   glimpse greps a file that has been modified in the meantime, so that
 *   it always returns something valid (no false positives, although there
 *   may still be misses). Does it use the file's date to avoid useless
 *   greps?
116 * the -N can actually return wrong result. cos a file may
117 * contain "peter norvig"
118 * => better to not use -N at first
119 *
120 * - -N also just show the filename on output
121 * - -l show just the filename too, but the files are still searched so
122 * at least no false positives.
123 * - if use -z for glimpseindex, dont forget the -z too for glimpse
124 * - -W for boolean and queries to not be done on line level but file level
125 *
126 * query langage: good;bad for conjunction. good,bad for disjunction.
127 *
128 * ex: glimpse -y -H . -N -W -w pattern;pattern2
129 *
130 *)
(* Run a glimpse [query] against the index stored in [dir] and return the
 * lines printed by the command (with -N these are file names, per the
 * note above).
 *
 * ?options selects the matching flags (default: default_glimpse_search).
 * The fixed flags are -y (no prompt), -N (answer from the index only) and
 * -W (boolean queries apply to the whole file rather than to each line).
 *
 * Both [dir] and [query] are shell-quoted with Filename.quote. For a
 * query without single quotes this yields exactly the '...' form that was
 * previously written by hand; a query that does contain a single quote is
 * now escaped instead of terminating the quoting early.
 *)
let glimpse query ?(options=default_glimpse_search) dir =
  let str_options = s_of_glimpse_options options in
  Common.cmd_to_list
    (spf "glimpse -y -H %s -N -W %s %s"
       (Filename.quote dir) str_options (Filename.quote query))
137
(* Not implemented yet: always raises Todo, whatever [query] is.
 * The intended fallback is presumably a plain "grep -i -l -I" search
 * (flags taken from the original note) -- confirm before implementing.
 *)
let grep query =
  raise Todo
141
142
143 (*
144 check_have_position_index
145
146 let glimpseindex_position: string -> ... (filename * int) list
147 let glimpse_position: string -> ... (filename * int) list
148 *)