- Try to do better pretty printing when array elements are individually
[bpt/coccinelle.git] / commons / glimpse.ml
CommitLineData
34e49164
C
1open Common
2
3(*****************************************************************************)
91eba41f 4(* Types *)
34e49164
C
5(*****************************************************************************)
6(* was first used for LFS, then a little for cocci, and then for aComment *)
7
(* Flags that tune how glimpse matches a query. Each constructor stands for
 * one glimpse command-line switch (see s_of_glimpse_search). *)
type glimpse_search =
  | GlimpseCaseInsensitive
      (* -i: case-insensitive search *)
  | GlimpseWholeWord
      (* -w: match on complete words. Not always a good idea: for instance
       * if a file contains chazarain_j then it does not work with -w. *)
15
(* Search flags used when a caller of glimpse does not pass ?options:
 * whole-word matching only. *)
let default_glimpse_search : glimpse_search list = [ GlimpseWholeWord ]
91eba41f
C
17
(* Translate one search flag into the glimpse command-line switch it
 * stands for. *)
let s_of_glimpse_search flag =
  match flag with
  | GlimpseWholeWord -> "-w"
  | GlimpseCaseInsensitive -> "-i"
91eba41f
C
21
22
(* A directory name used with glimpse (plain alias of Common.dirname).
 * NOTE(review): presumably the index directory given to -H; the alias is
 * not used by the functions visible in this section -- confirm. *)
type glimpsedir = Common.dirname
24
25(*****************************************************************************)
26(* Helpers *)
27(*****************************************************************************)
28
ae4735db
C
(* Make sure a supported glimpse binary is available.
 *
 * Runs "glimpse -V", filters the output through
 * Common.exclude Common.null_string (presumably dropping empty lines), and
 * accepts only the exact one-line banners of versions 4.18.2 and 4.18.5.
 *
 * Raises Failure "glimpse not found or bad version" for any other output.
 *)
let check_have_glimpse () =
  let banner =
    Common.exclude Common.null_string (Common.cmd_to_list "glimpse -V") in
  match banner with
  | ["This is glimpse version 4.18.2, 2006."]
  | ["This is glimpse version 4.18.5, 2006."] -> ()
  | _ -> failwith "glimpse not found or bad version"
37
(* Render a list of search flags as a single string of glimpse switches
 * separated by one space, in list order. *)
let s_of_glimpse_options xs =
  Common.join " " (List.map s_of_glimpse_search xs)
40
41
42(*****************************************************************************)
43(* Indexing *)
44(*****************************************************************************)
34e49164 45
(*
 * Notes on the glimpseindex options:
 *  - -o or -b => bigger index, faster search.
 *  - No need to use -b with our way of using glimpse, because we use -l
 *    and so do not need to know where in the file the word is.
 *  - -f is for incremental indexing. Does it handle deleted files?
 *    Probably not that bad: the index will certainly contain some
 *    no-longer-valid pointers, but as glimpse then does a real search on
 *    the file, it will see that the file does not exist anymore. So using
 *    -f is slower, but only very slightly.
 *  - For -z the order is important in .glimpse_filters => put the case of
 *    compressed files first.
 *  - -F: receive the list of files to index from stdin.
 *  - -H: target index dir.
 *  - -n: index numbers, as some glimpse requests are looking for a number.
 *
 * Note that glimpseindex does not necessarily index every text file. If a
 * text file is too small, e.g. contains a single word, it is not indexed.
 * To index it anyway, the -E option would have to be added.
 *
 *   command2 "echo '*_backup' > glimpse/.glimpse_exclude";
 *   command2 "echo '*_backup,v' >> glimpse/.glimpse_exclude";
 *
 * ex: glimpseindex -o -H . home
 *
 * [glimpse_cmd indexdir] builds the glimpseindex command line that targets
 * [indexdir] (-H) and expects the list of files to index on stdin (-F).
 * [indexdir] is interpolated as-is, without shell quoting.
 *)
let glimpse_cmd indexdir =
  spf "glimpseindex -o -H %s -n -F" indexdir
77
(* Build a glimpse index in [indexdir] for the files found under [dir]
 * by find with -name "*.EXT", where EXT is [ext].
 *
 * Steps: check that a supported glimpse is installed, create [indexdir]
 * with mkdir -p, then pipe the find output into the glimpseindex command
 * produced by glimpse_cmd. Arguments are interpolated into the shell
 * commands as-is, without quoting.
 *)
let glimpseindex ext dir indexdir =
  check_have_glimpse ();
  let mkdir_cmd = spf "mkdir -p %s" indexdir in
  Common.command2 mkdir_cmd;
  let index_cmd =
    spf "find %s -name \"*.%s\" | %s" dir ext (glimpse_cmd indexdir) in
  Common.command2 index_cmd;
  ()
86
87
(* Build a glimpse index in [indexdir] for an explicit list of [files].
 *
 * Steps: check that a supported glimpse is installed, create [indexdir]
 * with mkdir -p, hand [files] to Common.uncat together with a fresh
 * temporary file (presumably writing one filename per line -- confirm in
 * Common), then cat that file into the glimpseindex command produced by
 * glimpse_cmd.
 *)
let glimpseindex_files files indexdir =
  check_have_glimpse ();
  Common.command2 (spf "mkdir -p %s" indexdir);

  (* Earlier note kept from the original: "/tmp/pad_glimpseindex_files.list"
   * (presumably the fixed path used before the temp file -- confirm). *)
  let list_file = Common.new_temp_file "glimpse" "list" in

  Common.uncat files list_file;
  let feed_cmd = spf "cat %s | %s" list_file (glimpse_cmd indexdir) in
  Common.command2 feed_cmd;
  ()
34e49164 99
34e49164 100
91eba41f
C
101(*****************************************************************************)
102(* Searching *)
103(*****************************************************************************)
34e49164
C
104
105
(* Notes on the glimpse options:
 *  - -y: do not ask for a prompt.
 *  - -N: allows a far faster search as it does not actually search the
 *    files => when there are pdf/ps files, no filtering is done on them
 *    => far faster.
 *    -N does not grep, so if a file was deleted or modified in the
 *    meantime, it does not see it. This also means that without -N,
 *    glimpse greps the file if it was modified in the meantime, so as to
 *    always return something valid (no false positives, but there may
 *    still be misses). Does it use the file date to avoid useless greps?
 *    -N can actually return wrong results, because a file may contain
 *    "peter norvig"
 *    => better to not use -N at first.
 *  - -N also shows just the filename on output.
 *  - -l shows just the filename too, but the files are still searched, so
 *    at least there are no false positives.
 *  - If -z is used for glimpseindex, do not forget -z for glimpse too.
 *  - -W: boolean "and" queries are not done at line level but at file
 *    level.
 *
 * Query language: good;bad for conjunction, good,bad for disjunction.
 *
 * ex: glimpse -y -H . -N -W -w pattern;pattern2
 *
 * [glimpse query ?options dir] runs glimpse against the index directory
 * [dir] with switches -y -N -W plus those derived from [options]
 * (default: default_glimpse_search), and returns whatever
 * Common.cmd_to_list returns for that command (the output lines).
 *
 * The query is passed through Filename.quote, so a query containing a
 * single quote can no longer terminate the quoting and break (or inject
 * into) the shell command. For queries without a single quote the
 * generated command is the same as with the former literal quoting.
 * [dir] is still interpolated as-is, as before.
 *)
let glimpse query ?(options=default_glimpse_search) dir =
  let str_options = s_of_glimpse_options options in
  let res =
    Common.cmd_to_list
      (spf "glimpse -y -H %s -N -W %s %s"
         dir str_options (Filename.quote query)) in
  res
137
(* Placeholder for a grep-based search (noted flags: grep -i -l -I).
 * Not implemented: the argument is ignored and Todo is always raised. *)
let grep _query =
  raise Todo
91eba41f
C
141
142
143(*
144check_have_position_index
145
146let glimpseindex_position: string -> ... (filename * int) list
147let glimpse_position: string -> ... (filename * int) list
148*)