Commit | Line | Data |
---|---|---|
34e49164 C |
1 | open Common |
2 | ||
3 | (*****************************************************************************) | |
91eba41f | 4 | (* Types *) |
34e49164 C |
5 | (*****************************************************************************) |
6 | (* was first used for LFS, then a little for cocci, and then for aComment *) | |
7 | ||
(* Search options understood by the glimpse wrapper in this file. *)
type glimpse_search =
  | GlimpseCaseInsensitive
      (* -i: case-insensitive search *)
  | GlimpseWholeWord
      (* -w: match on complete words. Not always a good idea: for
       * instance, if a file contains chazarain_j then the search does
       * not work with -w. *)
15 | ||
(* Options used by [glimpse] when the caller does not pass ~options:
 * whole-word matching only. *)
let default_glimpse_search : glimpse_search list = [ GlimpseWholeWord ]
91eba41f C |
17 | |
(* Map a search option to the glimpse command-line switch it stands for. *)
let s_of_glimpse_search opt =
  match opt with
  | GlimpseWholeWord -> "-w"
  | GlimpseCaseInsensitive -> "-i"
91eba41f C |
21 | |
22 | ||
(* Alias of Common.dirname. Presumably names the directory that holds a
 * glimpse index; it is not referenced by the code visible here. *)
type glimpsedir = Common.dirname
24 | ||
25 | (*****************************************************************************) | |
26 | (* Helpers *) | |
27 | (*****************************************************************************) | |
28 | ||
ae4735db C |
(* Fail unless a supported glimpse binary answers "glimpse -V".
 *
 * The command output is filtered through
 * Common.exclude Common.null_string (presumably dropping empty lines)
 * and must then consist of exactly one of the two known version banners
 * (4.18.2 or 4.18.5). Anything else raises Failure. *)
let check_have_glimpse () =
  let supported_banners =
    [ "This is glimpse version 4.18.2, 2006.";
      "This is glimpse version 4.18.5, 2006." ]
  in
  let output = Common.cmd_to_list ("glimpse -V") in
  match output +> Common.exclude Common.null_string with
  | [ banner ] when List.mem banner supported_banners -> ()
  | _ -> failwith "glimpse not found or bad version"
37 | ||
(* Render a list of search options as a single string of command-line
 * switches, joined with one space by Common.join. *)
let s_of_glimpse_options xs =
  let switches = List.map s_of_glimpse_search xs in
  Common.join " " switches
40 | ||
41 | ||
42 | (*****************************************************************************) | |
43 | (* Indexing *) | |
44 | (*****************************************************************************) | |
34e49164 | 45 | |
ae4735db | 46 | (* |
34e49164 | 47 | * note: |
ae4735db C |
48 | * - -o or -b for glimpseindex => bigger index, faster search |
49 | * - no need to use -b with our way to use glimpse | |
50 | * cos we use -l so dont need to know what is the place of the word | |
34e49164 | 51 | * in the file |
ae4735db | 52 | * - -f is for incremental indexing. Does it handle deleted files? |
34e49164 C |
53 | * Probably not a big problem: the index will certainly contain some |
54 | * stale pointers, but since glimpse then does a real search on the | |
55 | * file, it will see that the file no longer exists, | |
ae4735db | 56 | * so using -f is slower, but only very slightly |
34e49164 | 57 | * - for -z the order is important in .glimpse_filters => put |
ae4735db | 58 | * the case of compressed file first |
34e49164 C |
59 | * - -F receive the list of files to index from stdin |
60 | * - -H target index dir | |
91eba41f C |
61 | * - -n for indexing numbers as sometimes some glimpse request are looking |
62 | * for a number | |
ae4735db C |
63 | * |
64 | * | |
65 | * Note that glimpseindex does not necessarily index every text file. | |
66 | * If a text file is too small, e.g. it contains a single word, | |
67 | * it is not indexed. To index such files anyway, one would have to add | |
34e49164 | 68 | * the -E option |
ae4735db | 69 | * |
34e49164 C |
70 | * command2 "echo '*_backup' > glimpse/.glimpse_exclude"; |
71 | * command2 "echo '*_backup,v' >> glimpse/.glimpse_exclude"; | |
ae4735db C |
72 | * |
73 | * ex: glimpseindex -o -H . home | |
74 | * | |
34e49164 | 75 | *) |
91eba41f C |
(* Build the glimpseindex command line targeting the index directory
 * [indexdir]. The command ends with -F, so the list of files to index is
 * expected on stdin; [indexdir] is interpolated without shell quoting. *)
let glimpse_cmd indexdir =
  spf "glimpseindex -o -H %s -n -F" indexdir
77 | ||
(* Index the files found under [dir] whose name matches "*.[ext]".
 *
 * Steps, in order: check that a supported glimpse is installed, create
 * [indexdir] with mkdir -p, then pipe the output of find into the
 * glimpseindex command built by glimpse_cmd.
 * [dir] and [indexdir] are interpolated into the shell commands without
 * quoting. *)
let glimpseindex ext dir indexdir =
  check_have_glimpse ();
  let mkdir_cmd = spf "mkdir -p %s" indexdir in
  Common.command2 mkdir_cmd;
  let indexer = glimpse_cmd indexdir in
  let pipeline = spf "find %s -name \"*.%s\" | %s" dir ext indexer in
  Common.command2 pipeline;
  ()
86 | ||
87 | ||
(* Index an explicit list of files into [indexdir].
 *
 * After the same availability check and mkdir -p as glimpseindex, the
 * [files] list is written with Common.uncat to a fresh temporary file
 * (presumably one name per line), and that file is fed with cat to the
 * glimpseindex command built by glimpse_cmd. *)
let glimpseindex_files files indexdir =
  check_have_glimpse ();
  let mkdir_cmd = spf "mkdir -p %s" indexdir in
  Common.command2 mkdir_cmd;

  (* alternative noted in the original: "/tmp/pad_glimpseindex_files.list" *)
  let list_file = Common.new_temp_file "glimpse" "list" in
  Common.uncat files list_file;

  let indexer = glimpse_cmd indexdir in
  Common.command2 (spf "cat %s | %s" list_file indexer);
  ()
34e49164 | 99 | |
34e49164 | 100 | |
91eba41f C |
101 | (*****************************************************************************) |
102 | (* Searching *) | |
103 | (*****************************************************************************) | |
34e49164 C |
104 | |
105 | ||
106 | (* note: | |
107 | * - -y dont ask for prompt | |
ae4735db | 108 | * - -N allow far faster search as it does not actually search the file |
34e49164 | 109 | * => when pdf/ps files no filtering done of them => far faster. |
ae4735db | 110 | * -N does not run a grep, so if a file was deleted or modified in the |
34e49164 C |
111 | * meantime, glimpse does not notice. It also means that without -N, |
112 | * glimpse greps the file if it was modified in the meantime, so as to | |
ae4735db | 113 | * always return something valid (no false positives, though there |
34e49164 C |
114 | * may still be misses). Does it use the file's date to avoid |
115 | * useless greps? | |
ae4735db | 116 | * the -N can actually return wrong result. cos a file may |
34e49164 | 117 | * contain "peter norvig" |
ae4735db C |
118 | * => better to not use -N at first |
119 | * | |
34e49164 C |
120 | * - -N also just show the filename on output |
121 | * - -l show just the filename too, but the files are still searched so | |
122 | * at least no false positives. | |
ae4735db | 123 | * - if use -z for glimpseindex, dont forget the -z too for glimpse |
34e49164 | 124 | * - -W for boolean and queries to not be done on line level but file level |
ae4735db | 125 | * |
34e49164 | 126 | * query language: good;bad for conjunction. good,bad for disjunction. |
ae4735db | 127 | * |
34e49164 | 128 | * ex: glimpse -y -H . -N -W -w pattern;pattern2 |
ae4735db | 129 | * |
34e49164 | 130 | *) |
(* Run a glimpse query against the index stored in [dir].
 *
 * query:   the glimpse query (good;bad for conjunction, good,bad for
 *          disjunction).
 * options: search switches, defaulting to default_glimpse_search.
 * dir:     index directory, passed to -H; interpolated without shell
 *          quoting.
 *
 * Returns the output lines of the command as given by
 * Common.cmd_to_list. The command uses -y (no prompt), -N and -W. *)
let glimpse query ?(options=default_glimpse_search) dir =
  let str_options = s_of_glimpse_options options in
  (* Filename.quote wraps the query in single quotes and escapes any
   * single quote inside it. A bare '%s' in the format would yield a
   * broken (or injectable) shell command for such a query. *)
  let quoted_query = Filename.quote query in
  let res =
    Common.cmd_to_list
      (spf "glimpse -y -H %s -N -W %s %s" dir str_options quoted_query)
  in
  res
137 | ||
138 | (* grep -i -l -I *) | |
(* Not implemented: always raises Todo, whatever the query. *)
let grep _query =
  raise Todo
91eba41f C |
141 | |
142 | ||
143 | (* | |
144 | check_have_position_index | |
145 | ||
146 | let glimpseindex_position: string -> ... (filename * int) list | |
147 | let glimpse_position: string -> ... (filename * int) list | |
148 | *) |