gnu: Add ugrep.
[jackhill/guix/guix.git] / gnu / packages / search.scm
CommitLineData
98e7fc9b
MW
1;;; GNU Guix --- Functional package management for GNU
2;;; Copyright © 2014 Mark H Weaver <mhw@netris.org>
b9b3440b 3;;; Copyright © 2015, 2016 Eric Bavier <bavier@member.fsf.org>
ff8b5a3a 4;;; Copyright © 2017 Thomas Danckaert <post@thomasdanckaert.be>
423d2136 5;;; Copyright © 2017 Ricardo Wurmus <rekado@elephly.net>
78a9cfa3 6;;; Copyright © 2018, 2020, 2021 Tobias Geerinckx-Rice <me@tobias.gr>
f50bad80 7;;; Copyright © 2018 Adam Massmann <massmannak@gmail.com>
d8aa4444 8;;; Copyright © 2020 Hartmut Goebel <h.goebel@crazy-compilers.com>
98e7fc9b
MW
9;;;
10;;; This file is part of GNU Guix.
11;;;
12;;; GNU Guix is free software; you can redistribute it and/or modify it
13;;; under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 3 of the License, or (at
15;;; your option) any later version.
16;;;
17;;; GNU Guix is distributed in the hope that it will be useful, but
18;;; WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
24
25(define-module (gnu packages search)
26 #:use-module ((guix licenses)
9b8df3e8 27 #:select (gpl2 gpl2+ gpl3+ lgpl2.1+ bsd-3 x11 perl-license))
98e7fc9b
MW
28 #:use-module (guix packages)
29 #:use-module (guix download)
d8aa4444 30 #:use-module (guix git-download)
c3753f38 31 #:use-module (guix utils)
98e7fc9b 32 #:use-module (guix build-system gnu)
9b8df3e8 33 #:use-module (guix build-system perl)
f50bad80 34 #:use-module (guix build-system python)
a2319d2c 35 #:use-module (gnu packages)
98e7fc9b 36 #:use-module (gnu packages compression)
065b7954
EB
37 #:use-module (gnu packages check)
38 #:use-module (gnu packages databases)
e0f415b2 39 #:use-module (gnu packages freedesktop)
d8aa4444 40 #:use-module (gnu packages less)
a2319d2c 41 #:use-module (gnu packages linux)
d8aa4444 42 #:use-module (gnu packages pcre)
a2319d2c 43 #:use-module (gnu packages perl)
f50bad80 44 #:use-module (gnu packages pdf)
423d2136 45 #:use-module (gnu packages python)
f50bad80 46 #:use-module (gnu packages python-web)
44d10b1f 47 #:use-module (gnu packages python-xyz)
9d0c291e 48 #:use-module (gnu packages sphinx)
a2319d2c 49 #:use-module (gnu packages web)
e0f415b2 50 #:use-module (gnu packages xdisorg)
a2319d2c 51 #:use-module (gnu packages xml))
98e7fc9b
MW
52
;; Xapian core library, built with the GNU build system.  The standard
;; `check' phase is replaced by an explicit list of "make" test targets so
;; that the TCP-based target is not run.
(define-public xapian
  (package
    (name "xapian")
    (version "1.4.18")
    ;; Note: When updating Xapian, remember to update xapian-bindings below.
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://oligarchy.co.uk/xapian/" version
                           "/xapian-core-" version ".tar.xz"))
       (sha256
        (base32 "0xsb4ihf3p767f0zx9p4janwni6r9sg5j6lry0002i8hmnsdnv8r"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           ;; As of Xapian 1.3.3, the TCP server implementation uses
           ;; getaddrinfo().  This does not work in the build environment,
           ;; so exclude those tests.  See HACKING for the list of targets.
           (lambda _
             (apply invoke "make"
                    '("check-inmemory"
                      "check-remoteprog"
                      ;; "check-remotetcp" is intentionally not listed.
                      "check-multi"
                      "check-glass"
                      "check-chert")))))))
    (inputs
     (list (list "zlib" zlib)
           (list "util-linux" util-linux "lib")))
    (home-page "https://xapian.org/")
    (synopsis "Search Engine Library")
    (description
     "Xapian is a highly adaptable toolkit which allows developers to easily
add advanced indexing and search facilities to their own applications. It
supports the Probabilistic Information Retrieval model and also supports a
rich set of boolean query operators.")
    (license (list gpl2+ bsd-3 x11))))
90
423d2136
RW
;; Python bindings for Xapian.  The package inherits from `xapian', so any
;; field not set here (for example the description and home page) is taken
;; from that package.  The version is read from `xapian' as well.
(define-public python-xapian-bindings
  (package
    (inherit xapian)
    (name "python-xapian-bindings")
    (version (package-version xapian))
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://oligarchy.co.uk/xapian/" version
                           "/xapian-bindings-" version ".tar.xz"))
       (sha256
        (base32 "13ziql8027glgihgvnbsa75vkcn82g83mbihj60zf0njj170clpy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags '("--with-python3")
       #:make-flags
       ;; Set pkgpylibdir to a site-packages directory inside this package's
       ;; own output; the Python major+minor version is spliced in from the
       ;; `python' package.
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "pkgpylibdir="
                              out
                              "/lib/python"
                              ,(version-major+minor (package-version python))
                              "/site-packages/xapian")))))
    (native-inputs
     (list (list "python-sphinx" python-sphinx))) ;for documentation
    (inputs
     (list (list "python" python)
           (list "xapian" xapian)
           (list "zlib" zlib)))
    (synopsis "Python bindings for the Xapian search engine library")
    (license gpl2+)))
119
9b8df3e8
SB
;; Search::Xapian, fetched from the CPAN mirror and built with the Perl
;; build system against the `xapian' package defined in this file.
(define-public perl-search-xapian
  (package
    (name "perl-search-xapian")
    (version "1.2.25.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://cpan/authors/id/O/OL/OLLY/"
                           "Search-Xapian-" version ".tar.gz"))
       (sha256
        (base32 "1pbl8pbgmbs3i8yik4p63g4pd9bhn0dp3d7l667dkvw0kccl66c7"))))
    (build-system perl-build-system)
    (inputs
     (list (list "xapian" xapian)))
    (native-inputs
     (list (list "perl-devel-leak" perl-devel-leak)))
    (synopsis "Perl XS frontend to the Xapian C++ search library")
    (description
     "Search::Xapian wraps most methods of most Xapian classes. The missing
classes and methods should be added in the future. It also provides a
simplified, more 'perlish' interface to some common operations.")
    (home-page "https://metacpan.org/release/Search-Xapian")
    (license perl-license)))
143
065b7954
EB
;; libtocc, the library part of the Tocc release tarball.  The build runs
;; from the "libtocc/src" sub-directory, and the test suite is configured,
;; built and executed by hand from the sibling "tests" directory.
(define-public libtocc
  (package
    (name "libtocc")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/aidin36/tocc/releases/download/"
                           "v" version "/tocc-" version ".tar.gz"))
       (sha256
        (base32 "1kd2jd74m8ksc8s7hh0haz0q0c3n0mr39bbky262kk4l58f1g068"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Enter the library's source directory before configuring.
         (add-before 'configure 'chdir-source
           (lambda _
             (chdir "libtocc/src")
             #t))
         ;; The tests have their own configure script and are built against
         ;; the headers and libraries from ../src.
         (replace 'check
           (lambda _
             (with-directory-excursion "../tests"
               (let ((shell (which "sh")))
                 (invoke "./configure"
                         (string-append "CONFIG_SHELL=" shell)
                         (string-append "SHELL=" shell)
                         "CPPFLAGS=-I../src"
                         (string-append
                          "LDFLAGS=-L../src/.libs "
                          "-Wl,-rpath=../src/.libs")))
               (invoke "make")
               (invoke "./libtocctests")))))))
    (native-inputs
     (list (list "catch" catch-framework)))
    (inputs
     (list (list "unqlite" unqlite)))
    (synopsis "Tool for Obsessive Compulsive Classifiers")
    (description
     "libtocc is the engine of the Tocc project, a tag-based file management
system. The goal of Tocc is to provide a better system for classifying files
that is more flexible than classic file systems that are based on a tree of
files and directories.")
    (home-page "https://t-o-c-c.com/")
    (license gpl3+)))
187
;; Command-line front end for libtocc.  It reuses libtocc's version and
;; source, and builds from the "cli/src" sub-directory of that source.
(define-public tocc
  (package
    (name "tocc")
    (version (package-version libtocc))
    (source (package-source libtocc))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;No tests
       #:phases
       (modify-phases %standard-phases
         ;; Switch to the CLI sources right after unpacking.
         (add-after 'unpack 'chdir-source
           (lambda _
             (chdir "cli/src"))))))
    (inputs
     (list (list "libtocc" libtocc)
           (list "unqlite" unqlite)))
    (synopsis "Command-line interface to libtocc")
    (description
     "Tocc is a tag-based file management system. This package contains the
command line tool for interacting with libtocc.")
    (home-page "https://t-o-c-c.com/")
    (license gpl3+)))
209
6ad2e17e
EB
;; GNU Bool, fetched from the GNU mirror and built with the GNU build
;; system using its default phases and no extra inputs.
(define-public bool
  (package
    (name "bool")
    (version "0.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://gnu/bool/bool-" version ".tar.xz"))
       (sha256
        (base32 "1frdmgrmb509fxbdpsxxw3lvvwv7xm1pavqrqgm4jg698iix6xfw"))))
    (build-system gnu-build-system)
    (synopsis "Finding text and HTML files that match boolean expressions")
    (description
     "GNU Bool is a utility to perform text searches on files using Boolean
expressions. For example, a search for \"hello AND world\" would return a
file containing the phrase \"Hello, world!\". It supports both AND and OR
statements, as well as the NEAR statement to search for the occurrence of
words in close proximity to each other. It handles context gracefully,
accounting for new lines and paragraph changes. It also has robust support
for parsing HTML files.")
    (home-page "https://www.gnu.org/software/bool/")
    (license gpl3+)))
234
ff8b5a3a
TD
;; Hyper Estraier full-text search system.  The configure step receives an
;; LDFLAGS value that adds this package's own "lib" directory as an rpath.
(define-public hyperestraier
  (package
    (name "hyperestraier")
    (version "1.4.13")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://fallabs.com/" name "/"
                           name "-" version ".tar.gz"))
       (sha256
        (base32 "1qk3pxgzyrpcz5qfyd5xs2hw9q1cbb7j5zd4kp1diq501wcj2vs9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "LDFLAGS=-Wl,-rpath=" out "/lib")))))
    (inputs
     (list (list "qdbm" qdbm)
           (list "zlib" zlib)))
    (synopsis "Full-text search system")
    (description "Hyper Estraier can be used to integrate full-text
search into applications, using either the provided command line and CGI
interfaces, or a C API.")
    (home-page "https://fallabs.com/hyperestraier")
    (license lgpl2.1+)))
261
b9b3440b
EB
;; mlocate, fetched from the Pagure release area and built with the GNU
;; build system using its default phases and no extra inputs.
(define-public mlocate
  (package
    (name "mlocate")
    (version "0.26")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://releases.pagure.org/mlocate/"
                           "mlocate-" version ".tar.xz"))
       (sha256
        (base32 "0gi6y52gkakhhlnzy0p6izc36nqhyfx5830qirhvk3qrzrwxyqrh"))))
    (build-system gnu-build-system)
    (synopsis "Locate files on the file system")
    (description
     "mlocate is a locate/updatedb implementation. The @code{m} stands for
\"merging\": @code{updatedb} reuses the existing database to avoid rereading
most of the file system, which makes it faster and does not trash the system
caches as much. The locate(1) utility is intended to be completely compatible
with slocate, and attempts to be compatible to GNU locate when it does not
conflict with slocate compatibility.")
    (home-page "https://pagure.io/mlocate")
    (license gpl2)))
284
a2319d2c
EB
;; Swish-e web indexing system.  The source is fetched from an archived
;; copy of the upstream site, with the Debian pool as a second URI, and two
;; local patches are applied.  After installation, two of the installed
;; Perl programs are wrapped so that PERL5LIB includes the site_perl
;; directories of the Perl module inputs.
(define-public swish-e
  (package
    (name "swish-e")
    (version "2.4.7")
    (source (origin
              (method url-fetch)
              (uri (list (string-append
                          "https://web.archive.org/web/20160730145202/"
                          "http://swish-e.org/distribution/"
                          "swish-e-" version ".tar.gz")
                         (string-append "http://http.debian.net/debian/pool/"
                                        "main/s/swish-e/swish-e_" version
                                        ".orig.tar.gz")))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0qkrk7z25yp9hynj21vxkyn7yi8gcagcfxnass5cgczcz0gm9pax"))
              (patches (search-patches "swish-e-search.patch"
                                       "swish-e-format-security.patch"))))
    (build-system gnu-build-system)
    ;; Several other packages and perl modules may be installed alongside
    ;; swish-e to extend its features at runtime, but are not required for
    ;; building: xpdf, catdoc, MP3::Tag, Spreadsheet::ParseExcel,
    ;; HTML::Entities.
    (inputs
     `(("perl" ,perl)
       ("perl-uri" ,perl-uri)
       ("perl-html-parser" ,perl-html-parser)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-mime-types" ,perl-mime-types)))
    (arguments
     `(;; XXX: This fails to build with zlib (API mismatch) and tests fail
       ;; with libxml2, so disable both.
       ;; The flags are constant strings, so a literal list is enough.
       #:configure-flags '("--without-zlib"
                           "--without-libxml2")
       #:phases (modify-phases %standard-phases
                  (add-after 'install 'wrap-programs
                    (lambda* (#:key inputs outputs #:allow-other-keys)
                      (let ((out (assoc-ref outputs "out")))
                        (for-each
                         (lambda (program)
                           ;; Prefix PERL5LIB with the site_perl directory
                           ;; of each Perl module input listed below.
                           (wrap-program program
                             `("PERL5LIB" ":" prefix
                               ,(map (lambda (i)
                                       (string-append (assoc-ref inputs i)
                                                      "/lib/perl5/site_perl"))
                                     ;; These perl modules have no propagated
                                     ;; inputs, so no further analysis needed.
                                     '("perl-uri"
                                       "perl-html-parser"
                                       "perl-html-tagset"
                                       "perl-mime-types")))))
                         (list (string-append out "/lib/swish-e/swishspider")
                               (string-append out "/bin/swish-filter-test")))
                        #t))))))
    (home-page (string-append "https://web.archive.org/web/20160730145202/"
                              "http://swish-e.org"))
    (synopsis "Web indexing system")
    (description
     "Swish-e is Simple Web Indexing System for Humans - Enhanced. Swish-e
can quickly and easily index directories of files or remote web sites and
search the generated indexes.")
    (license gpl2+)))                   ;with exception
348
f50bad80
AM
;; Xapers document indexing system, built with the Python build system.
;; An extra `install-doc' phase installs the two man pages and writes a
;; truncated copy of "bin/xapers-adder" into the output's bin directory
;; (see the comments in the phase for the reason).
(define-public xapers
  (package
    (name "xapers")
    (version "0.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://finestructure.net/xapers/releases/xapers-"
             version ".tar.gz"))
       (sha256
        (base32
         "0ykz6hn3qj46w3c99d6q0pi5ncq2894simcl7vapv047zm3cylmd"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("poppler" ,poppler)
       ("python-urwid" ,python-urwid)
       ("xclip" ,xclip)
       ("xdg-utils" ,xdg-utils)))
    (inputs
     `(("python-latexcodec" ,python-latexcodec)
       ("python-pybtex" ,python-pybtex)
       ("python-pycurl" ,python-pycurl)
       ("python-pyyaml" ,python-pyyaml)
       ("python-six" ,python-six)
       ("python-xapian-bindings" ,python-xapian-bindings)))
    (arguments
     `(#:modules ((ice-9 rdelim)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'install-doc
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy INPUT to OUTPUT line by line until the first line that
             ;; starts with `if [[ "$term"'.  That line and everything after
             ;; it are dropped and `eval "$cmd"' is written in their place.
             ;; Hitting end of file first means the script no longer has the
             ;; expected shape, so fail with an explicit message instead of
             ;; handing the EOF object to `string-prefix?'.
             (define (purge-term-support input output)
               (let loop ((line (read-line input)))
                 (cond ((eof-object? line)
                        (error "xapers-adder: terminal-support block not found"))
                       ((string-prefix? "if [[ \"$term\"" line)
                        (display "eval \"$cmd\"\n" output)
                        #t)
                       (else
                        (display (string-append line "\n") output)
                        (loop (read-line input))))))
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (adder-out (string-append bin "/xapers-adder"))
                    (man1 (string-append out "/share/man/man1")))
               (install-file "man/man1/xapers.1" man1)
               (install-file "man/man1/xapers-adder.1" man1)
               ;; below is equivalent to setting --no-term option
               ;; permanently on; this is desirable to avoid imposing
               ;; an x-terminal installation on the user but breaks
               ;; some potential xapers-adder uses like auto browser
               ;; pdf handler, but user could instead still use
               ;; e.g. "xterm -e xapers-adder %F" for same use.
               ;; alternatively we could propagate xterm as an input
               ;; and replace 'x-terminal-emulator' with 'xterm'
               (call-with-input-file "bin/xapers-adder"
                 (lambda (input)
                   (call-with-output-file adder-out
                     (lambda (output)
                       (purge-term-support input output)))))
               ;; Make the rewritten script read-only and executable.
               (chmod adder-out #o555)))))))
    (home-page "https://finestructure.net/xapers/")
    (synopsis "Personal document indexing system")
    (description
     "Xapers is a personal document indexing system,
geared towards academic journal articles built on the Xapian search engine.
Think of it as your own personal document search engine, or a local cache of
online libraries. It provides fast search of document text and
bibliographic data and simple document and bibtex retrieval.")
    (license gpl3+)))
419
d8aa4444
HG
;; ugrep, fetched from the upstream Git repository at the tag "v" + version.
;; A source snippet removes the bundled "bin" directory and the archive,
;; PDF and Java class files under "tests".  Tests are disabled; the
;; `check-setup' phase is kept so the suite is ready if they are re-enabled.
(define-public ugrep
  (package
    (name "ugrep")
    (version "3.1.4")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/Genivia/ugrep")
                    (commit (string-append "v" version))))
              (sha256
               (base32 "1ydnpdhn1mp2pnbqzvwabrp573626k89kbv97fax6y1bz2pamrg4"))
              ;; Standard helper for naming Git checkouts.
              (file-name (git-file-name name version))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "bin") ;; pre-built executables
                  (for-each delete-file (find-files "tests" "^archive\\..*"))
                  (for-each delete-file (find-files "tests" "^.*\\.pdf$"))
                  (for-each delete-file (find-files "tests" "^.*\\.class$"))
                  #t))))
    (build-system gnu-build-system)
    (inputs
     `(("bzip2" ,bzip2)
       ("less" ,less)
       ("lz4" ,lz4)
       ("lzip" ,lzip) ;; lzma
       ("pcre2" ,pcre2)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f ;; No script for re-building the binary test input-files
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'check-setup
           (lambda _
             ;; Unpatch shebangs in tests: restore "#!/bin/sh" in the two
             ;; sample scripts wherever a store path was substituted.
             (substitute* '("tests/Hello.bat"
                            "tests/Hello.sh")
               (("#!/gnu/store/.*/bin/sh") "#!/bin/sh")))))))
    (home-page "https://github.com/Genivia/ugrep/")
    (synopsis "Faster grep with an interactive query UI")
    (description "Ugrep is an ultra fast searcher of file systems, text
and binary files, source code, archives, compressed files, documents, and
more.

While still being compatible with the standard GNU/BSD grep command-line
options, ugrep supports fuzzy search as well as structured and (adjustable)
colored output, piped through \"less\" for pagination. An interactive query
UI allows refinement and has a built-in help (press F1). Ugrep implements
multi-threaded and other techniques to speed up search, pattern-matching and
decompression. Many pre-defined regexps ease searching e.g. C typedefs or XML
attributes. Results can be output in several structured or self-defined
formats.")
    (license bsd-3)))
474
98e7fc9b 475;;; search.scm ends here