gnu: Properly credit Konrad Hinsen.
[jackhill/guix/guix.git] / guix / man-db.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2017, 2018 Ludovic Courtès <ludo@gnu.org>
3 ;;;
4 ;;; This file is part of GNU Guix.
5 ;;;
6 ;;; GNU Guix is free software; you can redistribute it and/or modify it
7 ;;; under the terms of the GNU General Public License as published by
8 ;;; the Free Software Foundation; either version 3 of the License, or (at
9 ;;; your option) any later version.
10 ;;;
11 ;;; GNU Guix is distributed in the hope that it will be useful, but
12 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;;; GNU General Public License for more details.
15 ;;;
16 ;;; You should have received a copy of the GNU General Public License
17 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
18
19 (define-module (guix man-db)
20 #:use-module (zlib)
21 #:use-module ((guix build utils) #:select (find-files))
22 #:use-module (gdbm) ;gdbm-ffi
23 #:use-module (srfi srfi-9)
24 #:use-module (srfi srfi-26)
25 #:use-module (ice-9 match)
26 #:use-module (ice-9 rdelim)
27 #:use-module (ice-9 regex)
28 #:export (mandb-entry?
29 mandb-entry-file-name
30 mandb-entry-name
31 mandb-entry-section
32 mandb-entry-synopsis
33 mandb-entry-kind
34
35 mandb-entries
36 write-mandb-database))
37
38 ;;; Comment:
39 ;;;
40 ;;; Scan gzipped man pages and create a man-db database. The database is
41 ;;; meant to be used by 'man -k KEYWORD'.
42 ;;;
43 ;;; The implementation here aims to be simpler than that of 'man-db', and to
44 ;;; produce deterministic output. See <https://bugs.gnu.org/29654>.
45 ;;;
46 ;;; Code:
47
48 (define-record-type <mandb-entry>
49 (mandb-entry file-name name section synopsis kind)
50 mandb-entry?
51 (file-name mandb-entry-file-name) ;e.g., "../abiword.1.gz"
52 (name mandb-entry-name) ;e.g., "ABIWORD"
53 (section mandb-entry-section) ;number
54 (synopsis mandb-entry-synopsis) ;string
55 (kind mandb-entry-kind)) ;'ultimate | 'link
56
57 (define (mandb-entry<? entry1 entry2)
58 (match entry1
59 (($ <mandb-entry> file1 name1 section1)
60 (match entry2
61 (($ <mandb-entry> file2 name2 section2)
62 (or (< section1 section2)
63 (string<? (basename file1) (basename file2))))))))
64
65 (define abbreviate-file-name
66 (let ((man-file-rx (make-regexp "(.+)\\.[0-9][a-z]?(\\.gz)?$")))
67 (lambda (file)
68 (match (regexp-exec man-file-rx (basename file))
69 (#f
70 (basename file))
71 (matches
72 (match:substring matches 1))))))
73
74 (define (entry->string entry)
75 "Return the wire format for ENTRY as a string."
76 (match entry
77 (($ <mandb-entry> file name section synopsis kind)
78 ;; See db_store.c:make_content in man-db for the format.
79 (string-append (abbreviate-file-name file) "\t"
80 (number->string section) "\t"
81 (number->string section)
82
83 ;; Timestamp that we always set to the epoch.
84 "\t0\t0"
85
86 ;; See "db_storage.h" in man-db for the different kinds.
87 "\t"
88 (case kind
89 ((ultimate) "A") ;ultimate man page
90 ((link) "B") ;".so" link to other man page
91 (else "A")) ;something that doesn't matter much
92
93 "\t-\t-\t"
94
95 (if (string-suffix? ".gz" file) "gz" "")
96 "\t"
97
98 synopsis "\x00"))))
99
100 ;; The man-db schema version we're compatible with.
101 (define %version-key "$version$\x00")
102 (define %version-value "2.5.0\x00")
103
104 (define (write-mandb-database file entries)
105 "Write ENTRIES to FILE as a man-db database. FILE is usually
106 \".../index.db\", and is a GDBM database."
107 (let ((db (gdbm-open file GDBM_WRCREAT)))
108 (gdbm-set! db %version-key %version-value)
109
110 ;; Write ENTRIES in sorted order so we get deterministic output.
111 (for-each (lambda (entry)
112 (gdbm-set! db
113 (string-append (mandb-entry-file-name entry)
114 "\x00")
115 (entry->string entry)))
116 (sort entries mandb-entry<?))
117 (gdbm-close db)))
118
119 (define (read-synopsis port)
120 "Read from PORT a man page synopsis."
121 (define (section? line)
122 ;; True if LINE starts with ".SH", ".PP", or so.
123 (string-prefix? "." (string-trim line)))
124
125 (define (extract-synopsis str)
126 (match (string-contains str "\\-")
127 (#f "")
128 (index
129 (string-map (match-lambda
130 (#\newline #\space)
131 (chr chr))
132 (string-trim-both (string-drop str (+ 2 index)))))))
133
134 ;; Synopses look like "Command \- Do something.", possibly spanning several
135 ;; lines.
136 (let loop ((lines '()))
137 (match (read-line port 'concat)
138 ((? eof-object?)
139 (extract-synopsis (string-concatenate-reverse lines)))
140 ((? section?)
141 (extract-synopsis (string-concatenate-reverse lines)))
142 (line
143 (loop (cons line lines))))))
144
145 (define* (man-page->entry file #:optional (resolve identity))
146 "Parse FILE, a gzipped man page, and return a <mandb-entry> for it."
147 (define (string->number* str)
148 (if (and (string-prefix? "\"" str)
149 (> (string-length str) 1)
150 (string-suffix? "\"" str))
151 (string->number (string-drop (string-drop-right str 1) 1))
152 (string->number str)))
153
154 ;; Note: This works for both gzipped and uncompressed files.
155 (call-with-gzip-input-port (open-file file "r0")
156 (lambda (port)
157 (let loop ((name #f)
158 (section #f)
159 (synopsis #f)
160 (kind 'ultimate))
161 (if (and name section synopsis)
162 (mandb-entry file name section synopsis kind)
163 (let ((line (read-line port)))
164 (if (eof-object? line)
165 (mandb-entry file name (or section 0) (or synopsis "")
166 kind)
167 (match (string-tokenize line)
168 ((".TH" name (= string->number* section) _ ...)
169 (loop name section synopsis kind))
170 ((".SH" (or "NAME" "\"NAME\""))
171 (loop name section (read-synopsis port) kind))
172 ((".so" link)
173 (match (and=> (resolve link)
174 (cut man-page->entry <> resolve))
175 (#f
176 (loop name section synopsis 'link))
177 (alias
178 (mandb-entry file
179 (mandb-entry-name alias)
180 (mandb-entry-section alias)
181 (mandb-entry-synopsis alias)
182 'link))))
183 (_
184 (loop name section synopsis kind))))))))))
185
186 (define (man-files directory)
187 "Return the list of man pages found under DIRECTORY, recursively."
188 ;; Filter the list to ensure that broken symlinks are excluded.
189 (filter file-exists? (find-files directory "\\.[0-9][a-z]?(\\.gz)?$")))
190
191 (define (mandb-entries directory)
192 "Return mandb entries for the man pages found under DIRECTORY, recursively."
193 (map (lambda (file)
194 (man-page->entry file
195 (lambda (link)
196 (let ((file (string-append directory "/" link
197 ".gz")))
198 (and (file-exists? file) file)))))
199 (man-files directory)))