| 1 | ;;; GNU Guix --- Functional package management for GNU |
| 2 | ;;; Copyright © 2017, 2018 Ludovic Courtès <ludo@gnu.org> |
| 3 | ;;; |
| 4 | ;;; This file is part of GNU Guix. |
| 5 | ;;; |
| 6 | ;;; GNU Guix is free software; you can redistribute it and/or modify it |
| 7 | ;;; under the terms of the GNU General Public License as published by |
| 8 | ;;; the Free Software Foundation; either version 3 of the License, or (at |
| 9 | ;;; your option) any later version. |
| 10 | ;;; |
| 11 | ;;; GNU Guix is distributed in the hope that it will be useful, but |
| 12 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | ;;; GNU General Public License for more details. |
| 15 | ;;; |
| 16 | ;;; You should have received a copy of the GNU General Public License |
| 17 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. |
| 18 | |
| 19 | (define-module (guix man-db) |
| 20 | #:use-module (guix zlib) |
| 21 | #:use-module ((guix build utils) #:select (find-files)) |
| 22 | #:use-module (gdbm) ;gdbm-ffi |
| 23 | #:use-module (srfi srfi-9) |
| 24 | #:use-module (srfi srfi-26) |
| 25 | #:use-module (ice-9 match) |
| 26 | #:use-module (ice-9 rdelim) |
| 27 | #:use-module (ice-9 regex) |
| 28 | #:export (mandb-entry? |
| 29 | mandb-entry-file-name |
| 30 | mandb-entry-name |
| 31 | mandb-entry-section |
| 32 | mandb-entry-synopsis |
| 33 | mandb-entry-kind |
| 34 | |
| 35 | mandb-entries |
| 36 | write-mandb-database)) |
| 37 | |
| 38 | ;;; Comment: |
| 39 | ;;; |
| 40 | ;;; Scan gzipped man pages and create a man-db database. The database is |
| 41 | ;;; meant to be used by 'man -k KEYWORD'. |
| 42 | ;;; |
| 43 | ;;; The implementation here aims to be simpler than that of 'man-db', and to |
| 44 | ;;; produce deterministic output. See <https://bugs.gnu.org/29654>. |
| 45 | ;;; |
| 46 | ;;; Code: |
| 47 | |
| 48 | (define-record-type <mandb-entry> |
| 49 | (mandb-entry file-name name section synopsis kind) |
| 50 | mandb-entry? |
| 51 | (file-name mandb-entry-file-name) ;e.g., "../abiword.1.gz" |
| 52 | (name mandb-entry-name) ;e.g., "ABIWORD" |
| 53 | (section mandb-entry-section) ;number |
| 54 | (synopsis mandb-entry-synopsis) ;string |
| 55 | (kind mandb-entry-kind)) ;'ultimate | 'link |
| 56 | |
| 57 | (define (mandb-entry<? entry1 entry2) |
| 58 | (match entry1 |
| 59 | (($ <mandb-entry> file1 name1 section1) |
| 60 | (match entry2 |
| 61 | (($ <mandb-entry> file2 name2 section2) |
| 62 | (or (< section1 section2) |
| 63 | (string<? (basename file1) (basename file2)))))))) |
| 64 | |
| 65 | (define abbreviate-file-name |
| 66 | (let ((man-file-rx (make-regexp "(.+)\\.[0-9][a-z]?(\\.gz)?$"))) |
| 67 | (lambda (file) |
| 68 | (match (regexp-exec man-file-rx (basename file)) |
| 69 | (#f |
| 70 | (basename file)) |
| 71 | (matches |
| 72 | (match:substring matches 1)))))) |
| 73 | |
| 74 | (define (entry->string entry) |
| 75 | "Return the wire format for ENTRY as a string." |
| 76 | (match entry |
| 77 | (($ <mandb-entry> file name section synopsis kind) |
| 78 | ;; See db_store.c:make_content in man-db for the format. |
| 79 | (string-append (abbreviate-file-name file) "\t" |
| 80 | (number->string section) "\t" |
| 81 | (number->string section) |
| 82 | |
| 83 | ;; Timestamp that we always set to the epoch. |
| 84 | "\t0\t0" |
| 85 | |
| 86 | ;; See "db_storage.h" in man-db for the different kinds. |
| 87 | "\t" |
| 88 | (case kind |
| 89 | ((ultimate) "A") ;ultimate man page |
| 90 | ((link) "B") ;".so" link to other man page |
| 91 | (else "A")) ;something that doesn't matter much |
| 92 | |
| 93 | "\t-\t-\t" |
| 94 | |
| 95 | (if (string-suffix? ".gz" file) "gz" "") |
| 96 | "\t" |
| 97 | |
| 98 | synopsis "\x00")))) |
| 99 | |
| 100 | ;; The man-db schema version we're compatible with. |
| 101 | (define %version-key "$version$\x00") |
| 102 | (define %version-value "2.5.0\x00") |
| 103 | |
| 104 | (define (write-mandb-database file entries) |
| 105 | "Write ENTRIES to FILE as a man-db database. FILE is usually |
| 106 | \".../index.db\", and is a GDBM database." |
| 107 | (let ((db (gdbm-open file GDBM_WRCREAT))) |
| 108 | (gdbm-set! db %version-key %version-value) |
| 109 | |
| 110 | ;; Write ENTRIES in sorted order so we get deterministic output. |
| 111 | (for-each (lambda (entry) |
| 112 | (gdbm-set! db |
| 113 | (string-append (mandb-entry-file-name entry) |
| 114 | "\x00") |
| 115 | (entry->string entry))) |
| 116 | (sort entries mandb-entry<?)) |
| 117 | (gdbm-close db))) |
| 118 | |
| 119 | (define (read-synopsis port) |
| 120 | "Read from PORT a man page synopsis." |
| 121 | (define (section? line) |
| 122 | ;; True if LINE starts with ".SH", ".PP", or so. |
| 123 | (string-prefix? "." (string-trim line))) |
| 124 | |
| 125 | (define (extract-synopsis str) |
| 126 | (match (string-contains str "\\-") |
| 127 | (#f "") |
| 128 | (index |
| 129 | (string-map (match-lambda |
| 130 | (#\newline #\space) |
| 131 | (chr chr)) |
| 132 | (string-trim-both (string-drop str (+ 2 index))))))) |
| 133 | |
| 134 | ;; Synopses look like "Command \- Do something.", possibly spanning several |
| 135 | ;; lines. |
| 136 | (let loop ((lines '())) |
| 137 | (match (read-line port 'concat) |
| 138 | ((? eof-object?) |
| 139 | (extract-synopsis (string-concatenate-reverse lines))) |
| 140 | ((? section?) |
| 141 | (extract-synopsis (string-concatenate-reverse lines))) |
| 142 | (line |
| 143 | (loop (cons line lines)))))) |
| 144 | |
| 145 | (define* (man-page->entry file #:optional (resolve identity)) |
| 146 | "Parse FILE, a gzipped man page, and return a <mandb-entry> for it." |
| 147 | (define (string->number* str) |
| 148 | (if (and (string-prefix? "\"" str) |
| 149 | (> (string-length str) 1) |
| 150 | (string-suffix? "\"" str)) |
| 151 | (string->number (string-drop (string-drop-right str 1) 1)) |
| 152 | (string->number str))) |
| 153 | |
| 154 | ;; Note: This works for both gzipped and uncompressed files. |
| 155 | (call-with-gzip-input-port (open-file file "r0") |
| 156 | (lambda (port) |
| 157 | (let loop ((name #f) |
| 158 | (section #f) |
| 159 | (synopsis #f) |
| 160 | (kind 'ultimate)) |
| 161 | (if (and name section synopsis) |
| 162 | (mandb-entry file name section synopsis kind) |
| 163 | (let ((line (read-line port))) |
| 164 | (if (eof-object? line) |
| 165 | (mandb-entry file name (or section 0) (or synopsis "") |
| 166 | kind) |
| 167 | (match (string-tokenize line) |
| 168 | ((".TH" name (= string->number* section) _ ...) |
| 169 | (loop name section synopsis kind)) |
| 170 | ((".SH" (or "NAME" "\"NAME\"")) |
| 171 | (loop name section (read-synopsis port) kind)) |
| 172 | ((".so" link) |
| 173 | (match (and=> (resolve link) |
| 174 | (cut man-page->entry <> resolve)) |
| 175 | (#f |
| 176 | (loop name section synopsis 'link)) |
| 177 | (alias |
| 178 | (mandb-entry file |
| 179 | (mandb-entry-name alias) |
| 180 | (mandb-entry-section alias) |
| 181 | (mandb-entry-synopsis alias) |
| 182 | 'link)))) |
| 183 | (_ |
| 184 | (loop name section synopsis kind)))))))))) |
| 185 | |
| 186 | (define (man-files directory) |
| 187 | "Return the list of man pages found under DIRECTORY, recursively." |
| 188 | ;; Filter the list to ensure that broken symlinks are excluded. |
| 189 | (filter file-exists? (find-files directory "\\.[0-9][a-z]?(\\.gz)?$"))) |
| 190 | |
| 191 | (define (mandb-entries directory) |
| 192 | "Return mandb entries for the man pages found under DIRECTORY, recursively." |
| 193 | (map (lambda (file) |
| 194 | (man-page->entry file |
| 195 | (lambda (link) |
| 196 | (let ((file (string-append directory "/" link |
| 197 | ".gz"))) |
| 198 | (and (file-exists? file) file))))) |
| 199 | (man-files directory))) |