define-module for elisp special modules
[bpt/guile.git] / module / ice-9 / ftw.scm
1 ;;;; ftw.scm --- file system tree walk
2
3 ;;;; Copyright (C) 2002, 2003, 2006, 2011, 2012, 2014 Free Software Foundation, Inc.
4 ;;;;
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
20
21 ;;; Commentary:
22
23 ;; Two procedures are provided: `ftw' and `nftw'.
24
25 ;; NOTE: The following description was adapted from the GNU libc info page, w/
26 ;; significant modifications for a more "Schemey" interface. Most noticible
27 ;; are the inlining of `struct FTW *' parameters `base' and `level' and the
28 ;; omission of `descriptors' parameters.
29
30 ;; * Types
31 ;;
32 ;; The X/Open specification defines two procedures to process whole
33 ;; hierarchies of directories and the contained files. Both procedures
34 ;; of this `ftw' family take as one of the arguments a callback procedure
35 ;; which must be of these types.
36 ;;
37 ;; - Data Type: __ftw_proc_t
38 ;; (lambda (filename statinfo flag) ...) => status
39 ;;
40 ;; Type for callback procedures given to the `ftw' procedure. The
41 ;; first parameter is a filename, the second parameter is the
42 ;; vector value as returned by calling `stat' on FILENAME.
43 ;;
44 ;; The last parameter is a symbol giving more information about
45 ;; FILENAM. It can have one of the following values:
46 ;;
47 ;; `regular'
48 ;; The current item is a normal file or files which do not fit
49 ;; into one of the following categories. This means
50 ;; especially special files, sockets etc.
51 ;;
52 ;; `directory'
53 ;; The current item is a directory.
54 ;;
55 ;; `invalid-stat'
56 ;; The `stat' call to fill the object pointed to by the second
57 ;; parameter failed and so the information is invalid.
58 ;;
59 ;; `directory-not-readable'
60 ;; The item is a directory which cannot be read.
61 ;;
62 ;; `symlink'
63 ;; The item is a symbolic link. Since symbolic links are
64 ;; normally followed seeing this value in a `ftw' callback
65 ;; procedure means the referenced file does not exist. The
66 ;; situation for `nftw' is different.
67 ;;
68 ;; - Data Type: __nftw_proc_t
69 ;; (lambda (filename statinfo flag base level) ...) => status
70 ;;
71 ;; The first three arguments have the same as for the
72 ;; `__ftw_proc_t' type. A difference is that for the third
73 ;; argument some additional values are defined to allow finer
74 ;; differentiation:
75 ;;
76 ;; `directory-processed'
77 ;; The current item is a directory and all subdirectories have
78 ;; already been visited and reported. This flag is returned
79 ;; instead of `directory' if the `depth' flag is given to
80 ;; `nftw' (see below).
81 ;;
82 ;; `stale-symlink'
83 ;; The current item is a stale symbolic link. The file it
84 ;; points to does not exist.
85 ;;
86 ;; The last two parameters are described below. They contain
87 ;; information to help interpret FILENAME and give some information
88 ;; about current state of the traversal of the directory hierarchy.
89 ;;
90 ;; `base'
91 ;; The value specifies which part of the filename argument
92 ;; given in the first parameter to the callback procedure is
93 ;; the name of the file. The rest of the string is the path
94 ;; to locate the file. This information is especially
95 ;; important if the `chdir' flag for `nftw' was set since then
96 ;; the current directory is the one the current item is found
97 ;; in.
98 ;;
99 ;; `level'
100 ;; While processing the directory the procedures tracks how
101 ;; many directories have been examined to find the current
102 ;; item. This nesting level is 0 for the item given starting
103 ;; item (file or directory) and is incremented by one for each
104 ;; entered directory.
105 ;;
106 ;; * Procedure: (ftw filename proc . options)
107 ;; Do a file system tree walk starting at FILENAME using PROC.
108 ;;
109 ;; The `ftw' procedure calls the callback procedure given in the
110 ;; parameter PROC for every item which is found in the directory
111 ;; specified by FILENAME and all directories below. The procedure
112 ;; follows symbolic links if necessary but does not process an item
113 ;; twice. If FILENAME names no directory this item is the only
114 ;; object reported by calling the callback procedure.
115 ;;
116 ;; The filename given to the callback procedure is constructed by
117 ;; taking the FILENAME parameter and appending the names of all
118 ;; passed directories and then the local file name. So the
119 ;; callback procedure can use this parameter to access the file.
120 ;; Before the callback procedure is called `ftw' calls `stat' for
121 ;; this file and passes the information up to the callback
122 ;; procedure. If this `stat' call was not successful the failure is
123 ;; indicated by setting the flag argument of the callback procedure
124 ;; to `invalid-stat'. Otherwise the flag is set according to the
125 ;; description given in the description of `__ftw_proc_t' above.
126 ;;
127 ;; The callback procedure is expected to return non-#f to indicate
128 ;; that no error occurred and the processing should be continued.
129 ;; If an error occurred in the callback procedure or the call to
130 ;; `ftw' shall return immediately the callback procedure can return
131 ;; #f. This is the only correct way to stop the procedure. The
132 ;; program must not use `throw' or similar techniques to continue
133 ;; the program in another place. [Can we relax this? --ttn]
134 ;;
135 ;; The return value of the `ftw' procedure is #t if all callback
136 ;; procedure calls returned #t and all actions performed by the
137 ;; `ftw' succeeded. If some procedure call failed (other than
138 ;; calling `stat' on an item) the procedure returns #f. If a
139 ;; callback procedure returns a value other than #t this value is
140 ;; returned as the return value of `ftw'.
141 ;;
142 ;; * Procedure: (nftw filename proc . control-flags)
143 ;; Do a new-style file system tree walk starting at FILENAME using PROC.
144 ;; Various optional CONTROL-FLAGS alter the default behavior.
145 ;;
146 ;; The `nftw' procedures works like the `ftw' procedures. It calls
147 ;; the callback procedure PROC for all items it finds in the
148 ;; directory FILENAME and below.
149 ;;
150 ;; The differences are that for one the callback procedure is of a
151 ;; different type. It takes also `base' and `level' parameters as
152 ;; described above.
153 ;;
154 ;; The second difference is that `nftw' takes additional optional
155 ;; arguments which are zero or more of the following symbols:
156 ;;
157 ;; physical'
158 ;; While traversing the directory symbolic links are not
159 ;; followed. I.e., if this flag is given symbolic links are
160 ;; reported using the `symlink' value for the type parameter
161 ;; to the callback procedure. Please note that if this flag is
162 ;; used the appearance of `symlink' in a callback procedure
163 ;; does not mean the referenced file does not exist. To
164 ;; indicate this the extra value `stale-symlink' exists.
165 ;;
166 ;; mount'
167 ;; The callback procedure is only called for items which are on
168 ;; the same mounted file system as the directory given as the
169 ;; FILENAME parameter to `nftw'.
170 ;;
171 ;; chdir'
172 ;; If this flag is given the current working directory is
173 ;; changed to the directory containing the reported object
174 ;; before the callback procedure is called.
175 ;;
176 ;; depth'
177 ;; If this option is given the procedure visits first all files
178 ;; and subdirectories before the callback procedure is called
179 ;; for the directory itself (depth-first processing). This
180 ;; also means the type flag given to the callback procedure is
181 ;; `directory-processed' and not `directory'.
182 ;;
183 ;; The return value is computed in the same way as for `ftw'.
184 ;; `nftw' returns #t if no failure occurred in `nftw' and all
185 ;; callback procedure call return values are also #t. For internal
186 ;; errors such as memory problems the error `ftw-error' is thrown.
187 ;; If the return value of a callback invocation is not #t this
188 ;; very same value is returned.
189
190 ;;; Code:
191
192 (define-module (ice-9 ftw)
193 #:use-module (ice-9 match)
194 #:use-module (ice-9 vlist)
195 #:use-module (srfi srfi-1)
196 #:autoload (ice-9 i18n) (string-locale<?)
197 #:export (ftw nftw
198 file-system-fold
199 file-system-tree
200 scandir))
201
202 (define (directory-files dir)
203 (let ((dir-stream (opendir dir)))
204 (let loop ((new (readdir dir-stream))
205 (acc '()))
206 (if (eof-object? new)
207 (begin
208 (closedir dir-stream)
209 acc)
210 (loop (readdir dir-stream)
211 (if (or (string=? "." new) ;;; ignore
212 (string=? ".." new)) ;;; ignore
213 acc
214 (cons new acc)))))))
215
216 (define (pathify . nodes)
217 (let loop ((nodes nodes)
218 (result ""))
219 (if (null? nodes)
220 (or (and (string=? "" result) "")
221 (substring result 1 (string-length result)))
222 (loop (cdr nodes) (string-append result "/" (car nodes))))))
223
224 (define (abs? filename)
225 (char=? #\/ (string-ref filename 0)))
226
227 ;; `visited?-proc' returns a test procedure VISITED? which when called as
228 ;; (VISITED? stat-obj) returns #f the first time a distinct file is seen,
229 ;; then #t on any subsequent sighting of it.
230 ;;
231 ;; stat:dev and stat:ino together uniquely identify a file (see "Attribute
232 ;; Meanings" in the glibc manual). Often there'll be just one dev, and
233 ;; usually there's just a handful mounted, so the strategy here is a small
234 ;; hash table indexed by dev, containing hash tables indexed by ino.
235 ;;
236 ;; It'd be possible to make a pair (dev . ino) and use that as the key to a
237 ;; single hash table. It'd use an extra pair for every file visited, but
238 ;; might be a little faster if it meant less scheme code.
239 ;;
240 (define (visited?-proc size)
241 (let ((dev-hash (make-hash-table 7)))
242 (lambda (s)
243 (and s
244 (let ((ino-hash (hashv-ref dev-hash (stat:dev s)))
245 (ino (stat:ino s)))
246 (or ino-hash
247 (begin
248 (set! ino-hash (make-hash-table size))
249 (hashv-set! dev-hash (stat:dev s) ino-hash)))
250 (or (hashv-ref ino-hash ino)
251 (begin
252 (hashv-set! ino-hash ino #t)
253 #f)))))))
254
255 (define (stat-dir-readable?-proc uid gid)
256 (let ((uid (getuid))
257 (gid (getgid)))
258 (lambda (s)
259 (let* ((perms (stat:perms s))
260 (perms-bit-set? (lambda (mask)
261 (not (= 0 (logand mask perms))))))
262 (or (zero? uid)
263 (and (= uid (stat:uid s))
264 (perms-bit-set? #o400))
265 (and (= gid (stat:gid s))
266 (perms-bit-set? #o040))
267 (perms-bit-set? #o004))))))
268
269 (define (stat&flag-proc dir-readable? . control-flags)
270 (let* ((directory-flag (if (memq 'depth control-flags)
271 'directory-processed
272 'directory))
273 (stale-symlink-flag (if (memq 'nftw-style control-flags)
274 'stale-symlink
275 'symlink))
276 (physical? (memq 'physical control-flags))
277 (easy-flag (lambda (s)
278 (let ((type (stat:type s)))
279 (if (eq? 'directory type)
280 (if (dir-readable? s)
281 directory-flag
282 'directory-not-readable)
283 'regular)))))
284 (lambda (name)
285 (let ((s (false-if-exception (lstat name))))
286 (cond ((not s)
287 (values s 'invalid-stat))
288 ((eq? 'symlink (stat:type s))
289 (let ((s-follow (false-if-exception (stat name))))
290 (cond ((not s-follow)
291 (values s stale-symlink-flag))
292 ((and s-follow physical?)
293 (values s 'symlink))
294 ((and s-follow (not physical?))
295 (values s-follow (easy-flag s-follow))))))
296 (else (values s (easy-flag s))))))))
297
298 (define (clean name)
299 (let ((last-char-index (1- (string-length name))))
300 (if (char=? #\/ (string-ref name last-char-index))
301 (substring name 0 last-char-index)
302 name)))
303
304 (define (ftw filename proc . options)
305 (let* ((visited? (visited?-proc (cond ((memq 'hash-size options) => cadr)
306 (else 211))))
307 (stat&flag (stat&flag-proc
308 (stat-dir-readable?-proc (getuid) (getgid)))))
309 (letrec ((go (lambda (fullname)
310 (call-with-values (lambda () (stat&flag fullname))
311 (lambda (s flag)
312 (or (visited? s)
313 (let ((ret (proc fullname s flag))) ; callback
314 (or (eq? #t ret)
315 (throw 'ftw-early-exit ret))
316 (and (eq? 'directory flag)
317 (for-each
318 (lambda (child)
319 (go (pathify fullname child)))
320 (directory-files fullname)))
321 #t)))))))
322 (catch 'ftw-early-exit
323 (lambda () (go (clean filename)))
324 (lambda (key val) val)))))
325
326 (define (nftw filename proc . control-flags)
327 (let* ((od (getcwd)) ; orig dir
328 (odev (let ((s (false-if-exception (lstat filename))))
329 (if s (stat:dev s) -1)))
330 (same-dev? (if (memq 'mount control-flags)
331 (lambda (s) (= (stat:dev s) odev))
332 (lambda (s) #t)))
333 (base-sub (lambda (name base) (substring name 0 base)))
334 (maybe-cd (if (memq 'chdir control-flags)
335 (if (abs? filename)
336 (lambda (fullname base)
337 (or (= 0 base)
338 (chdir (base-sub fullname base))))
339 (lambda (fullname base)
340 (chdir
341 (pathify od (base-sub fullname base)))))
342 (lambda (fullname base) #t)))
343 (maybe-cd-back (if (memq 'chdir control-flags)
344 (lambda () (chdir od))
345 (lambda () #t)))
346 (depth-first? (memq 'depth control-flags))
347 (visited? (visited?-proc
348 (cond ((memq 'hash-size control-flags) => cadr)
349 (else 211))))
350 (has-kids? (if depth-first?
351 (lambda (flag) (eq? flag 'directory-processed))
352 (lambda (flag) (eq? flag 'directory))))
353 (stat&flag (apply stat&flag-proc
354 (stat-dir-readable?-proc (getuid) (getgid))
355 (cons 'nftw-style control-flags))))
356 (letrec ((go (lambda (fullname base level)
357 (call-with-values (lambda () (stat&flag fullname))
358 (lambda (s flag)
359 (letrec ((self (lambda ()
360 (maybe-cd fullname base)
361 ;; the callback
362 (let ((ret (proc fullname s flag
363 base level)))
364 (maybe-cd-back)
365 (or (eq? #t ret)
366 (throw 'nftw-early-exit ret)))))
367 (kids (lambda ()
368 (and (has-kids? flag)
369 (for-each
370 (lambda (child)
371 (go (pathify fullname child)
372 (1+ (string-length
373 fullname))
374 (1+ level)))
375 (directory-files fullname))))))
376 (or (visited? s)
377 (not (same-dev? s))
378 (if depth-first?
379 (begin (kids) (self))
380 (begin (self) (kids)))))))
381 #t)))
382 (let ((ret (catch 'nftw-early-exit
383 (lambda () (go (clean filename) 0 0))
384 (lambda (key val) val))))
385 (chdir od)
386 ret))))
387
388 \f
389 ;;;
390 ;;; `file-system-fold' & co.
391 ;;;
392
393 (define-syntax-rule (errno-if-exception expr)
394 (catch 'system-error
395 (lambda ()
396 expr)
397 (lambda args
398 (system-error-errno args))))
399
400 (define* (file-system-fold enter? leaf down up skip error init file-name
401 #:optional (stat lstat))
402 "Traverse the directory at FILE-NAME, recursively. Enter
403 sub-directories only when (ENTER? PATH STAT RESULT) returns true. When
404 a sub-directory is entered, call (DOWN PATH STAT RESULT), where PATH is
405 the path of the sub-directory and STAT the result of (stat PATH); when
406 it is left, call (UP PATH STAT RESULT). For each file in a directory,
407 call (LEAF PATH STAT RESULT). When ENTER? returns false, call (SKIP
408 PATH STAT RESULT). When an `opendir' or STAT call raises an exception,
409 call (ERROR PATH STAT ERRNO RESULT), with ERRNO being the operating
410 system error number that was raised.
411
412 Return the result of these successive applications.
413 When FILE-NAME names a flat file, (LEAF PATH STAT INIT) is returned.
414 The optional STAT parameter defaults to `lstat'."
415
416 (define (mark v s)
417 (vhash-cons (cons (stat:dev s) (stat:ino s)) #t v))
418
419 (define (visited? v s)
420 (vhash-assoc (cons (stat:dev s) (stat:ino s)) v))
421
422 (let loop ((name file-name)
423 (path "")
424 (dir-stat (errno-if-exception (stat file-name)))
425 (result init)
426 (visited vlist-null))
427
428 (define full-name
429 (if (string=? path "")
430 name
431 (string-append path "/" name)))
432
433 (cond
434 ((integer? dir-stat)
435 ;; FILE-NAME is not readable.
436 (error full-name #f dir-stat result))
437 ((visited? visited dir-stat)
438 (values result visited))
439 ((eq? 'directory (stat:type dir-stat)) ; true except perhaps the 1st time
440 (if (enter? full-name dir-stat result)
441 (let ((dir (errno-if-exception (opendir full-name)))
442 (visited (mark visited dir-stat)))
443 (cond
444 ((directory-stream? dir)
445 (let liip ((entry (readdir dir))
446 (result (down full-name dir-stat result))
447 (subdirs '()))
448 (cond ((eof-object? entry)
449 (begin
450 (closedir dir)
451 (let ((r+v
452 (fold (lambda (subdir result+visited)
453 (call-with-values
454 (lambda ()
455 (loop (car subdir)
456 full-name
457 (cdr subdir)
458 (car result+visited)
459 (cdr result+visited)))
460 cons))
461 (cons result visited)
462 subdirs)))
463 (values (up full-name dir-stat (car r+v))
464 (cdr r+v)))))
465 ((or (string=? entry ".")
466 (string=? entry ".."))
467 (liip (readdir dir)
468 result
469 subdirs))
470 (else
471 (let* ((child (string-append full-name "/" entry))
472 (st (errno-if-exception (stat child))))
473 (if (integer? st) ; CHILD is a dangling symlink?
474 (liip (readdir dir)
475 (error child #f st result)
476 subdirs)
477 (if (eq? (stat:type st) 'directory)
478 (liip (readdir dir)
479 result
480 (alist-cons entry st subdirs))
481 (liip (readdir dir)
482 (leaf child st result)
483 subdirs))))))))
484 (else
485 ;; Directory FULL-NAME not readable, but it is stat'able.
486 (values (error full-name dir-stat dir result)
487 visited))))
488 (values (skip full-name dir-stat result)
489 (mark visited dir-stat))))
490 (else
491 ;; Caller passed a FILE-NAME that names a flat file, not a directory.
492 (leaf full-name dir-stat result)))))
493
494 (define* (file-system-tree file-name
495 #:optional (enter? (lambda (n s) #t))
496 (stat lstat))
497 "Return a tree of the form (FILE-NAME STAT CHILDREN ...) where STAT is
498 the result of (STAT FILE-NAME) and CHILDREN are similar structures for
499 each file contained in FILE-NAME when it designates a directory. The
500 optional ENTER? predicate is invoked as (ENTER? NAME STAT) and should
501 return true to allow recursion into directory NAME; the default value is
502 a procedure that always returns #t. When a directory does not match
503 ENTER?, it nonetheless appears in the resulting tree, only with zero
504 children. The optional STAT parameter defaults to `lstat'. Return #f
505 when FILE-NAME is not readable."
506 (define (enter?* name stat result)
507 (enter? name stat))
508 (define (leaf name stat result)
509 (match result
510 (((siblings ...) rest ...)
511 (cons (alist-cons (basename name) (cons stat '()) siblings)
512 rest))))
513 (define (down name stat result)
514 (cons '() result))
515 (define (up name stat result)
516 (match result
517 (((children ...) (siblings ...) rest ...)
518 (cons (alist-cons (basename name) (cons stat children)
519 siblings)
520 rest))))
521 (define skip ; keep an entry for skipped directories
522 leaf)
523 (define (error name stat errno result)
524 (if (string=? name file-name)
525 result
526 (leaf name stat result)))
527
528 (match (file-system-fold enter?* leaf down up skip error '(())
529 file-name stat)
530 (((tree)) tree)
531 ((()) #f))) ; FILE-NAME is unreadable
532
533 (define* (scandir name #:optional (select? (const #t))
534 (entry<? string-locale<?))
535 "Return the list of the names of files contained in directory NAME
536 that match predicate SELECT? (by default, all files.) The returned list
537 of file names is sorted according to ENTRY<?, which defaults to
538 `string-locale<?'. Return #f when NAME is unreadable or is not a directory."
539 (define (enter? dir stat result)
540 (and stat (string=? dir name)))
541
542 (define (visit basename result)
543 (if (select? basename)
544 (cons basename result)
545 result))
546
547 (define (leaf name stat result)
548 (and result
549 (visit (basename name) result)))
550
551 (define (down name stat result)
552 (visit "." '()))
553
554 (define (up name stat result)
555 (visit ".." result))
556
557 (define (skip name stat result)
558 ;; All the sub-directories are skipped.
559 (visit (basename name) result))
560
561 (define (error name* stat errno result)
562 (if (string=? name name*) ; top-level NAME is unreadable
563 result
564 (visit (basename name*) result)))
565
566 (and=> (file-system-fold enter? leaf down up skip error #f name lstat)
567 (lambda (files)
568 (sort files entry<?))))
569
570 ;;; ftw.scm ends here