merge from 1.8 branch
[bpt/guile.git] / ice-9 / ftw.scm
1 ;;;; ftw.scm --- filesystem tree walk
2
3 ;;;; Copyright (C) 2002, 2003, 2006 Free Software Foundation, Inc.
4 ;;;;
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 2.1 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
20
21 ;;; Commentary:
22
23 ;; Two procedures are provided: `ftw' and `nftw'.
24
25 ;; NOTE: The following description was adapted from the GNU libc info page, w/
26 ;; significant modifications for a more "Schemey" interface. Most noticible
27 ;; are the inlining of `struct FTW *' parameters `base' and `level' and the
28 ;; omission of `descriptors' parameters.
29
30 ;; * Types
31 ;;
32 ;; The X/Open specification defines two procedures to process whole
33 ;; hierarchies of directories and the contained files. Both procedures
34 ;; of this `ftw' family take as one of the arguments a callback procedure
35 ;; which must be of these types.
36 ;;
37 ;; - Data Type: __ftw_proc_t
38 ;; (lambda (filename statinfo flag) ...) => status
39 ;;
40 ;; Type for callback procedures given to the `ftw' procedure. The
41 ;; first parameter is a filename, the second parameter is the
42 ;; vector value as returned by calling `stat' on FILENAME.
43 ;;
44 ;; The last parameter is a symbol giving more information about
45 ;; FILENAM. It can have one of the following values:
46 ;;
47 ;; `regular'
48 ;; The current item is a normal file or files which do not fit
49 ;; into one of the following categories. This means
50 ;; especially special files, sockets etc.
51 ;;
52 ;; `directory'
53 ;; The current item is a directory.
54 ;;
55 ;; `invalid-stat'
56 ;; The `stat' call to fill the object pointed to by the second
57 ;; parameter failed and so the information is invalid.
58 ;;
59 ;; `directory-not-readable'
60 ;; The item is a directory which cannot be read.
61 ;;
62 ;; `symlink'
63 ;; The item is a symbolic link. Since symbolic links are
64 ;; normally followed seeing this value in a `ftw' callback
65 ;; procedure means the referenced file does not exist. The
66 ;; situation for `nftw' is different.
67 ;;
68 ;; - Data Type: __nftw_proc_t
69 ;; (lambda (filename statinfo flag base level) ...) => status
70 ;;
71 ;; The first three arguments have the same as for the
72 ;; `__ftw_proc_t' type. A difference is that for the third
73 ;; argument some additional values are defined to allow finer
74 ;; differentiation:
75 ;;
76 ;; `directory-processed'
77 ;; The current item is a directory and all subdirectories have
78 ;; already been visited and reported. This flag is returned
79 ;; instead of `directory' if the `depth' flag is given to
80 ;; `nftw' (see below).
81 ;;
82 ;; `stale-symlink'
83 ;; The current item is a stale symbolic link. The file it
84 ;; points to does not exist.
85 ;;
86 ;; The last two parameters are described below. They contain
87 ;; information to help interpret FILENAME and give some information
88 ;; about current state of the traversal of the directory hierarchy.
89 ;;
90 ;; `base'
91 ;; The value specifies which part of the filename argument
92 ;; given in the first parameter to the callback procedure is
93 ;; the name of the file. The rest of the string is the path
94 ;; to locate the file. This information is especially
95 ;; important if the `chdir' flag for `nftw' was set since then
96 ;; the current directory is the one the current item is found
97 ;; in.
98 ;;
99 ;; `level'
100 ;; While processing the directory the procedures tracks how
101 ;; many directories have been examined to find the current
102 ;; item. This nesting level is 0 for the item given starting
103 ;; item (file or directory) and is incremented by one for each
104 ;; entered directory.
105 ;;
106 ;; * Procedure: (ftw filename proc . options)
107 ;; Do a filesystem tree walk starting at FILENAME using PROC.
108 ;;
109 ;; The `ftw' procedure calls the callback procedure given in the
110 ;; parameter PROC for every item which is found in the directory
111 ;; specified by FILENAME and all directories below. The procedure
112 ;; follows symbolic links if necessary but does not process an item
113 ;; twice. If FILENAME names no directory this item is the only
114 ;; object reported by calling the callback procedure.
115 ;;
116 ;; The filename given to the callback procedure is constructed by
117 ;; taking the FILENAME parameter and appending the names of all
118 ;; passed directories and then the local file name. So the
119 ;; callback procedure can use this parameter to access the file.
120 ;; Before the callback procedure is called `ftw' calls `stat' for
121 ;; this file and passes the information up to the callback
122 ;; procedure. If this `stat' call was not successful the failure is
123 ;; indicated by setting the flag argument of the callback procedure
124 ;; to `invalid-stat'. Otherwise the flag is set according to the
125 ;; description given in the description of `__ftw_proc_t' above.
126 ;;
127 ;; The callback procedure is expected to return non-#f to indicate
128 ;; that no error occurred and the processing should be continued.
129 ;; If an error occurred in the callback procedure or the call to
130 ;; `ftw' shall return immediately the callback procedure can return
131 ;; #f. This is the only correct way to stop the procedure. The
132 ;; program must not use `throw' or similar techniques to continue
133 ;; the program in another place. [Can we relax this? --ttn]
134 ;;
135 ;; The return value of the `ftw' procedure is #t if all callback
136 ;; procedure calls returned #t and all actions performed by the
137 ;; `ftw' succeeded. If some procedure call failed (other than
138 ;; calling `stat' on an item) the procedure returns #f. If a
139 ;; callback procedure returns a value other than #t this value is
140 ;; returned as the return value of `ftw'.
141 ;;
142 ;; * Procedure: (nftw filename proc . control-flags)
143 ;; Do a new-style filesystem tree walk starting at FILENAME using PROC.
144 ;; Various optional CONTROL-FLAGS alter the default behavior.
145 ;;
146 ;; The `nftw' procedures works like the `ftw' procedures. It calls
147 ;; the callback procedure PROC for all items it finds in the
148 ;; directory FILENAME and below.
149 ;;
150 ;; The differences are that for one the callback procedure is of a
151 ;; different type. It takes also `base' and `level' parameters as
152 ;; described above.
153 ;;
154 ;; The second difference is that `nftw' takes additional optional
155 ;; arguments which are zero or more of the following symbols:
156 ;;
157 ;; physical'
158 ;; While traversing the directory symbolic links are not
159 ;; followed. I.e., if this flag is given symbolic links are
160 ;; reported using the `symlink' value for the type parameter
161 ;; to the callback procedure. Please note that if this flag is
162 ;; used the appearance of `symlink' in a callback procedure
163 ;; does not mean the referenced file does not exist. To
164 ;; indicate this the extra value `stale-symlink' exists.
165 ;;
166 ;; mount'
167 ;; The callback procedure is only called for items which are on
168 ;; the same mounted filesystem as the directory given as the
169 ;; FILENAME parameter to `nftw'.
170 ;;
171 ;; chdir'
172 ;; If this flag is given the current working directory is
173 ;; changed to the directory containing the reported object
174 ;; before the callback procedure is called.
175 ;;
176 ;; depth'
177 ;; If this option is given the procedure visits first all files
178 ;; and subdirectories before the callback procedure is called
179 ;; for the directory itself (depth-first processing). This
180 ;; also means the type flag given to the callback procedure is
181 ;; `directory-processed' and not `directory'.
182 ;;
183 ;; The return value is computed in the same way as for `ftw'.
184 ;; `nftw' returns #t if no failure occurred in `nftw' and all
185 ;; callback procedure call return values are also #t. For internal
186 ;; errors such as memory problems the error `ftw-error' is thrown.
187 ;; If the return value of a callback invocation is not #t this
188 ;; very same value is returned.
189
190 ;;; Code:
191
192 (define-module (ice-9 ftw)
193 :export (ftw nftw))
194
195 (define (directory-files dir)
196 (let ((dir-stream (opendir dir)))
197 (let loop ((new (readdir dir-stream))
198 (acc '()))
199 (if (eof-object? new)
200 (begin
201 (closedir dir-stream)
202 acc)
203 (loop (readdir dir-stream)
204 (if (or (string=? "." new) ;;; ignore
205 (string=? ".." new)) ;;; ignore
206 acc
207 (cons new acc)))))))
208
209 (define (pathify . nodes)
210 (let loop ((nodes nodes)
211 (result ""))
212 (if (null? nodes)
213 (or (and (string=? "" result) "")
214 (substring result 1 (string-length result)))
215 (loop (cdr nodes) (string-append result "/" (car nodes))))))
216
217 (define (abs? filename)
218 (char=? #\/ (string-ref filename 0)))
219
220 (define (visited?-proc size)
221 (let ((visited (make-hash-table size)))
222 (lambda (s)
223 (and s (let ((ino (stat:ino s)))
224 (or (hash-ref visited ino)
225 (begin
226 (hash-set! visited ino #t)
227 #f)))))))
228
229 (define (stat-dir-readable?-proc uid gid)
230 (let ((uid (getuid))
231 (gid (getgid)))
232 (lambda (s)
233 (let* ((perms (stat:perms s))
234 (perms-bit-set? (lambda (mask)
235 (not (= 0 (logand mask perms))))))
236 (or (and (= uid (stat:uid s))
237 (perms-bit-set? #o400))
238 (and (= gid (stat:gid s))
239 (perms-bit-set? #o040))
240 (perms-bit-set? #o004))))))
241
242 (define (stat&flag-proc dir-readable? . control-flags)
243 (let* ((directory-flag (if (memq 'depth control-flags)
244 'directory-processed
245 'directory))
246 (stale-symlink-flag (if (memq 'nftw-style control-flags)
247 'stale-symlink
248 'symlink))
249 (physical? (memq 'physical control-flags))
250 (easy-flag (lambda (s)
251 (let ((type (stat:type s)))
252 (if (eq? 'directory type)
253 (if (dir-readable? s)
254 directory-flag
255 'directory-not-readable)
256 'regular)))))
257 (lambda (name)
258 (let ((s (false-if-exception (lstat name))))
259 (cond ((not s)
260 (values s 'invalid-stat))
261 ((eq? 'symlink (stat:type s))
262 (let ((s-follow (false-if-exception (stat name))))
263 (cond ((not s-follow)
264 (values s stale-symlink-flag))
265 ((and s-follow physical?)
266 (values s 'symlink))
267 ((and s-follow (not physical?))
268 (values s-follow (easy-flag s-follow))))))
269 (else (values s (easy-flag s))))))))
270
271 (define (clean name)
272 (let ((last-char-index (1- (string-length name))))
273 (if (char=? #\/ (string-ref name last-char-index))
274 (substring name 0 last-char-index)
275 name)))
276
277 (define (ftw filename proc . options)
278 (let* ((visited? (visited?-proc (cond ((memq 'hash-size options) => cadr)
279 (else 211))))
280 (stat&flag (stat&flag-proc
281 (stat-dir-readable?-proc (getuid) (getgid)))))
282 (letrec ((go (lambda (fullname)
283 (call-with-values (lambda () (stat&flag fullname))
284 (lambda (s flag)
285 (or (visited? s)
286 (let ((ret (proc fullname s flag))) ; callback
287 (or (eq? #t ret)
288 (throw 'ftw-early-exit ret))
289 (and (eq? 'directory flag)
290 (for-each
291 (lambda (child)
292 (go (pathify fullname child)))
293 (directory-files fullname)))
294 #t)))))))
295 (catch 'ftw-early-exit
296 (lambda () (go (clean filename)))
297 (lambda (key val) val)))))
298
299 (define (nftw filename proc . control-flags)
300 (let* ((od (getcwd)) ; orig dir
301 (odev (let ((s (false-if-exception (lstat filename))))
302 (if s (stat:dev s) -1)))
303 (same-dev? (if (memq 'mount control-flags)
304 (lambda (s) (= (stat:dev s) odev))
305 (lambda (s) #t)))
306 (base-sub (lambda (name base) (substring name 0 base)))
307 (maybe-cd (if (memq 'chdir control-flags)
308 (if (abs? filename)
309 (lambda (fullname base)
310 (or (= 0 base)
311 (chdir (base-sub fullname base))))
312 (lambda (fullname base)
313 (chdir
314 (pathify od (base-sub fullname base)))))
315 (lambda (fullname base) #t)))
316 (maybe-cd-back (if (memq 'chdir control-flags)
317 (lambda () (chdir od))
318 (lambda () #t)))
319 (depth-first? (memq 'depth control-flags))
320 (visited? (visited?-proc
321 (cond ((memq 'hash-size control-flags) => cadr)
322 (else 211))))
323 (has-kids? (if depth-first?
324 (lambda (flag) (eq? flag 'directory-processed))
325 (lambda (flag) (eq? flag 'directory))))
326 (stat&flag (apply stat&flag-proc
327 (stat-dir-readable?-proc (getuid) (getgid))
328 (cons 'nftw-style control-flags))))
329 (letrec ((go (lambda (fullname base level)
330 (call-with-values (lambda () (stat&flag fullname))
331 (lambda (s flag)
332 (letrec ((self (lambda ()
333 (maybe-cd fullname base)
334 ;; the callback
335 (let ((ret (proc fullname s flag
336 base level)))
337 (maybe-cd-back)
338 (or (eq? #t ret)
339 (throw 'nftw-early-exit ret)))))
340 (kids (lambda ()
341 (and (has-kids? flag)
342 (for-each
343 (lambda (child)
344 (go (pathify fullname child)
345 (1+ (string-length
346 fullname))
347 (1+ level)))
348 (directory-files fullname))))))
349 (or (visited? s)
350 (not (same-dev? s))
351 (if depth-first?
352 (begin (kids) (self))
353 (begin (self) (kids)))))))
354 #t)))
355 (let ((ret (catch 'nftw-early-exit
356 (lambda () (go (clean filename) 0 0))
357 (lambda (key val) val))))
358 (chdir od)
359 ret))))
360
361 ;;; ftw.scm ends here