Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / vol / ihandle.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 /* ihandle.c - file descriptor caching for Inode handles. */
11 /* */
12 /************************************************************************/
13
14 #include <afsconfig.h>
15 #include <afs/param.h>
16
17 #include <roken.h>
18
19 #include <limits.h>
20
21 #ifdef HAVE_SYS_RESOURCE_H
22 #include <sys/resource.h>
23 #endif
24
25 #include <afs/opr.h>
26 #ifdef AFS_PTHREAD_ENV
27 # include <opr/lock.h>
28 #endif
29 #include <afs/afsint.h>
30 #include <afs/afssyscalls.h>
31 #include <afs/afsutil.h>
32
33 #include "nfs.h"
34 #include "ihandle.h"
35 #include "viceinode.h"
36
#ifdef AFS_PTHREAD_ENV
/* Global lock for the ihandle package.  ih_glock_mutex is initialized by
 * ih_glock_init(); ih_glock_once is presumably the pthread_once control
 * used to run that initializer exactly once (the call site is not in this
 * file -- confirm against the IH_LOCK definition). */
pthread_once_t ih_glock_once = PTHREAD_ONCE_INIT;
pthread_mutex_t ih_glock_mutex;
#endif /* AFS_PTHREAD_ENV */

/* Linked list of available (unused) inode handles */
IHandle_t *ihAvailHead;
IHandle_t *ihAvailTail;

/* Linked list of available (unused) file descriptor handles */
FdHandle_t *fdAvailHead;
FdHandle_t *fdAvailTail;

/* Linked list of available (unused) stream descriptor handles */
StreamHandle_t *streamAvailHead;
StreamHandle_t *streamAvailTail;

/* LRU list for file descriptor handles.  Handles are appended at the tail
 * when released by fd_close() and recycled from the head. */
FdHandle_t *fdLruHead;
FdHandle_t *fdLruTail;

/* Set to 1 by ih_Initialize() once the lists and hash table are set up */
int ih_Inited = 0;
/* Set to 1 by ih_PkgDefaults() once vol_io_params holds default values */
int ih_PkgDefaultsSet = 0;

/* Most of the servers use fopen/fdopen. Since the FILE structure
 * only has eight bits for the file descriptor, the cache size
 * has to be less than 256. The cache can be made larger as long
 * as you are sure you don't need fopen/fdopen. */

/* As noted in ihandle.h, the fileno member of FILE on most platforms
 * in 2008 is a 16- or 32-bit signed int. -Matt
 */
/* Upper bound for the cache, computed in ih_Initialize() */
int fdMaxCacheSize = 0;
/* Cache size currently in effect; raised to fdMaxCacheSize by
 * ih_UseLargeCache() */
int fdCacheSize = 0;

/* Number of in use file descriptors */
int fdInUseCount = 0;

/* Hash table for inode handles, indexed by IH_HASH(dev, vid, ino) */
IHashBucket_t ihashTable[I_HANDLE_HASH_SIZE];

static int _ih_release_r(IHandle_t * ihP);

/* start-time configurable I/O limits */
ih_init_params vol_io_params;
82
83 void ih_PkgDefaults(void)
84 {
85 /* once */
86 ih_PkgDefaultsSet = 1;
87
88 /* default to well-known values */
89 vol_io_params.fd_handle_setaside = FD_HANDLE_SETASIDE;
90
91 /* initial fd cachesize. the only one that will be used if
92 * the application does not call ih_UseLargeCache(). set this
93 * to a value representable in fileno member of the system's
94 * FILE structure (or equivalent). */
95 vol_io_params.fd_initial_cachesize = FD_DEFAULT_CACHESIZE;
96
97 /* fd cache size that will be used if/when ih_UseLargeCache()
98 * is called */
99 vol_io_params.fd_max_cachesize = FD_MAX_CACHESIZE;
100
101 vol_io_params.sync_behavior = IH_SYNC_ONCLOSE;
102 }
103
104 int
105 ih_SetSyncBehavior(const char *behavior)
106 {
107 int val;
108
109 if (strcmp(behavior, "always") == 0) {
110 val = IH_SYNC_ALWAYS;
111
112 } else if (strcmp(behavior, "onclose") == 0) {
113 val = IH_SYNC_ONCLOSE;
114
115 } else if (strcmp(behavior, "never") == 0) {
116 val = IH_SYNC_NEVER;
117
118 } else {
119 /* invalid behavior name */
120 return -1;
121 }
122
123 vol_io_params.sync_behavior = val;
124 return 0;
125 }
126
#ifdef AFS_PTHREAD_ENV
/*
 * Initialize the global ihandle mutex (ih_glock_mutex).
 * Only built for pthread environments.
 */
void
ih_glock_init(void)
{
    opr_mutex_init(&ih_glock_mutex);
}
#endif /* AFS_PTHREAD_ENV */
135
/*
 * Initialize the file descriptor cache.
 *
 * Resets the free lists, the LRU list and every hash bucket, then
 * computes fdMaxCacheSize in a platform-specific way and derives the
 * initial fdCacheSize from it.  Asserts that it has not been called
 * before.  Reads vol_io_params, so ih_PkgDefaults() (or equivalent
 * configuration) is expected to have run first.
 */
void
ih_Initialize(void)
{
    int i;
    opr_Assert(!ih_Inited);
    ih_Inited = 1;
    DLL_INIT_LIST(ihAvailHead, ihAvailTail);
    DLL_INIT_LIST(fdAvailHead, fdAvailTail);
    DLL_INIT_LIST(fdLruHead, fdLruTail);
    for (i = 0; i < I_HANDLE_HASH_SIZE; i++) {
        DLL_INIT_LIST(ihashTable[i].ihash_head, ihashTable[i].ihash_tail);
    }
#if defined(AFS_NT40_ENV)
    /* Windows: take the configured maximum as-is */
    fdMaxCacheSize = vol_io_params.fd_max_cachesize;
#elif defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
    {
        /* Raise the soft descriptor limit to the hard limit, then size the
         * cache from it minus the set-aside descriptors. */
        struct rlimit rlim;
        opr_Verify(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
        rlim.rlim_cur = rlim.rlim_max;
        opr_Verify(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
        fdMaxCacheSize = rlim.rlim_cur - vol_io_params.fd_handle_setaside;
#ifdef AFS_NBSD_ENV
        /* XXX This avoids using up all system fds.  NetBSD is somewhat
         * broken and sets the maximum fd count for a root process to the
         * same value as the number of fds available system-wide, so if
         * the fileserver uses up all of its process fds, all system fds
         * will be used up too!
         *
         * Check for this better
         */
        fdMaxCacheSize /= 4;
#endif
        fdMaxCacheSize = min(fdMaxCacheSize, vol_io_params.fd_max_cachesize);
        opr_Assert(fdMaxCacheSize > 0);
    }
#elif defined(AFS_HPUX_ENV)
    /* Avoid problems with "UFSOpen: igetinode failed" panics on HPUX 11.0 */
    fdMaxCacheSize = 0;
#else
    {
        /* Everything else: derive the limit from sysconf(), clamped at 0
         * so a failing or tiny _SC_OPEN_MAX cannot go negative. */
        long fdMax = max(sysconf(_SC_OPEN_MAX) - vol_io_params.fd_handle_setaside,
                         0);
        fdMaxCacheSize = (int)min(fdMax, vol_io_params.fd_max_cachesize);
    }
#endif
    /* Start with the smaller, FILE-safe size; see ih_UseLargeCache() */
    fdCacheSize = min(fdMaxCacheSize, vol_io_params.fd_initial_cachesize);
}
184
185 /* Make the file descriptor cache as big as possible. Don't this call
186 * if the program uses fopen or fdopen, if fd_max_cachesize cannot be
187 * represented in the fileno member of the system FILE structure (or
188 * equivalent).
189 */
190 void
191 ih_UseLargeCache(void)
192 {
193 IH_LOCK;
194
195 if (!ih_PkgDefaultsSet) {
196 ih_PkgDefaults();
197 }
198
199 if (!ih_Inited) {
200 ih_Initialize();
201 }
202
203 fdCacheSize = fdMaxCacheSize;
204
205 IH_UNLOCK;
206 }
207
208 /* Allocate a chunk of inode handles */
209 void
210 iHandleAllocateChunk(void)
211 {
212 int i;
213 IHandle_t *ihP;
214
215 opr_Assert(ihAvailHead == NULL);
216 ihP = malloc(I_HANDLE_MALLOCSIZE * sizeof(IHandle_t));
217 opr_Assert(ihP != NULL);
218 for (i = 0; i < I_HANDLE_MALLOCSIZE; i++) {
219 ihP[i].ih_refcnt = 0;
220 DLL_INSERT_TAIL(&ihP[i], ihAvailHead, ihAvailTail, ih_next, ih_prev);
221 }
222 }
223
224 /* Initialize an inode handle */
225 IHandle_t *
226 ih_init(int dev, int vid, Inode ino)
227 {
228 int ihash = IH_HASH(dev, vid, ino);
229 IHandle_t *ihP;
230
231 if (!ih_PkgDefaultsSet) {
232 ih_PkgDefaults();
233 }
234
235 IH_LOCK;
236 if (!ih_Inited) {
237 ih_Initialize();
238 }
239
240 /* Do we already have a handle for this Inode? */
241 for (ihP = ihashTable[ihash].ihash_head; ihP; ihP = ihP->ih_next) {
242 if (ihP->ih_ino == ino && ihP->ih_vid == vid && ihP->ih_dev == dev) {
243 ihP->ih_refcnt++;
244 IH_UNLOCK;
245 return ihP;
246 }
247 }
248
249 /* Allocate and initialize a new Inode handle */
250 if (ihAvailHead == NULL) {
251 iHandleAllocateChunk();
252 }
253 ihP = ihAvailHead;
254 opr_Assert(ihP->ih_refcnt == 0);
255 DLL_DELETE(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
256 ihP->ih_dev = dev;
257 ihP->ih_vid = vid;
258 ihP->ih_ino = ino;
259 ihP->ih_flags = 0;
260 ihP->ih_synced = 0;
261 ihP->ih_refcnt = 1;
262 DLL_INIT_LIST(ihP->ih_fdhead, ihP->ih_fdtail);
263 DLL_INSERT_TAIL(ihP, ihashTable[ihash].ihash_head,
264 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
265 IH_UNLOCK;
266 return ihP;
267 }
268
269 /* Copy an inode handle */
270 IHandle_t *
271 ih_copy(IHandle_t * ihP)
272 {
273 IH_LOCK;
274 opr_Assert(ih_Inited);
275 opr_Assert(ihP->ih_refcnt > 0);
276 ihP->ih_refcnt++;
277 IH_UNLOCK;
278 return ihP;
279 }
280
281 /* Allocate a chunk of file descriptor handles */
282 void
283 fdHandleAllocateChunk(void)
284 {
285 int i;
286 FdHandle_t *fdP;
287
288 opr_Assert(fdAvailHead == NULL);
289 fdP = malloc(FD_HANDLE_MALLOCSIZE * sizeof(FdHandle_t));
290 opr_Assert(fdP != NULL);
291 for (i = 0; i < FD_HANDLE_MALLOCSIZE; i++) {
292 fdP[i].fd_status = FD_HANDLE_AVAIL;
293 fdP[i].fd_refcnt = 0;
294 fdP[i].fd_ih = NULL;
295 fdP[i].fd_fd = INVALID_FD;
296 fdP[i].fd_ihnext = NULL;
297 fdP[i].fd_ihprev = NULL;
298 DLL_INSERT_TAIL(&fdP[i], fdAvailHead, fdAvailTail, fd_next, fd_prev);
299 }
300 }
301
302 /* Allocate a chunk of stream handles */
303 void
304 streamHandleAllocateChunk(void)
305 {
306 int i;
307 StreamHandle_t *streamP;
308
309 opr_Assert(streamAvailHead == NULL);
310 streamP = (StreamHandle_t *)
311 malloc(STREAM_HANDLE_MALLOCSIZE * sizeof(StreamHandle_t));
312 opr_Assert(streamP != NULL);
313 for (i = 0; i < STREAM_HANDLE_MALLOCSIZE; i++) {
314 streamP[i].str_fd = INVALID_FD;
315 DLL_INSERT_TAIL(&streamP[i], streamAvailHead, streamAvailTail,
316 str_next, str_prev);
317 }
318 }
319
/*
 * Wrap an already-open file descriptor in an FdHandle_t attached to ihP.
 *
 * Takes the given file descriptor, and creates a new FdHandle_t for it,
 * attached to the given IHandle_t. If fdLruHead is not NULL, fd can be
 * INVALID_FD, indicating that the caller failed to open the relevant file
 * because we had too many FDs open; ih_attachfd_r will then just evict/close
 * an existing fd in the cache, and return NULL. You must not call this
 * function with an invalid fd while fdLruHead is NULL; instead, error out.
 *
 * Called with IH_LOCK held.  The lock is dropped and reacquired around
 * OS_CLOSE() whenever a cached descriptor is evicted.  Both callers in
 * this file bump fdInUseCount for the new fd before calling; this
 * function only decrements it for a descriptor it closes.
 *
 * On success the returned handle is FD_HANDLE_INUSE and both its own and
 * ihP's reference counts have been incremented.
 */
static FdHandle_t *
ih_attachfd_r(IHandle_t *ihP, FD_t fd)
{
    FD_t closeFd;
    FdHandle_t *fdP;

    /* If the given fd is invalid, we must have an available fd to close.
     * Otherwise, the caller must have realized this before calling
     * ih_attachfd_r and yielded an error before getting here. */
    opr_Assert(fd != INVALID_FD || fdLruHead != NULL);

    /* fdCacheSize limits the size of the descriptor cache, but
     * we permit the number of open files to exceed fdCacheSize.
     * We only recycle open file descriptors when the number
     * of open files reaches the size of the cache */
    if ((fdInUseCount > fdCacheSize || fd == INVALID_FD) && fdLruHead != NULL) {
        /* Evict the least recently used cached handle: unlink it from
         * the LRU list and from its owning inode handle's fd list. */
        fdP = fdLruHead;
        opr_Assert(fdP->fd_status == FD_HANDLE_OPEN);
        DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
        DLL_DELETE(fdP, fdP->fd_ih->ih_fdhead, fdP->fd_ih->ih_fdtail,
                   fd_ihnext, fd_ihprev);
        closeFd = fdP->fd_fd;
        if (fd == INVALID_FD) {
            /* Nothing to attach: just free up one OS descriptor so the
             * caller can retry its open. */
            fdCacheSize--;          /* reduce in order to not run into here too often */
            DLL_INSERT_TAIL(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
            fdP->fd_status = FD_HANDLE_AVAIL;
            fdP->fd_ih = NULL;
            fdP->fd_fd = INVALID_FD;
            IH_UNLOCK;
            OS_CLOSE(closeFd);
            IH_LOCK;
            fdInUseCount -= 1;
            return NULL;
        }
    } else {
        /* Cache not full (or nothing to evict): use a free handle */
        if (fdAvailHead == NULL) {
            fdHandleAllocateChunk();
        }
        fdP = fdAvailHead;
        opr_Assert(fdP->fd_status == FD_HANDLE_AVAIL);
        DLL_DELETE(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
        closeFd = INVALID_FD;
    }

    fdP->fd_status = FD_HANDLE_INUSE;
    fdP->fd_fd = fd;
    fdP->fd_ih = ihP;
    fdP->fd_refcnt++;

    ihP->ih_refcnt++;

    /* Add this handle to the Inode's list of open descriptors */
    DLL_INSERT_TAIL(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
                    fd_ihprev);

    /* If a cached descriptor was recycled above, close its old OS fd
     * now, outside the lock. */
    if (closeFd != INVALID_FD) {
        IH_UNLOCK;
        OS_CLOSE(closeFd);
        IH_LOCK;
        fdInUseCount -= 1;
    }

    return fdP;
}
393
394 FdHandle_t *
395 ih_attachfd(IHandle_t *ihP, FD_t fd)
396 {
397 FdHandle_t *fdP;
398
399 if (fd == INVALID_FD) {
400 return NULL;
401 }
402
403 IH_LOCK;
404
405 fdInUseCount += 1;
406
407 fdP = ih_attachfd_r(ihP, fd);
408 opr_Assert(fdP);
409
410 IH_UNLOCK;
411
412 return fdP;
413 }
414
415 /*
416 * Get a file descriptor handle given an Inode handle
417 */
418 FdHandle_t *
419 ih_open(IHandle_t * ihP)
420 {
421 FdHandle_t *fdP;
422 FD_t fd;
423
424 if (!ihP) /* XXX should log here in the fileserver */
425 return NULL;
426
427 IH_LOCK;
428
429 /* Do we already have an open file handle for this Inode? */
430 for (fdP = ihP->ih_fdtail; fdP != NULL; fdP = fdP->fd_ihprev) {
431 if (fdP->fd_status == FD_HANDLE_CLOSING) {
432 /* The handle was open when an IH_REALLYCLOSE was issued, so we
433 * cannot reuse it; it will be closed soon. */
434 continue;
435 }
436 #ifndef AFS_IHANDLE_PIO_ENV
437 /*
438 * If we don't have positional i/o, don't try to share fds, since
439 * we can't do so in a threadsafe way.
440 */
441 if (fdP->fd_status == FD_HANDLE_INUSE) {
442 continue;
443 }
444 opr_Assert(fdP->fd_status == FD_HANDLE_OPEN);
445 #else /* AFS_IHANDLE_PIO_ENV */
446 opr_Assert(fdP->fd_status != FD_HANDLE_AVAIL);
447 #endif /* AFS_IHANDLE_PIO_ENV */
448
449 fdP->fd_refcnt++;
450 if (fdP->fd_status == FD_HANDLE_OPEN) {
451 fdP->fd_status = FD_HANDLE_INUSE;
452 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
453 }
454 ihP->ih_refcnt++;
455 IH_UNLOCK;
456 return fdP;
457 }
458
459 /*
460 * Try to open the Inode, return NULL on error.
461 */
462 fdInUseCount += 1;
463 IH_UNLOCK;
464 ih_open_retry:
465 fd = OS_IOPEN(ihP);
466 IH_LOCK;
467 if (fd == INVALID_FD && (errno != EMFILE || fdLruHead == NULL) ) {
468 fdInUseCount -= 1;
469 IH_UNLOCK;
470 return NULL;
471 }
472
473 fdP = ih_attachfd_r(ihP, fd);
474 if (!fdP) {
475 opr_Assert(fd == INVALID_FD);
476 IH_UNLOCK;
477 goto ih_open_retry;
478 }
479
480 IH_UNLOCK;
481
482 return fdP;
483 }
484
/*
 * Return a file descriptor handle to the cache.
 *
 * Drops one reference on fdP.  When the last reference goes away the
 * descriptor stays open and is queued on the LRU list for reuse, unless
 * the handle must really be closed (see below), in which case the work
 * is delegated to fd_reallyclose().  One reference on the owning inode
 * handle is dropped as well.
 *
 * Returns 0 (also for a NULL fdP), or the result of fd_reallyclose().
 */
int
fd_close(FdHandle_t * fdP)
{
    IHandle_t *ihP;

    if (!fdP)
        return 0;

    IH_LOCK;
    opr_Assert(ih_Inited);
    opr_Assert(fdInUseCount > 0);
    opr_Assert(fdP->fd_status == FD_HANDLE_INUSE ||
               fdP->fd_status == FD_HANDLE_CLOSING);

    ihP = fdP->fd_ih;

    /* Call fd_reallyclose to really close the unused file handles if
     * the previous attempt to close (ih_reallyclose()) all file handles
     * failed (this is determined by checking the ihandle for the flag
     * IH_REALLY_CLOSED) or we have too many open files.
     */
    if (fdP->fd_status == FD_HANDLE_CLOSING ||
        ihP->ih_flags & IH_REALLY_CLOSED || fdInUseCount > fdCacheSize) {
        /* fd_reallyclose() takes IH_LOCK itself, so release it first */
        IH_UNLOCK;
        return fd_reallyclose(fdP);
    }

    fdP->fd_refcnt--;
    if (fdP->fd_refcnt == 0) {
        /* Put this descriptor back into the cache */
        fdP->fd_status = FD_HANDLE_OPEN;
        DLL_INSERT_TAIL(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
    }

    /* If this is not the only reference to the Inode then we can decrement
     * the reference count, otherwise we need to call ih_release.
     */
    if (ihP->ih_refcnt > 1)
        ihP->ih_refcnt--;
    else
        _ih_release_r(ihP);

    IH_UNLOCK;

    return 0;
}
534
535 /*
536 * Actually close the file descriptor handle and return it to
537 * the free list.
538 */
539 int
540 fd_reallyclose(FdHandle_t * fdP)
541 {
542 FD_t closeFd;
543 IHandle_t *ihP;
544
545 if (!fdP)
546 return 0;
547
548 IH_LOCK;
549 opr_Assert(ih_Inited);
550 opr_Assert(fdInUseCount > 0);
551 opr_Assert(fdP->fd_status == FD_HANDLE_INUSE ||
552 fdP->fd_status == FD_HANDLE_CLOSING);
553
554 ihP = fdP->fd_ih;
555 closeFd = fdP->fd_fd;
556 fdP->fd_refcnt--;
557
558 if (fdP->fd_refcnt == 0) {
559 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext, fd_ihprev);
560 DLL_INSERT_TAIL(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
561
562 fdP->fd_status = FD_HANDLE_AVAIL;
563 fdP->fd_refcnt = 0;
564 fdP->fd_ih = NULL;
565 fdP->fd_fd = INVALID_FD;
566 }
567
568 /* All the file descriptor handles have been closed; reset
569 * the IH_REALLY_CLOSED flag indicating that ih_reallyclose
570 * has completed its job.
571 */
572 if (!ihP->ih_fdhead) {
573 ihP->ih_flags &= ~IH_REALLY_CLOSED;
574 } else {
575 FdHandle_t *lfdP, *next;
576 int clear = 1;
577 for (lfdP = ihP->ih_fdhead; lfdP != NULL; lfdP = next) {
578 next = lfdP->fd_ihnext;
579 osi_Assert(lfdP->fd_ih == ihP);
580 if (lfdP->fd_status != FD_HANDLE_CLOSING) {
581 clear = 0;
582 break;
583 }
584 }
585 /* no *future* fd should be subjected to this */
586 if (clear)
587 ihP->ih_flags &= ~IH_REALLY_CLOSED;
588 }
589
590 if (fdP->fd_refcnt == 0) {
591 IH_UNLOCK;
592 OS_CLOSE(closeFd);
593 IH_LOCK;
594 fdInUseCount -= 1;
595 }
596
597 /* If this is not the only reference to the Inode then we can decrement
598 * the reference count, otherwise we need to call ih_release. */
599 if (ihP->ih_refcnt > 1)
600 ihP->ih_refcnt--;
601 else
602 _ih_release_r(ihP);
603
604 IH_UNLOCK;
605
606 return 0;
607 }
608
609 /* Enable buffered I/O on a file descriptor */
610 StreamHandle_t *
611 stream_fdopen(FD_t fd)
612 {
613 StreamHandle_t *streamP;
614
615 IH_LOCK;
616 if (streamAvailHead == NULL) {
617 streamHandleAllocateChunk();
618 }
619 streamP = streamAvailHead;
620 DLL_DELETE(streamP, streamAvailHead, streamAvailTail, str_next, str_prev);
621 IH_UNLOCK;
622 streamP->str_fd = fd;
623 streamP->str_buflen = 0;
624 streamP->str_bufoff = 0;
625 streamP->str_fdoff = 0;
626 streamP->str_error = 0;
627 streamP->str_eof = 0;
628 streamP->str_direction = STREAM_DIRECTION_NONE;
629 return streamP;
630 }
631
632 /* Open a file for buffered I/O */
633 StreamHandle_t *
634 stream_open(const char *filename, const char *mode)
635 {
636 FD_t fd = INVALID_FD;
637
638 if (strcmp(mode, "r") == 0) {
639 fd = OS_OPEN(filename, O_RDONLY, 0);
640 } else if (strcmp(mode, "r+") == 0) {
641 fd = OS_OPEN(filename, O_RDWR, 0);
642 } else if (strcmp(mode, "w") == 0) {
643 fd = OS_OPEN(filename, O_WRONLY | O_TRUNC | O_CREAT, 0);
644 } else if (strcmp(mode, "w+") == 0) {
645 fd = OS_OPEN(filename, O_RDWR | O_TRUNC | O_CREAT, 0);
646 } else if (strcmp(mode, "a") == 0) {
647 fd = OS_OPEN(filename, O_WRONLY | O_APPEND | O_CREAT, 0);
648 } else if (strcmp(mode, "a+") == 0) {
649 fd = OS_OPEN(filename, O_RDWR | O_APPEND | O_CREAT, 0);
650 } else {
651 opr_abort(); /* not implemented */
652 }
653
654 if (fd == INVALID_FD) {
655 return NULL;
656 }
657 return stream_fdopen(fd);
658 }
659
660 /* fread for buffered I/O handles */
661 afs_sfsize_t
662 stream_read(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
663 StreamHandle_t * streamP)
664 {
665 afs_fsize_t nbytes, bytesRead, bytesToRead;
666 char *p;
667
668 /* Need to seek before changing direction */
669 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
670 streamP->str_direction = STREAM_DIRECTION_READ;
671 streamP->str_bufoff = 0;
672 streamP->str_buflen = 0;
673 } else {
674 opr_Assert(streamP->str_direction == STREAM_DIRECTION_READ);
675 }
676
677 bytesRead = 0;
678 nbytes = size * nitems;
679 p = (char *)ptr;
680 while (nbytes > 0 && !streamP->str_eof) {
681 if (streamP->str_buflen == 0) {
682 streamP->str_bufoff = 0;
683 streamP->str_buflen =
684 OS_PREAD(streamP->str_fd, streamP->str_buffer,
685 STREAM_HANDLE_BUFSIZE, streamP->str_fdoff);
686 if (streamP->str_buflen < 0) {
687 streamP->str_error = errno;
688 streamP->str_buflen = 0;
689 bytesRead = 0;
690 break;
691 } else if (streamP->str_buflen == 0) {
692 streamP->str_eof = 1;
693 break;
694 }
695 streamP->str_fdoff += streamP->str_buflen;
696 }
697
698 bytesToRead = nbytes;
699 if (bytesToRead > streamP->str_buflen) {
700 bytesToRead = streamP->str_buflen;
701 }
702 memcpy(p, streamP->str_buffer + streamP->str_bufoff, bytesToRead);
703 p += bytesToRead;
704 streamP->str_bufoff += bytesToRead;
705 streamP->str_buflen -= bytesToRead;
706 bytesRead += bytesToRead;
707 nbytes -= bytesToRead;
708 }
709
710 return (bytesRead / size);
711 }
712
713 /* fwrite for buffered I/O handles */
714 afs_sfsize_t
715 stream_write(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
716 StreamHandle_t * streamP)
717 {
718 char *p;
719 afs_sfsize_t rc;
720 afs_fsize_t nbytes, bytesWritten, bytesToWrite;
721
722 /* Need to seek before changing direction */
723 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
724 streamP->str_direction = STREAM_DIRECTION_WRITE;
725 streamP->str_bufoff = 0;
726 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
727 } else {
728 opr_Assert(streamP->str_direction == STREAM_DIRECTION_WRITE);
729 }
730
731 nbytes = size * nitems;
732 bytesWritten = 0;
733 p = (char *)ptr;
734 while (nbytes > 0) {
735 if (streamP->str_buflen == 0) {
736 rc = OS_PWRITE(streamP->str_fd, streamP->str_buffer,
737 STREAM_HANDLE_BUFSIZE, streamP->str_fdoff);
738 if (rc < 0) {
739 streamP->str_error = errno;
740 bytesWritten = 0;
741 break;
742 }
743 streamP->str_fdoff += rc;
744 streamP->str_bufoff = 0;
745 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
746 }
747
748 bytesToWrite = nbytes;
749 if (bytesToWrite > streamP->str_buflen) {
750 bytesToWrite = streamP->str_buflen;
751 }
752 memcpy(streamP->str_buffer + streamP->str_bufoff, p, bytesToWrite);
753 p += bytesToWrite;
754 streamP->str_bufoff += bytesToWrite;
755 streamP->str_buflen -= bytesToWrite;
756 bytesWritten += bytesToWrite;
757 nbytes -= bytesToWrite;
758 }
759
760 return (bytesWritten / size);
761 }
762
763 /* fseek for buffered I/O handles */
764 int
765 stream_aseek(StreamHandle_t * streamP, afs_foff_t offset)
766 {
767 ssize_t rc;
768 int retval = 0;
769
770 if (streamP->str_direction == STREAM_DIRECTION_WRITE
771 && streamP->str_bufoff > 0) {
772 rc = OS_PWRITE(streamP->str_fd, streamP->str_buffer,
773 streamP->str_bufoff, streamP->str_fdoff);
774 if (rc < 0) {
775 streamP->str_error = errno;
776 retval = -1;
777 }
778 }
779 streamP->str_fdoff = offset;
780 streamP->str_bufoff = 0;
781 streamP->str_buflen = 0;
782 streamP->str_eof = 0;
783 streamP->str_direction = STREAM_DIRECTION_NONE;
784 return retval;
785 }
786
787 /* fflush for buffered I/O handles */
788 int
789 stream_flush(StreamHandle_t * streamP)
790 {
791 ssize_t rc;
792 int retval = 0;
793
794 if (streamP->str_direction == STREAM_DIRECTION_WRITE
795 && streamP->str_bufoff > 0) {
796 rc = OS_PWRITE(streamP->str_fd, streamP->str_buffer,
797 streamP->str_bufoff, streamP->str_fdoff);
798 if (rc < 0) {
799 streamP->str_error = errno;
800 retval = -1;
801 } else {
802 streamP->str_fdoff += rc;
803 }
804 streamP->str_bufoff = 0;
805 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
806 }
807
808 return retval;
809 }
810
811 /* Free a buffered I/O handle */
812 int
813 stream_close(StreamHandle_t * streamP, int reallyClose)
814 {
815 ssize_t rc;
816 int retval = 0;
817
818 opr_Assert(streamP != NULL);
819 if (streamP->str_direction == STREAM_DIRECTION_WRITE
820 && streamP->str_bufoff > 0) {
821 rc = OS_PWRITE(streamP->str_fd, streamP->str_buffer,
822 streamP->str_bufoff, streamP->str_fdoff);
823 if (rc < 0) {
824 retval = -1;
825 } else {
826 streamP->str_fdoff += rc;
827 }
828 }
829 if (reallyClose) {
830 rc = OS_CLOSE(streamP->str_fd);
831 if (rc < 0) {
832 retval = -1;
833 }
834 }
835 streamP->str_fd = INVALID_FD;
836
837 IH_LOCK;
838 DLL_INSERT_TAIL(streamP, streamAvailHead, streamAvailTail,
839 str_next, str_prev);
840 IH_UNLOCK;
841 return retval;
842 }
843
/*
 * Close all unused file descriptors associated with the inode
 * handle. Called with IH_LOCK held. May drop and reacquire
 * IH_LOCK. Sets the IH_REALLY_CLOSED flag in the inode handle
 * if it fails to close all file handles.
 *
 * Cached (FD_HANDLE_OPEN) descriptors are closed and their handles moved
 * to the free list.  Descriptors that are still in use are marked
 * FD_HANDLE_CLOSING instead, so they get closed when their users release
 * them.  Always returns 0.
 */
static int
ih_fdclose(IHandle_t * ihP)
{
    int closeCount, closedAll;
    FdHandle_t *fdP, *head, *tail, *next;

    opr_Assert(ihP->ih_refcnt > 0);

    closedAll = 1;
    DLL_INIT_LIST(head, tail);
    ihP->ih_flags &= ~IH_REALLY_CLOSED;

    /*
     * Remove the file descriptors for this Inode from the LRU queue
     * and the IHandle queue and put them on a temporary queue so we
     * can drop the lock before we close the files.
     */
    for (fdP = ihP->ih_fdhead; fdP != NULL; fdP = next) {
        /* fetch the successor first; fdP may be unlinked below */
        next = fdP->fd_ihnext;
        opr_Assert(fdP->fd_ih == ihP);
        opr_Assert(fdP->fd_status == FD_HANDLE_OPEN
                   || fdP->fd_status == FD_HANDLE_INUSE
                   || fdP->fd_status == FD_HANDLE_CLOSING);
        if (fdP->fd_status == FD_HANDLE_OPEN) {
            /* Note that FdHandle_t's do not count against the parent
             * IHandle_t ref count when they are FD_HANDLE_OPEN. So, we don't
             * need to dec the parent IHandle_t ref count for each one we pull
             * off here. */
            DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
                       fd_ihprev);
            DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
            DLL_INSERT_TAIL(fdP, head, tail, fd_next, fd_prev);
        } else {
            /* still referenced by someone: defer the close */
            closedAll = 0;
            fdP->fd_status = FD_HANDLE_CLOSING;
            ihP->ih_flags |= IH_REALLY_CLOSED;
        }
    }

    /* If the ihandle reference count is 1, we should have
     * closed all file descriptors.
     */
    if (ihP->ih_refcnt == 1 || closedAll) {
        opr_Assert(closedAll);
        opr_Assert(!ihP->ih_fdhead);
        opr_Assert(!ihP->ih_fdtail);
    }

    if (head == NULL) {
        return 0;               /* No file descriptors closed */
    }

    IH_UNLOCK;
    /*
     * Close the file descriptors
     */
    closeCount = 0;
    for (fdP = head; fdP != NULL; fdP = fdP->fd_next) {
        OS_CLOSE(fdP->fd_fd);
        fdP->fd_status = FD_HANDLE_AVAIL;
        fdP->fd_refcnt = 0;
        fdP->fd_fd = INVALID_FD;
        fdP->fd_ih = NULL;
        closeCount++;
    }

    IH_LOCK;
    opr_Assert(fdInUseCount >= closeCount);
    fdInUseCount -= closeCount;

    /*
     * Append the temporary queue to the list of available descriptors
     */
    if (fdAvailHead == NULL) {
        fdAvailHead = head;
        fdAvailTail = tail;
    } else {
        fdAvailTail->fd_next = head;
        head->fd_prev = fdAvailTail;
        fdAvailTail = tail;
    }

    return 0;
}
933
/*
 * Close all cached file descriptors for this inode.
 *
 * If a sync was deferred on this handle (ih_synced is set, as done by
 * ih_fdsync() under the IH_SYNC_ONCLOSE policy), the inode is synced to
 * disk first.  Descriptors still in use are closed later, when released.
 * Always returns 0, including for a NULL ihP.
 */
int
ih_reallyclose(IHandle_t * ihP)
{
    if (!ihP)
        return 0;

    IH_LOCK;
    ihP->ih_refcnt++;   /* must not disappear over unlock */
    if (ihP->ih_synced) {
        FdHandle_t *fdP;
        /* a deferred sync can only have been recorded under ONCLOSE */
        opr_Assert(vol_io_params.sync_behavior != IH_SYNC_ALWAYS);
        opr_Assert(vol_io_params.sync_behavior != IH_SYNC_NEVER);
        ihP->ih_synced = 0;
        IH_UNLOCK;

        /* perform the deferred sync without holding the lock */
        fdP = IH_OPEN(ihP);
        if (fdP) {
            OS_SYNC(fdP->fd_fd);
            FDH_CLOSE(fdP);
        }

        IH_LOCK;
    }

    opr_Assert(ihP->ih_refcnt > 0);

    ih_fdclose(ihP);

    /* drop the temporary reference taken above */
    if (ihP->ih_refcnt > 1)
        ihP->ih_refcnt--;
    else
        _ih_release_r(ihP);

    IH_UNLOCK;
    return 0;
}
971
972 /* Release an Inode handle. All cached file descriptors for this
973 * inode are closed when the last reference to this handle is released
974 */
975 static int
976 _ih_release_r(IHandle_t * ihP)
977 {
978 int ihash;
979
980 if (!ihP)
981 return 0;
982
983 opr_Assert(ihP->ih_refcnt > 0);
984
985 if (ihP->ih_refcnt > 1) {
986 ihP->ih_refcnt--;
987 return 0;
988 }
989
990 ihash = IH_HASH(ihP->ih_dev, ihP->ih_vid, ihP->ih_ino);
991 DLL_DELETE(ihP, ihashTable[ihash].ihash_head,
992 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
993
994 ih_fdclose(ihP);
995
996 ihP->ih_refcnt--;
997
998 DLL_INSERT_TAIL(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
999
1000 return 0;
1001 }
1002
1003 /* Release an Inode handle. All cached file descriptors for this
1004 * inode are closed when the last reference to this handle is released
1005 */
1006 int
1007 ih_release(IHandle_t * ihP)
1008 {
1009 int ret;
1010
1011 if (!ihP)
1012 return 0;
1013
1014 IH_LOCK;
1015 ret = _ih_release_r(ihP);
1016 IH_UNLOCK;
1017 return ret;
1018 }
1019
1020 /* Sync an inode to disk if its handle isn't NULL */
1021 int
1022 ih_condsync(IHandle_t * ihP)
1023 {
1024 int code;
1025 FdHandle_t *fdP;
1026
1027 if (!ihP)
1028 return 0;
1029
1030 fdP = IH_OPEN(ihP);
1031 if (fdP == NULL)
1032 return -1;
1033
1034 code = FDH_SYNC(fdP);
1035 FDH_CLOSE(fdP);
1036
1037 return code;
1038 }
1039
1040 /*************************************************************************
1041 * OS specific support routines.
1042 *************************************************************************/
1043 #ifndef AFS_NAMEI_ENV
1044 Inode
1045 ih_icreate(IHandle_t * ih, int dev, char *part, Inode nI, int p1, int p2,
1046 int p3, int p4)
1047 {
1048 Inode ino;
1049 #ifdef AFS_3DISPARES
1050 /* See viceinode.h */
1051 if (p2 == INODESPECIAL) {
1052 int tp = p3;
1053 p3 = p4;
1054 p4 = tp;
1055 }
1056 #endif
1057 ino = ICREATE(dev, part, nI, p1, p2, p3, p4);
1058 return ino;
1059 }
1060 #endif /* AFS_NAMEI_ENV */
1061
1062 #if defined(AFS_NT40_ENV) || !defined(AFS_NAMEI_ENV)
1063 /* Unix namei implements its own more efficient IH_CREATE_INIT; this wrapper
1064 * is for everyone else */
1065 IHandle_t *
1066 ih_icreate_init(IHandle_t *lh, int dev, char *part, Inode nearInode,
1067 afs_uint32 p1, afs_uint32 p2, afs_uint32 p3, afs_uint32 p4)
1068 {
1069 IHandle_t *ihP;
1070 Inode ino = IH_CREATE(lh, dev, part, nearInode, p1, p2, p3, p4);
1071 if (!VALID_INO(ino)) {
1072 return NULL;
1073 }
1074 IH_INIT(ihP, dev, p1, ino);
1075 return ihP;
1076 }
1077 #endif
1078
1079 afs_sfsize_t
1080 ih_size(FD_t fd)
1081 {
1082 #ifdef AFS_NT40_ENV
1083 LARGE_INTEGER size;
1084 if (!GetFileSizeEx(fd, &size))
1085 return -1;
1086 return size.QuadPart;
1087 #else
1088 struct afs_stat_st status;
1089 if (afs_fstat(fd, &status) < 0)
1090 return -1;
1091 return status.st_size;
1092 #endif
1093 }
1094
1095 #ifndef AFS_IHANDLE_PIO_ENV
1096 ssize_t
1097 ih_pread(int fd, void * buf, size_t count, afs_foff_t offset)
1098 {
1099 afs_foff_t code;
1100 code = OS_SEEK(fd, offset, 0);
1101 if (code < 0)
1102 return code;
1103 return OS_READ(fd, buf, count);
1104 }
1105
1106 ssize_t
1107 ih_pwrite(int fd, const void * buf, size_t count, afs_foff_t offset)
1108 {
1109 afs_foff_t code;
1110 code = OS_SEEK(fd, offset, 0);
1111 if (code < 0)
1112 return code;
1113 return OS_WRITE(fd, buf, count);
1114 }
1115 #endif /* !AFS_IHANDLE_PIO_ENV */
1116
1117 #ifndef AFS_NT40_ENV
1118 int
1119 ih_isunlinked(int fd)
1120 {
1121 struct afs_stat_st status;
1122 if (afs_fstat(fd, &status) < 0) {
1123 return -1;
1124 }
1125 if (status.st_nlink < 1) {
1126 return 1;
1127 }
1128 return 0;
1129 }
1130 #endif /* !AFS_NT40_ENV */
1131
1132 int
1133 ih_fdsync(FdHandle_t *fdP)
1134 {
1135 switch (vol_io_params.sync_behavior) {
1136 case IH_SYNC_ALWAYS:
1137 return OS_SYNC(fdP->fd_fd);
1138 case IH_SYNC_ONCLOSE:
1139 if (fdP->fd_ih) {
1140 fdP->fd_ih->ih_synced = 1;
1141 return 0;
1142 }
1143 return 1;
1144 case IH_SYNC_NEVER:
1145 return 0;
1146 default:
1147 opr_Assert(0);
1148 }
1149 }