Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / vol / vnode.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 *
9 * Portions Copyright (c) 2005-2008 Sine Nomine Associates
10 */
11
12 /*
13 System: VICE-TWO
14 Module: vnode.c
15 Institution: The Information Technology Center, Carnegie-Mellon University
16
17 */
18 #include <afsconfig.h>
19 #include <afs/param.h>
20
21 #include <roken.h>
22
23 #include <limits.h>
24
25 #ifdef HAVE_SYS_FILE_H
26 #include <sys/file.h>
27 #endif
28
29 #include <afs/opr.h>
30 #ifdef AFS_PTHREAD_ENV
31 #include <opr/lock.h>
32 #endif
33 #include <opr/jhash.h>
34 #include "rx/rx_queue.h"
35 #include <afs/afsint.h>
36 #include "nfs.h"
37 #include <afs/errors.h>
38 #include "lock.h"
39 #include "lwp.h"
40 #include <afs/afssyscalls.h>
41 #include "ihandle.h"
42 #include "vnode.h"
43 #include "volume.h"
44 #include "volume_inline.h"
45 #include "vnode_inline.h"
46 #include "partition.h"
47 #include "salvsync.h"
48 #include "common.h"
49 #ifdef AFS_NT40_ENV
50 #include "ntops.h"
51 #endif
52
53 struct VnodeClassInfo VnodeClassInfo[nVNODECLASSES];
54
void VNLog(afs_int32 aop, afs_int32 anparms, ... );

/* logging stuff for finding bugs */
#define	THELOGSIZE	5120
static afs_int32 theLog[THELOGSIZE];
static afs_int32 vnLogPtr = 0;

/**
 * append an operation record to the in-memory vnode debug log.
 *
 * @param[in] aop      operation code; packed into the high 16 bits of the
 *                     record's header word
 * @param[in] anparms  number of variadic afs_int32 arguments that follow;
 *                     silently clamped to at most 4
 *
 * @note theLog is a fixed-size circular buffer: vnLogPtr wraps to 0 when it
 *       reaches THELOGSIZE, overwriting the oldest entries.
 */
void
VNLog(afs_int32 aop, afs_int32 anparms, ... )
{
    afs_int32 temp;
    va_list ap;

    va_start(ap, anparms);

    if (anparms > 4)
	anparms = 4;		/* do bounds checking */

    /* header word: opcode in the high half, parameter count in the low half */
    temp = (aop << 16) | anparms;
    theLog[vnLogPtr++] = temp;
    if (vnLogPtr >= THELOGSIZE)
	vnLogPtr = 0;
    for (temp = 0; temp < anparms; temp++) {
	theLog[vnLogPtr++] = va_arg(ap, afs_int32);
	if (vnLogPtr >= THELOGSIZE)
	    vnLogPtr = 0;
    }
    va_end(ap);
}
83
84
85 /* Vnode hash table. Just use the Jenkins hash of the vnode number,
86 * with the volume ID as an initval because it's there. (That will
87 * make the same vnode number in different volumes hash to a different
88 * value, which would probably not even be a big deal anyway.)
89 */
90
91 #define VNODE_HASH_TABLE_BITS 11
92 #define VNODE_HASH_TABLE_SIZE opr_jhash_size(VNODE_HASH_TABLE_BITS)
93 #define VNODE_HASH_TABLE_MASK opr_jhash_mask(VNODE_HASH_TABLE_BITS)
94 private Vnode *VnodeHashTable[VNODE_HASH_TABLE_SIZE];
95 #define VNODE_HASH(volumeptr,vnodenumber)\
96 (opr_jhash_int((vnodenumber), V_id((volumeptr))) & VNODE_HASH_TABLE_MASK)
97
98
99
100 #define BAD_IGET -1000
101
/* There are two separate vnode queue types defined here:
 * Each hash conflict chain -- is singly linked, with a single head
 * pointer. New entries are added at the beginning. Old
 * entries are removed by linear search, which generally
 * only occurs after a disk read.
 * LRU chain -- is doubly linked, single head pointer.
 * Entries are added at the head, reclaimed from the tail,
 * or removed from anywhere in the queue.
 */
111
112 /**
113 * add a vnode to the volume's vnode list.
114 *
115 * @param[in] vp volume object pointer
116 * @param[in] vnp vnode object pointer
117 *
118 * @note for DAFS, it may seem like we should be acquiring a lightweight ref
119 * on vp, but this would actually break things. Right now, this is ok
120 * because we destroy all vnode cache contents during during volume
121 * detach.
122 *
123 * @pre VOL_LOCK held
124 *
125 * @internal volume package internal use only
126 */
127 void
128 AddToVVnList(Volume * vp, Vnode * vnp)
129 {
130 if (queue_IsOnQueue(vnp))
131 return;
132
133 Vn_volume(vnp) = vp;
134 Vn_cacheCheck(vnp) = vp->cacheCheck;
135 queue_Append(&vp->vnode_list, vnp);
136 Vn_stateFlags(vnp) |= VN_ON_VVN;
137 }
138
139 /**
140 * delete a vnode from the volume's vnode list.
141 *
142 * @pre VOL_LOCK held
143 *
144 * @internal volume package internal use only
145 */
146 void
147 DeleteFromVVnList(Vnode * vnp)
148 {
149 Vn_volume(vnp) = NULL;
150
151 if (!queue_IsOnQueue(vnp))
152 return;
153
154 queue_Remove(vnp);
155 Vn_stateFlags(vnp) &= ~(VN_ON_VVN);
156 }
157
/**
 * add a vnode to the end of the lru.
 *
 * @param[in] vcp  vnode class info object pointer
 * @param[in] vnp  vnode object pointer
 *
 * @post vnode spliced into the circular LRU at the most-recently-used
 *       position (the head), unless it was just deleted, in which case it
 *       is left at the tail so the slot is reclaimed first.
 *
 * @internal vnode package internal use only
 */
void
AddToVnLRU(struct VnodeClassInfo * vcp, Vnode * vnp)
{
    if (Vn_stateFlags(vnp) & VN_ON_LRU) {
	return;
    }

    /* Add it to the circular LRU list */
    if (vcp->lruHead == NULL)
	Abort("VPutVnode: vcp->lruHead==NULL");
    else {
	/* splice vnp in just before the current head (i.e. at the tail),
	 * then advance lruHead to vnp, making it the new head */
	vnp->lruNext = vcp->lruHead;
	vnp->lruPrev = vcp->lruHead->lruPrev;
	vcp->lruHead->lruPrev = vnp;
	vnp->lruPrev->lruNext = vnp;
	vcp->lruHead = vnp;
    }

    /* If the vnode was just deleted, put it at the end of the chain so it
     * will be reused immediately */
    if (vnp->delete)
	vcp->lruHead = vnp->lruNext;

    Vn_stateFlags(vnp) |= VN_ON_LRU;
}
191
/**
 * delete a vnode from the lru.
 *
 * @param[in] vcp  vnode class info object pointer
 * @param[in] vnp  vnode object pointer
 *
 * @post vnode unlinked from the circular LRU; VN_ON_LRU flag cleared
 *
 * @internal vnode package internal use only
 */
void
DeleteFromVnLRU(struct VnodeClassInfo * vcp, Vnode * vnp)
{
    if (!(Vn_stateFlags(vnp) & VN_ON_LRU)) {
	return;
    }

    /* if vnp is the head, move the head to the next entry before unlinking */
    if (vnp == vcp->lruHead)
	vcp->lruHead = vcp->lruHead->lruNext;

    /* if the head still equals vnp, the circular list contained only vnp
     * (it should never be reduced to empty here); NULL means the list was
     * never initialized -- either way the chain is corrupt */
    if ((vnp == vcp->lruHead) ||
	(vcp->lruHead == NULL))
	Abort("DeleteFromVnLRU: lru chain addled!\n");

    vnp->lruPrev->lruNext = vnp->lruNext;
    vnp->lruNext->lruPrev = vnp->lruPrev;

    Vn_stateFlags(vnp) &= ~(VN_ON_LRU);
}
219
220 /**
221 * add a vnode to the vnode hash table.
222 *
223 * @param[in] vnp vnode object pointer
224 *
225 * @pre VOL_LOCK held
226 *
227 * @post vnode on hash
228 *
229 * @internal vnode package internal use only
230 */
231 void
232 AddToVnHash(Vnode * vnp)
233 {
234 unsigned int newHash;
235
236 if (!(Vn_stateFlags(vnp) & VN_ON_HASH)) {
237 newHash = VNODE_HASH(Vn_volume(vnp), Vn_id(vnp));
238 vnp->hashNext = VnodeHashTable[newHash];
239 VnodeHashTable[newHash] = vnp;
240 vnp->hashIndex = newHash;
241
242 Vn_stateFlags(vnp) |= VN_ON_HASH;
243 }
244 }
245
/**
 * delete a vnode from the vnode hash table.
 *
 * @param[in] vnp  vnode object pointer
 *
 * @pre VOL_LOCK held
 *
 * @post vnode removed from hash
 *
 * @internal vnode package internal use only
 */
void
DeleteFromVnHash(Vnode * vnp)
{
    Vnode * tvnp;

    if (Vn_stateFlags(vnp) & VN_ON_HASH) {
	tvnp = VnodeHashTable[vnp->hashIndex];
	if (tvnp == vnp)
	    /* vnp is at the head of its conflict chain */
	    VnodeHashTable[vnp->hashIndex] = vnp->hashNext;
	else {
	    /* linear search for vnp's predecessor in the singly-linked chain */
	    while (tvnp && tvnp->hashNext != vnp)
		tvnp = tvnp->hashNext;
	    if (tvnp)
		tvnp->hashNext = vnp->hashNext;
	}

	vnp->hashNext = NULL;
	vnp->hashIndex = 0;
	Vn_stateFlags(vnp) &= ~(VN_ON_HASH);
    }
}
279
280
/**
 * invalidate a vnode cache entry.
 *
 * @param[in] avnode   vnode object pointer
 *
 * @pre VOL_LOCK held
 *
 * @post vnode metadata invalidated.
 *       vnode removed from hash table.
 *       DAFS: vnode state set to VN_STATE_INVALID.
 *
 * @internal vnode package internal use only
 */
void
VInvalidateVnode_r(struct Vnode *avnode)
{
    avnode->changed_newTime = 0;	/* don't let it get flushed out again */
    avnode->changed_oldTime = 0;
    avnode->delete = 0;		/* it isn't deleted, really */
    avnode->cacheCheck = 0;	/* invalid: prevents future vnode searches from working */
    DeleteFromVnHash(avnode);
#ifdef AFS_DEMAND_ATTACH_FS
    VnChangeState_r(avnode, VN_STATE_INVALID);
#endif
}
306
307
/**
 * initialize vnode cache for a given vnode class.
 *
 * @param[in] class   vnode class
 * @param[in] nVnodes size of cache
 *
 * @return operation result
 * @retval 0 success (the only value currently returned)
 *
 * @post vnode cache allocated and initialized
 *
 * @internal volume package internal use only
 *
 * @note generally called by VInitVolumePackage_r
 *
 * @see VInitVolumePackage_r
 */
int
VInitVnodes(VnodeClass class, int nVnodes)
{
    byte *va;
    struct VnodeClassInfo *vcp = &VnodeClassInfo[class];

    vcp->allocs = vcp->gets = vcp->reads = vcp->writes = 0;
    vcp->cacheSize = nVnodes;
    switch (class) {
    case vSmall:
	opr_Assert(CHECKSIZE_SMALLVNODE);
	vcp->lruHead = NULL;
	vcp->residentSize = SIZEOF_SMALLVNODE;
	vcp->diskSize = SIZEOF_SMALLDISKVNODE;
	vcp->magic = SMALLVNODEMAGIC;
	break;
    case vLarge:
	vcp->lruHead = NULL;
	vcp->residentSize = SIZEOF_LARGEVNODE;
	vcp->diskSize = SIZEOF_LARGEDISKVNODE;
	vcp->magic = LARGEVNODEMAGIC;
	break;
    }
    {
	/* logSize = ceil(log2(diskSize)): count how many shifts empty
	 * (diskSize - 1) */
	int s = vcp->diskSize - 1;
	int n = 0;
	while (s)
	    s >>= 1, n++;
	vcp->logSize = n;
    }

    if (nVnodes == 0)
	return 0;

    /* calloc zero-fills, so every field not set explicitly below starts 0 */
    va = (byte *) calloc(nVnodes, vcp->residentSize);
    opr_Assert(va != NULL);
    while (nVnodes--) {
	Vnode *vnp = (Vnode *) va;
	Vn_refcount(vnp) = 0;	/* no context switches */
	Vn_stateFlags(vnp) |= VN_ON_LRU;
#ifdef AFS_DEMAND_ATTACH_FS
	CV_INIT(&Vn_stateCV(vnp), "vnode state", CV_DEFAULT, 0);
	Vn_state(vnp) = VN_STATE_INVALID;
	Vn_readers(vnp) = 0;
#else /* !AFS_DEMAND_ATTACH_FS */
	Lock_Init(&vnp->lock);
#endif /* !AFS_DEMAND_ATTACH_FS */
	vnp->changed_oldTime = 0;
	vnp->changed_newTime = 0;
	Vn_volume(vnp) = NULL;
	Vn_cacheCheck(vnp) = 0;
	vnp->delete = Vn_id(vnp) = 0;
#ifdef AFS_PTHREAD_ENV
	vnp->writer = (pthread_t) 0;
#else /* AFS_PTHREAD_ENV */
	vnp->writer = (PROCESS) 0;
#endif /* AFS_PTHREAD_ENV */
	vnp->hashIndex = 0;
	vnp->handle = NULL;
	Vn_class(vnp) = vcp;
	/* build the circular LRU: first vnode points at itself, later ones
	 * are spliced in ahead of the current head */
	if (vcp->lruHead == NULL)
	    vcp->lruHead = vnp->lruNext = vnp->lruPrev = vnp;
	else {
	    vnp->lruNext = vcp->lruHead;
	    vnp->lruPrev = vcp->lruHead->lruPrev;
	    vcp->lruHead->lruPrev = vnp;
	    vnp->lruPrev->lruNext = vnp;
	    vcp->lruHead = vnp;
	}
	va += vcp->residentSize;
    }
    return 0;
}
395
396
/**
 * allocate an unused vnode from the lru chain.
 *
 * @param[in] vcp          vnode class info object pointer
 * @param[in] vp           volume pointer
 * @param[in] vnodeNumber  new vnode number that the vnode will be used for
 *
 * @pre VOL_LOCK is held
 *
 * @post vnode object is removed from lru
 *       vnode is disassociated with its old volume, and associated with its
 *         new volume
 *       vnode is removed from its old vnode hash table, and for DAFS, it is
 *         added to its new hash table
 *       state is set to VN_STATE_INVALID.
 *       inode handle is released.
 *       a reservation is held on the vnode object
 *
 * @note we traverse backwards along the lru circular list.  It shouldn't
 *       be necessary to specify that nUsers == 0 since if it is in the list,
 *       nUsers should be 0.  Things shouldn't be in lruq unless no one is
 *       using them.
 *
 * @warning DAFS: VOL_LOCK is dropped while doing inode handle release
 *
 * @warning for non-DAFS, the vnode is _not_ hashed on the vnode hash table;
 *          non-DAFS must hash the vnode itself after loading data
 *
 * @return vnode object pointer
 */
Vnode *
VGetFreeVnode_r(struct VnodeClassInfo * vcp, struct Volume *vp,
		VnodeId vnodeNumber)
{
    Vnode *vnp;

    /* reclaim from the tail: the least-recently-used entry */
    vnp = vcp->lruHead->lruPrev;
#ifdef AFS_DEMAND_ATTACH_FS
    if (Vn_refcount(vnp) != 0 || VnIsExclusiveState(Vn_state(vnp)) ||
	Vn_readers(vnp) != 0)
	Abort("VGetFreeVnode_r: in-use vnode in lruq");
#else
    if (Vn_refcount(vnp) != 0 || CheckLock(&vnp->lock))
	Abort("VGetFreeVnode_r: locked vnode in lruq");
#endif
    VNLog(1, 2, Vn_id(vnp), (intptr_t)vnp, 0, 0);

    /*
     * it's going to be overwritten soon enough.
     * remove from LRU, delete hash entry, and
     * disassociate from old parent volume before
     * we have a chance to drop the vol glock
     */
    DeleteFromVnLRU(vcp, vnp);
    DeleteFromVnHash(vnp);
    if (Vn_volume(vnp)) {
	DeleteFromVVnList(vnp);
    }

    /* we must re-hash the vnp _before_ we drop the glock again; otherwise,
     * someone else might try to grab the same vnode id, and we'll both alloc
     * a vnode object for the same vn id, bypassing vnode locking */
    Vn_id(vnp) = vnodeNumber;
    VnCreateReservation_r(vnp);
    AddToVVnList(vp, vnp);
#ifdef AFS_DEMAND_ATTACH_FS
    AddToVnHash(vnp);
#endif

    /* drop the file descriptor */
    if (vnp->handle) {
#ifdef AFS_DEMAND_ATTACH_FS
	VnChangeState_r(vnp, VN_STATE_RELEASING);
	VOL_UNLOCK;
#endif
	/* release is, potentially, a highly latent operation due to a couple
	 * factors:
	 *   - ihandle package lock contention
	 *   - closing file descriptor(s) associated with ih
	 *
	 * Hence, we perform outside of the volume package lock in order to
	 * reduce the probability of contention.
	 */
	IH_RELEASE(vnp->handle);
#ifdef AFS_DEMAND_ATTACH_FS
	VOL_LOCK;
#endif
    }

#ifdef AFS_DEMAND_ATTACH_FS
    VnChangeState_r(vnp, VN_STATE_INVALID);
#endif

    return vnp;
}
492
493
494 /**
495 * lookup a vnode in the vnode cache hash table.
496 *
497 * @param[in] vp pointer to volume object
498 * @param[in] vnodeId vnode id
499 *
500 * @pre VOL_LOCK held
501 *
502 * @post matching vnode object or NULL is returned
503 *
504 * @return vnode object pointer
505 * @retval NULL no matching vnode object was found in the cache
506 *
507 * @internal vnode package internal use only
508 *
509 * @note this symbol is exported strictly for fssync debug protocol use
510 */
511 Vnode *
512 VLookupVnode(Volume * vp, VnodeId vnodeId)
513 {
514 Vnode * vnp;
515 unsigned int newHash;
516
517 newHash = VNODE_HASH(vp, vnodeId);
518 for (vnp = VnodeHashTable[newHash];
519 (vnp &&
520 ((Vn_id(vnp) != vnodeId) ||
521 (Vn_volume(vnp) != vp) ||
522 (vp->cacheCheck != Vn_cacheCheck(vnp))));
523 vnp = vnp->hashNext);
524
525 return vnp;
526 }
527
528
529 Vnode *
530 VAllocVnode(Error * ec, Volume * vp, VnodeType type, VnodeId in_vnode, Unique in_unique)
531 {
532 Vnode *retVal;
533 VOL_LOCK;
534 retVal = VAllocVnode_r(ec, vp, type, in_vnode, in_unique);
535 VOL_UNLOCK;
536 return retVal;
537 }
538
/**
 * allocate a new vnode.
 *
 * @param[out] ec         error code return
 * @param[in]  vp         volume object pointer
 * @param[in]  type       desired vnode type
 * @param[in]  in_vnode   desired vnode ID; 0 means allocate a fresh slot
 *                        from the bitmap
 * @param[in]  in_unique  desired vnode uniquifier; must be non-zero when
 *                        in_vnode is specified
 *
 * @return vnode object pointer
 * @retval NULL on error; *ec is set
 *
 * @pre VOL_LOCK held;
 *      heavyweight ref held on vp
 *
 * @post vnode allocated and returned, write-locked, with an on-disk image
 *       zeroed and initialized to the requested type/uniquifier
 */
Vnode *
VAllocVnode_r(Error * ec, Volume * vp, VnodeType type, VnodeId in_vnode, Unique in_unique)
{
    Vnode *vnp;
    VnodeId vnodeNumber;
    int bitNumber;
    struct VnodeClassInfo *vcp;
    VnodeClass class;
    Unique unique;
    struct vnodeIndex *index;
    unsigned int offset;

#ifdef AFS_DEMAND_ATTACH_FS
    VolState vol_state_save;
#endif

    *ec = 0;

#ifdef AFS_DEMAND_ATTACH_FS
    /*
     * once a volume has entered an error state, don't permit
     * further operations to proceed
     *  -- tkeiser 11/21/2007
     */
    VWaitExclusiveState_r(vp);
    if (VIsErrorState(V_attachState(vp))) {
	/* XXX is VSALVAGING acceptable here? */
	*ec = DAFS_VSALVAGE;
	return NULL;
    }
#endif

    if (programType == fileServer && !V_inUse(vp)) {
	if (vp->specialStatus) {
	    *ec = vp->specialStatus;
	} else {
	    *ec = VOFFLINE;
	}
	return NULL;
    }
    class = vnodeTypeToClass(type);
    vcp = &VnodeClassInfo[class];

    if (!VolumeWriteable(vp)) {
	*ec = (bit32) VREADONLY;
	return NULL;
    }

    /* flush volume header if the on-disk uniquifier has fallen behind */
    if (vp->nextVnodeUnique > V_uniquifier(vp)) {
	VUpdateVolume_r(ec, vp, 0);
	if (*ec)
	    return NULL;
    }

    if (programType == fileServer) {
	VAddToVolumeUpdateList_r(ec, vp);
	if (*ec)
	    return NULL;
    }

    /*
     * If in_vnode and in_unique are specified, we are asked to
     * allocate a specific vnode slot.  Used by RW replication to
     * keep vnode IDs consistent with the master.
     */

    if (!in_vnode) {
	int rollover = 0;

	unique = vp->nextVnodeUnique++;
	if (unique == 0) {
	    rollover = 1;	/* nextVnodeUnique rolled over */
	    vp->nextVnodeUnique = 2;	/* 1 is reserved for the root vnode */
	    unique = vp->nextVnodeUnique++;
	}

	if (vp->nextVnodeUnique > V_uniquifier(vp) || rollover) {
	    VUpdateVolume_r(ec, vp, 0);
	    if (*ec)
		return NULL;
	}

	/* Find a slot in the bit map */
	bitNumber = VAllocBitmapEntry_r(ec, vp, &vp->vnodeIndex[class],
					VOL_ALLOC_BITMAP_WAIT);

	if (*ec)
	    return NULL;
	vnodeNumber = bitNumberToVnodeNumber(bitNumber, class);
    } else {
	/* caller-specified slot (RW replication path) */
	index = &vp->vnodeIndex[class];
	if (!in_unique) {
	    *ec = VNOVNODE;
	    return NULL;
	}
	/* Catch us up to where the master is */
	if (in_unique > vp->nextVnodeUnique)
	    vp->nextVnodeUnique = in_unique+1;

	if (vp->nextVnodeUnique > V_uniquifier(vp)) {
	    VUpdateVolume_r(ec, vp, 0);
	    if (*ec)
		return NULL;
	}

	unique = in_unique;
	bitNumber = vnodeIdToBitNumber(in_vnode);
	offset = bitNumber >> 3;

	/* Mark vnode in use. Grow bitmap if needed. */
	if ((offset >= index->bitmapSize)
	    || ((*(index->bitmap + offset) & (1 << (bitNumber & 0x7))) == 0))
	    VGrowBitmap(index);
	/* Should not happen */
	if (*(index->bitmap + offset) & (1 << (bitNumber & 0x7))) {
	    *ec = VNOVNODE;
	    return NULL;
	}

	*(index->bitmap + offset) |= (1 << (bitNumber & 0x7));
	vnodeNumber = in_vnode;
    }

    /*
     * DAFS:
     * at this point we should be assured that V_attachState(vp) is non-exclusive
     */

 vnrehash:
    VNLog(2, 1, vnodeNumber, 0, 0, 0);
    /* Prepare to move it to the new hash chain */
    vnp = VLookupVnode(vp, vnodeNumber);
    if (vnp) {
	/* slot already exists.  May even not be in lruq (consider store file locking a file being deleted)
	 * so we may have to wait for it below */
	VNLog(3, 2, vnodeNumber, (intptr_t)vnp, 0, 0);

	VnCreateReservation_r(vnp);
	if (Vn_refcount(vnp) == 1) {
	    /* we're the only user */
	    /* This won't block */
	    VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);
	} else {
#ifdef AFS_DEMAND_ATTACH_FS
	    /*
	     * DAFS:
	     * vnode was cached, wait for any existing exclusive ops to finish.
	     * once we have reacquired the lock, re-verify volume state.
	     *
	     * note: any vnode error state is related to the old vnode; disregard.
	     */
	    VnWaitQuiescent_r(vnp);
	    if (VIsErrorState(V_attachState(vp))) {
		VnUnlock(vnp, WRITE_LOCK);
		VnCancelReservation_r(vnp);
		*ec = DAFS_VSALVAGE;
		return NULL;
	    }
#endif

	    /* other users present; follow locking hierarchy */
	    VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, MIGHT_DEADLOCK);

	    /*
	     * verify state of the world hasn't changed
	     *
	     * (technically, this should never happen because cachecheck
	     * is only updated during a volume attach, which should not
	     * happen when refs are held)
	     */
	    if (Vn_volume(vnp)->cacheCheck != Vn_cacheCheck(vnp)) {
		VnUnlock(vnp, WRITE_LOCK);
		VnCancelReservation_r(vnp);
		goto vnrehash;
	    }
	}

	/* sanity check: vnode should be blank if it was deleted. If it's
	 * not blank, it is still in use somewhere; but the bitmap told us
	 * this vnode number was free, so something is wrong. */
	if (vnp->disk.type != vNull) {
	    Error tmp;
	    Log("VAllocVnode:  addled bitmap or vnode object! (vol %" AFS_VOLID_FMT ", "
		"vnode %p, number %ld, type %ld)\n", afs_printable_VolumeId_lu(vp->hashid), vnp,
		(long)Vn_id(vnp), (long)vnp->disk.type);
	    *ec = EIO;
	    VFreeBitMapEntry_r(&tmp, vp, &vp->vnodeIndex[class], bitNumber,
		               VOL_FREE_BITMAP_WAIT);
	    VInvalidateVnode_r(vnp);
	    VnUnlock(vnp, WRITE_LOCK);
	    VnCancelReservation_r(vnp);
#ifdef AFS_DEMAND_ATTACH_FS
	    VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
#else
	    VForceOffline_r(vp, 0);
#endif
	    return NULL;
	}

    } else {
	/* no such vnode in the cache */

	vnp = VGetFreeVnode_r(vcp, vp, vnodeNumber);

	/* This will never block (guaranteed by check in VGetFreeVnode_r() */
	VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);

#ifdef AFS_DEMAND_ATTACH_FS
	VnChangeState_r(vnp, VN_STATE_ALLOC);
#endif

	/* Sanity check:  is this vnode really not in use? */
	{
	    afs_sfsize_t size;
	    IHandle_t *ihP = vp->vnodeIndex[class].handle;
	    FdHandle_t *fdP;
	    afs_foff_t off = vnodeIndexOffset(vcp, vnodeNumber);
	    Error tmp;

	    /* XXX we have a potential race here if two threads
	     * allocate new vnodes at the same time, and they
	     * both decide it's time to extend the index
	     * file size...
	     */
#ifdef AFS_DEMAND_ATTACH_FS
	    /*
	     * this race has been eliminated for the DAFS case
	     * using exclusive state VOL_STATE_VNODE_ALLOC
	     *
	     * if this becomes a bottleneck, there are ways to
	     * improve parallelism for this code path
	     *   -- tkeiser 11/28/2007
	     */
	    VCreateReservation_r(vp);
	    VWaitExclusiveState_r(vp);
	    vol_state_save = VChangeState_r(vp, VOL_STATE_VNODE_ALLOC);
#endif

	    VOL_UNLOCK;
	    fdP = IH_OPEN(ihP);
	    if (fdP == NULL) {
		Log("VAllocVnode: can't open index file!\n");
		*ec = ENOENT;
		goto error_encountered;
	    }
	    if ((size = FDH_SIZE(fdP)) < 0) {
		Log("VAllocVnode: can't stat index file!\n");
		*ec = EIO;
		goto error_encountered;
	    }
	    if (off + vcp->diskSize <= size) {
		/* slot lies within the current index file: it must read
		 * back as an unallocated (vNull) vnode */
		if (FDH_PREAD(fdP, &vnp->disk, vcp->diskSize, off) != vcp->diskSize) {
		    Log("VAllocVnode: can't read index file!\n");
		    *ec = EIO;
		    goto error_encountered;
		}
		if (vnp->disk.type != vNull) {
		    Log("VAllocVnode:  addled bitmap or index!\n");
		    *ec = EIO;
		    goto error_encountered;
		}
	    } else {
		/* growing file - grow in a reasonable increment */
		char *buf = malloc(16 * 1024);
		if (!buf) {
		    Log("VAllocVnode: can't grow vnode index: out of memory\n");
		    *ec = ENOMEM;
		    goto error_encountered;
		}
		memset(buf, 0, 16 * 1024);
		if ((FDH_PWRITE(fdP, buf, 16 * 1024, off)) != 16 * 1024) {
		    Log("VAllocVnode: can't grow vnode index: write failed\n");
		    *ec = EIO;
		    free(buf);
		    goto error_encountered;
		}
		free(buf);
	    }
	    FDH_CLOSE(fdP);
	    VOL_LOCK;

#ifdef AFS_DEMAND_ATTACH_FS
	    VChangeState_r(vp, vol_state_save);
	    VCancelReservation_r(vp);
#endif
	    goto sane;


	error_encountered:
	    /*
	     * close the file handle
	     * acquire VOL_LOCK
	     * invalidate the vnode
	     * free up the bitmap entry (although salvager should take care of it)
	     * salvage the volume
	     * drop vnode lock and refs
	     */
	    if (fdP)
		FDH_CLOSE(fdP);
	    VOL_LOCK;
	    VFreeBitMapEntry_r(&tmp, vp, &vp->vnodeIndex[class], bitNumber, 0 /*flags*/);
	    VInvalidateVnode_r(vnp);
	    VnUnlock(vnp, WRITE_LOCK);
	    VnCancelReservation_r(vnp);
#ifdef AFS_DEMAND_ATTACH_FS
	    VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
	    VCancelReservation_r(vp);
#else
	    VForceOffline_r(vp, 0);
#endif
	    return NULL;
	}
    sane:
	VNLog(4, 2, vnodeNumber, (intptr_t)vnp, 0, 0);
#ifndef AFS_DEMAND_ATTACH_FS
	AddToVnHash(vnp);
#endif
    }

    /* common exit: initialize the blank vnode for its new identity */
    VNLog(5, 1, (intptr_t)vnp, 0, 0, 0);
    memset(&vnp->disk, 0, sizeof(vnp->disk));
    vnp->changed_newTime = 0;	/* set this bit when vnode is updated */
    vnp->changed_oldTime = 0;	/* set this on CopyOnWrite. */
    vnp->delete = 0;
    vnp->disk.vnodeMagic = vcp->magic;
    vnp->disk.type = type;
    vnp->disk.uniquifier = unique;
    vnp->handle = NULL;
    vcp->allocs++;
    V_filecount(vp)++;
#ifdef AFS_DEMAND_ATTACH_FS
    VnChangeState_r(vnp, VN_STATE_EXCLUSIVE);
#endif
    return vnp;
}
890
/**
 * load a vnode from disk.
 *
 * @param[out] ec     client error code return
 * @param[in]  vp     volume object pointer
 * @param[in]  vnp    vnode object pointer
 * @param[in]  vcp    vnode class info object pointer
 * @param[in]  class  vnode class enumeration
 *
 * @pre vnode is registered in appropriate data structures;
 *      caller holds a ref on vnode; VOL_LOCK is held
 *
 * @post vnode data is loaded from disk.
 *       vnode state is set to VN_STATE_ONLINE.
 *       on failure, vnode is invalidated.
 *
 * @note VOL_LOCK is dropped around the index file I/O and reacquired
 *       afterwards (including on the error paths).
 *
 * @internal vnode package internal use only
 */
static void
VnLoad(Error * ec, Volume * vp, Vnode * vnp,
       struct VnodeClassInfo * vcp, VnodeClass class)
{
    /* vnode not cached */
    Error error;
    int dosalv = 1;		/* cleared on errors that don't warrant a salvage */
    ssize_t nBytes;
    IHandle_t *ihP = vp->vnodeIndex[class].handle;
    FdHandle_t *fdP;
    afs_ino_str_t stmp;

    *ec = 0;
    vcp->reads++;

#ifdef AFS_DEMAND_ATTACH_FS
    VnChangeState_r(vnp, VN_STATE_LOAD);
#endif

    /* This will never block */
    VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);

    VOL_UNLOCK;
    fdP = IH_OPEN(ihP);
    if (fdP == NULL) {
	Log("VnLoad: can't open index dev=%u, i=%s\n", vp->device,
	    PrintInode(stmp, vp->vnodeIndex[class].handle->ih_ino));
	*ec = VIO;
	goto error_encountered_nolock;
    } else if ((nBytes = FDH_PREAD(fdP, (char *)&vnp->disk, vcp->diskSize, vnodeIndexOffset(vcp, Vn_id(vnp))))
	       != vcp->diskSize) {
	/* Don't take volume off line if the inumber is out of range
	 * or the inode table is full. */
	if (nBytes == BAD_IGET) {
	    Log("VnLoad: bad inumber %s\n",
		PrintInode(stmp, vp->vnodeIndex[class].handle->ih_ino));
	    *ec = VIO;
	    dosalv = 0;
	} else if (nBytes == -1 && errno == EIO) {
	    /* disk error; salvage */
	    Log("VnLoad: Couldn't read vnode %u, volume %" AFS_VOLID_FMT " (%s); volume needs salvage\n", Vn_id(vnp), afs_printable_VolumeId_lu(V_id(vp)), V_name(vp));
	} else {
	    /* vnode is not allocated */
	    if (GetLogLevel() >= 5)
		Log("VnLoad: Couldn't read vnode %u, volume %" AFS_VOLID_FMT " (%s); read %d bytes, errno %d\n",
		    Vn_id(vnp), afs_printable_VolumeId_lu(V_id(vp)), V_name(vp), (int)nBytes, errno);
	    *ec = VNOVNODE;
	    dosalv = 0;
	}
	goto error_encountered_nolock;
    }
    FDH_CLOSE(fdP);
    VOL_LOCK;

    /* Quick check to see that the data is reasonable */
    if (vnp->disk.vnodeMagic != vcp->magic || vnp->disk.type == vNull) {
	if (vnp->disk.type == vNull) {
	    /* unallocated slot: not an on-disk corruption, no salvage */
	    *ec = VNOVNODE;
	    dosalv = 0;
	} else {
	    struct vnodeIndex *index = &vp->vnodeIndex[class];
	    unsigned int bitNumber = vnodeIdToBitNumber(Vn_id(vnp));
	    unsigned int offset = bitNumber >> 3;

#ifdef AFS_DEMAND_ATTACH_FS
	    /* Make sure the volume bitmap isn't getting updated while we are
	     * checking it */
	    VWaitExclusiveState_r(vp);
#endif

	    /* Test to see if vnode number is valid. */
	    if ((offset >= index->bitmapSize)
		|| ((*(index->bitmap + offset) & (1 << (bitNumber & 0x7)))
		    == 0)) {
		Log("VnLoad: Request for unallocated vnode %u, volume %" AFS_VOLID_FMT " (%s) denied.\n", Vn_id(vnp), afs_printable_VolumeId_lu(V_id(vp)), V_name(vp));
		*ec = VNOVNODE;
		dosalv = 0;
	    } else {
		/* bitmap says allocated but magic is wrong: salvage */
		Log("VnLoad: Bad magic number, vnode %u, volume %" AFS_VOLID_FMT " (%s); volume needs salvage\n", Vn_id(vnp), afs_printable_VolumeId_lu(V_id(vp)), V_name(vp));
	    }
	}
	goto error_encountered;
    }

    IH_INIT(vnp->handle, V_device(vp), afs_printable_VolumeId_lu(V_parentId(vp)), VN_GET_INO(vnp));
    VnUnlock(vnp, WRITE_LOCK);
#ifdef AFS_DEMAND_ATTACH_FS
    VnChangeState_r(vnp, VN_STATE_ONLINE);
#endif
    return;


 error_encountered_nolock:
    if (fdP) {
	FDH_REALLYCLOSE(fdP);
    }
    VOL_LOCK;

 error_encountered:
    if (dosalv) {
#ifdef AFS_DEMAND_ATTACH_FS
	VRequestSalvage_r(&error, vp, SALVSYNC_ERROR, 0);
#else
	VForceOffline_r(vp, 0);
	error = VSALVAGE;
#endif
	if (!*ec)
	    *ec = error;
    }

    VInvalidateVnode_r(vnp);
    VnUnlock(vnp, WRITE_LOCK);
}
1022
1023 /**
1024 * store a vnode to disk.
1025 *
1026 * @param[out] ec error code output
1027 * @param[in] vp volume object pointer
1028 * @param[in] vnp vnode object pointer
1029 * @param[in] vcp vnode class info object pointer
1030 * @param[in] class vnode class enumeration
1031 *
1032 * @pre VOL_LOCK held.
1033 * caller holds refs to volume and vnode.
1034 * DAFS: caller is responsible for performing state sanity checks.
1035 *
1036 * @post vnode state is stored to disk.
1037 *
1038 * @internal vnode package internal use only
1039 */
1040 static void
1041 VnStore(Error * ec, Volume * vp, Vnode * vnp,
1042 struct VnodeClassInfo * vcp, VnodeClass class)
1043 {
1044 ssize_t nBytes;
1045 afs_foff_t offset;
1046 IHandle_t *ihP = vp->vnodeIndex[class].handle;
1047 FdHandle_t *fdP;
1048 afs_ino_str_t stmp;
1049 #ifdef AFS_DEMAND_ATTACH_FS
1050 VnState vn_state_save;
1051 #endif
1052
1053 *ec = 0;
1054
1055 #ifdef AFS_DEMAND_ATTACH_FS
1056 vn_state_save = VnChangeState_r(vnp, VN_STATE_STORE);
1057 #endif
1058
1059 offset = vnodeIndexOffset(vcp, Vn_id(vnp));
1060 VOL_UNLOCK;
1061 fdP = IH_OPEN(ihP);
1062 if (fdP == NULL) {
1063 Log("VnStore: can't open index file!\n");
1064 goto error_encountered;
1065 }
1066 nBytes = FDH_PWRITE(fdP, &vnp->disk, vcp->diskSize, offset);
1067 if (nBytes != vcp->diskSize) {
1068 /* Don't force volume offline if the inumber is out of
1069 * range or the inode table is full.
1070 */
1071 FDH_REALLYCLOSE(fdP);
1072 if (nBytes == BAD_IGET) {
1073 Log("VnStore: bad inumber %s\n",
1074 PrintInode(stmp,
1075 vp->vnodeIndex[class].handle->ih_ino));
1076 *ec = VIO;
1077 VOL_LOCK;
1078 #ifdef AFS_DEMAND_ATTACH_FS
1079 VnChangeState_r(vnp, VN_STATE_ERROR);
1080 #endif
1081 } else {
1082 Log("VnStore: Couldn't write vnode %u, volume %" AFS_VOLID_FMT " (%s) (error %d)\n", Vn_id(vnp), afs_printable_VolumeId_lu(V_id(Vn_volume(vnp))), V_name(Vn_volume(vnp)), (int)nBytes);
1083 #ifdef AFS_DEMAND_ATTACH_FS
1084 goto error_encountered;
1085 #else
1086 VOL_LOCK;
1087 VForceOffline_r(vp, 0);
1088 *ec = VSALVAGE;
1089 #endif
1090 }
1091 return;
1092 } else {
1093 FDH_CLOSE(fdP);
1094 }
1095
1096 VOL_LOCK;
1097 #ifdef AFS_DEMAND_ATTACH_FS
1098 VnChangeState_r(vnp, vn_state_save);
1099 #endif
1100 return;
1101
1102 error_encountered:
1103 #ifdef AFS_DEMAND_ATTACH_FS
1104 /* XXX instead of dumping core, let's try to request a salvage
1105 * and just fail the putvnode */
1106 if (fdP)
1107 FDH_CLOSE(fdP);
1108 VOL_LOCK;
1109 VnChangeState_r(vnp, VN_STATE_ERROR);
1110 VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
1111 #else
1112 opr_abort();
1113 #endif
1114 }
1115
1116 /**
1117 * get a handle to a vnode object.
1118 *
1119 * @param[out] ec error code
1120 * @param[in] vp volume object
1121 * @param[in] vnodeNumber vnode id
1122 * @param[in] locktype type of lock to acquire
1123 *
1124 * @return vnode object pointer
1125 *
1126 * @see VGetVnode_r
1127 */
1128 Vnode *
1129 VGetVnode(Error * ec, Volume * vp, VnodeId vnodeNumber, int locktype)
1130 { /* READ_LOCK or WRITE_LOCK, as defined in lock.h */
1131 Vnode *retVal;
1132 VOL_LOCK;
1133 retVal = VGetVnode_r(ec, vp, vnodeNumber, locktype);
1134 VOL_UNLOCK;
1135 return retVal;
1136 }
1137
/**
 * get a handle to a vnode object.
 *
 * @param[out] ec           error code
 * @param[in]  vp           volume object
 * @param[in]  vnodeNumber  vnode id
 * @param[in]  locktype     type of lock to acquire (READ_LOCK or WRITE_LOCK,
 *                          as defined in lock.h)
 *
 * @return vnode object pointer
 * @retval NULL failure; see 'ec' for the reason (VNOVNODE, VOFFLINE,
 *              VREADONLY, VSALVAGE/VSALVAGING, ...)
 *
 * @internal vnode package internal use only
 *
 * @pre VOL_LOCK held.
 *      heavyweight ref held on volume object.
 *
 * @post on success, caller holds a reservation and the requested lock
 *       on the returned vnode; both are released by VPutVnode_r.
 */
Vnode *
VGetVnode_r(Error * ec, Volume * vp, VnodeId vnodeNumber, int locktype)
{				/* READ_LOCK or WRITE_LOCK, as defined in lock.h */
    Vnode *vnp;
    VnodeClass class;
    struct VnodeClassInfo *vcp;

    *ec = 0;

    /* vnode id 0 is never valid */
    if (vnodeNumber == 0) {
	*ec = VNOVNODE;
	return NULL;
    }

    VNLog(100, 1, vnodeNumber, 0, 0, 0);

#ifdef AFS_DEMAND_ATTACH_FS
    /*
     * once a volume has entered an error state, don't permit
     * further operations to proceed
     *  -- tkeiser 11/21/2007
     */
    VWaitExclusiveState_r(vp);
    if (VIsErrorState(V_attachState(vp))) {
	/* XXX is VSALVAGING acceptable here? */
	*ec = VSALVAGING;
	return NULL;
    }
#endif

    if (programType == fileServer && !V_inUse(vp)) {
	*ec = (vp->specialStatus ? vp->specialStatus : VOFFLINE);

	/* If the volume is VBUSY (being cloned or dumped) and this is
	 * a READ operation, then don't fail.
	 */
	if ((*ec != VBUSY) || (locktype != READ_LOCK)) {
	    return NULL;
	}
	*ec = 0;
    }
    class = vnodeIdToClass(vnodeNumber);
    vcp = &VnodeClassInfo[class];
    /* writes require a writeable volume */
    if (locktype == WRITE_LOCK && !VolumeWriteable(vp)) {
	*ec = (bit32) VREADONLY;
	return NULL;
    }

    /* make sure the volume's header eventually gets written back */
    if (locktype == WRITE_LOCK && programType == fileServer) {
	VAddToVolumeUpdateList_r(ec, vp);
	if (*ec) {
	    return NULL;
	}
    }

    vcp->gets++;

    /* See whether the vnode is in the cache. */
    vnp = VLookupVnode(vp, vnodeNumber);
    if (vnp) {
	/* vnode is in cache */

	VNLog(101, 2, vnodeNumber, (intptr_t)vnp, 0, 0);
	/* pin the cache entry so it cannot be recycled underneath us */
	VnCreateReservation_r(vnp);

#ifdef AFS_DEMAND_ATTACH_FS
	/*
	 * this is the one DAFS case where we may run into contention.
	 * here's the basic control flow:
	 *
	 * if locktype is READ_LOCK:
	 *   wait until vnode is not exclusive
	 *   set to VN_STATE_READ
	 *   increment read count
	 *   done
	 * else
	 *   wait until vnode is quiescent
	 *   set to VN_STATE_EXCLUSIVE
	 *   done
	 */
	if (locktype == READ_LOCK) {
	    VnWaitExclusiveState_r(vnp);
	} else {
	    VnWaitQuiescent_r(vnp);
	}

	if (VnIsErrorState(Vn_state(vnp))) {
	    /* vnode hit an error while we slept; give up our reservation */
	    VnCancelReservation_r(vnp);
	    *ec = VSALVAGE;
	    return NULL;
	}
#endif /* AFS_DEMAND_ATTACH_FS */
    } else {
	/* vnode not cached */

	/* Not in cache; tentatively grab most distantly used one from the LRU
	 * chain */
	vcp->reads++;
	vnp = VGetFreeVnode_r(vcp, vp, vnodeNumber);

	/* Initialize */
	vnp->changed_newTime = vnp->changed_oldTime = 0;
	vnp->delete = 0;

	/*
	 * XXX for non-DAFS, there is a serious
	 * race condition here:
	 *
	 * two threads can race to load a vnode.  the net
	 * result is two struct Vnodes can be allocated
	 * and hashed, which point to the same underlying
	 * disk data store.  conflicting vnode locks can
	 * thus be held concurrently.
	 *
	 * for non-DAFS to be safe, VOL_LOCK really shouldn't
	 * be dropped in VnLoad.  Of course, this would likely
	 * lead to an unacceptable slow-down.
	 */

	VnLoad(ec, vp, vnp, vcp, class);
	if (*ec) {
	    /* load failed; VnLoad is responsible for error labelling */
	    VnCancelReservation_r(vnp);
	    return NULL;
	}
#ifndef AFS_DEMAND_ATTACH_FS
	/* non-DAFS: VnLoad does not hash the vnode itself */
	AddToVnHash(vnp);
#endif
	/*
	 * DAFS:
	 * there is no possibility for contention. we "own" this vnode.
	 */
    }

    /*
     * DAFS:
     * it is imperative that nothing drop vol lock between here
     * and the VnBeginRead/VnChangeState stanza below
     */

    VnLock(vnp, locktype, VOL_LOCK_HELD, MIGHT_DEADLOCK);

    /* Check that the vnode hasn't been removed while we were obtaining
     * the lock */
    VNLog(102, 2, vnodeNumber, (intptr_t) vnp, 0, 0);
    if ((vnp->disk.type == vNull) || (Vn_cacheCheck(vnp) == 0)) {
	VnUnlock(vnp, locktype);
	VnCancelReservation_r(vnp);
	*ec = VNOVNODE;
	/* vnode is labelled correctly by now, so we don't have to invalidate it */
	return NULL;
    }

#ifdef AFS_DEMAND_ATTACH_FS
    /* transition the vnode state machine now that we hold the lock */
    if (locktype == READ_LOCK) {
	VnBeginRead_r(vnp);
    } else {
	VnChangeState_r(vnp, VN_STATE_EXCLUSIVE);
    }
#endif

    if (programType == fileServer)
	VBumpVolumeUsage_r(Vn_volume(vnp));	/* Hack; don't know where it should be
						 * called from.  Maybe VGetVolume */
    return vnp;
}
1318
1319
/* When nonzero (the default), a vnode found in the cache is trusted as-is;
 * when zero, cached entries are bypassed on lookup. */
int TrustVnodeCacheEntry = 1;
/* This variable is bogus--when it's set to 0, the hash chains fill
   up with multiple versions of the same vnode. Should fix this!! */
/**
 * put back a handle to a vnode object.
 *
 * Convenience wrapper around VPutVnode_r; acquires and releases
 * VOL_LOCK around the call.
 *
 * @param[out] ec  client error code
 * @param[in]  vnp vnode object pointer
 *
 * @see VPutVnode_r
 */
void
VPutVnode(Error * ec, Vnode * vnp)
{
    VOL_LOCK;
    VPutVnode_r(ec, vnp);
    VOL_UNLOCK;
}
1330
/**
 * put back a handle to a vnode object.
 *
 * @param[out] ec  client error code
 * @param[in]  vnp vnode object pointer
 *
 * @pre VOL_LOCK held.
 *      ref held on vnode.
 *
 * @post ref dropped on vnode.
 *       if vnode was modified or deleted, it is written out to disk
 *       (assuming a write lock was held).
 *
 * @internal volume package internal use only
 */
void
VPutVnode_r(Error * ec, Vnode * vnp)
{
    int writeLocked;
    VnodeClass class;
    struct VnodeClassInfo *vcp;

    *ec = 0;
    opr_Assert(Vn_refcount(vnp) != 0);
    class = vnodeIdToClass(Vn_id(vnp));
    vcp = &VnodeClassInfo[class];
    opr_Assert(vnp->disk.vnodeMagic == vcp->magic);
    VNLog(200, 2, Vn_id(vnp), (intptr_t) vnp, 0, 0);

#ifdef AFS_DEMAND_ATTACH_FS
    /* under DAFS, "write locked" is expressed as the exclusive vnode state */
    writeLocked = (Vn_state(vnp) == VN_STATE_EXCLUSIVE);
#else
    writeLocked = WriteLocked(&vnp->lock);
#endif

    if (writeLocked) {
	/* sanity checks */
#ifdef AFS_PTHREAD_ENV
	pthread_t thisProcess = pthread_self();
#else /* AFS_PTHREAD_ENV */
	PROCESS thisProcess;
	LWP_CurrentProcess(&thisProcess);
#endif /* AFS_PTHREAD_ENV */
	/* NOTE(review): both flag bits below are shifted by 1, so
	 * changed_newTime and changed_oldTime share a bit in the logged
	 * word — looks like one shift was meant to be << 2.  Affects the
	 * debug log only; verify before changing. */
	VNLog(201, 2, (intptr_t) vnp,
	      ((vnp->changed_newTime) << 1) | ((vnp->
						changed_oldTime) << 1) | vnp->
	      delete, 0, 0);
	/* only the thread that took the write lock may put the vnode back */
	if (thisProcess != vnp->writer)
	    Abort("VPutVnode: Vnode at %"AFS_PTR_FMT" locked by another process!\n",
		  vnp);


	if (vnp->changed_oldTime || vnp->changed_newTime || vnp->delete) {
	    Volume *vp = Vn_volume(vnp);
	    afs_uint32 now = FT_ApproxTime();
	    opr_Assert(Vn_cacheCheck(vnp) == vp->cacheCheck);

	    if (vnp->delete) {
		/* No longer any directory entries for this vnode. Free the Vnode */
		memset(&vnp->disk, 0, sizeof(vnp->disk));
		/* delete flag turned off further down */
		VNLog(202, 2, Vn_id(vnp), (intptr_t) vnp, 0, 0);
	    } else if (vnp->changed_newTime) {
		vnp->disk.serverModifyTime = now;
	    }
	    if (vnp->changed_newTime)
	    {
		/* client-visible change: stamp the volume too */
		V_updateDate(vp) = vp->updateTime = now;
		if(V_volUpdateCounter(vp)< UINT_MAX)
		    V_volUpdateCounter(vp)++;
	    }

	    /* The vnode has been changed. Write it out to disk */
	    if (!V_inUse(vp)) {
		/* volume went offline underneath us; request a salvage
		 * rather than writing to a dead volume */
#ifdef AFS_DEMAND_ATTACH_FS
		VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
#else
		opr_Assert(V_needsSalvaged(vp));
		*ec = VSALVAGE;
#endif
	    } else {
		VnStore(ec, vp, vnp, vcp, class);

		/* If the vnode is to be deleted, and we wrote the vnode out,
		 * free its bitmap entry. Do after the vnode is written so we
		 * don't allocate from bitmap before the vnode is written
		 * (doing so could cause a "addled bitmap" message).
		 */
		if (vnp->delete && !*ec) {
		    /* clamp filecount at zero (it is unsigned) */
		    if (V_filecount(Vn_volume(vnp))-- < 1)
			V_filecount(Vn_volume(vnp)) = 0;
		    VFreeBitMapEntry_r(ec, vp, &vp->vnodeIndex[class],
				       vnodeIdToBitNumber(Vn_id(vnp)),
				       VOL_FREE_BITMAP_WAIT);
		}
	    }
	    vcp->writes++;
	    vnp->changed_newTime = vnp->changed_oldTime = 0;
	}
#ifdef AFS_DEMAND_ATTACH_FS
	VnChangeState_r(vnp, VN_STATE_ONLINE);
#endif
    } else {			/* Not write locked */
	/* a read-locked vnode must not carry modification flags */
	if (vnp->changed_newTime || vnp->changed_oldTime || vnp->delete)
	    Abort
		("VPutVnode: Change or delete flag for vnode "
		 "%"AFS_PTR_FMT" is set but vnode is not write locked!\n",
		 vnp);
#ifdef AFS_DEMAND_ATTACH_FS
	VnEndRead_r(vnp);
#endif
    }

    /* Do not look at disk portion of vnode after this point; it may
     * have been deleted above */
    vnp->delete = 0;
    VnUnlock(vnp, ((writeLocked) ? WRITE_LOCK : READ_LOCK));
    VnCancelReservation_r(vnp);
}
1450
1451 /*
1452 * Make an attempt to convert a vnode lock from write to read.
1453 * Do nothing if the vnode isn't write locked or the vnode has
1454 * been deleted.
1455 */
1456 int
1457 VVnodeWriteToRead(Error * ec, Vnode * vnp)
1458 {
1459 int retVal;
1460 VOL_LOCK;
1461 retVal = VVnodeWriteToRead_r(ec, vnp);
1462 VOL_UNLOCK;
1463 return retVal;
1464 }
1465
1466 /**
1467 * convert vnode handle from mutually exclusive to shared access.
1468 *
1469 * @param[out] ec client error code
1470 * @param[in] vnp vnode object pointer
1471 *
1472 * @return unspecified use (see out argument 'ec' for error code return)
1473 *
1474 * @pre VOL_LOCK held.
1475 * ref held on vnode.
1476 * write lock held on vnode.
1477 *
1478 * @post read lock held on vnode.
1479 * if vnode was modified, it has been written to disk.
1480 *
1481 * @internal volume package internal use only
1482 */
1483 int
1484 VVnodeWriteToRead_r(Error * ec, Vnode * vnp)
1485 {
1486 int writeLocked;
1487 VnodeClass class;
1488 struct VnodeClassInfo *vcp;
1489 #ifdef AFS_PTHREAD_ENV
1490 pthread_t thisProcess;
1491 #else /* AFS_PTHREAD_ENV */
1492 PROCESS thisProcess;
1493 #endif /* AFS_PTHREAD_ENV */
1494
1495 *ec = 0;
1496 opr_Assert(Vn_refcount(vnp) != 0);
1497 class = vnodeIdToClass(Vn_id(vnp));
1498 vcp = &VnodeClassInfo[class];
1499 opr_Assert(vnp->disk.vnodeMagic == vcp->magic);
1500 VNLog(300, 2, Vn_id(vnp), (intptr_t) vnp, 0, 0);
1501
1502 #ifdef AFS_DEMAND_ATTACH_FS
1503 writeLocked = (Vn_state(vnp) == VN_STATE_EXCLUSIVE);
1504 #else
1505 writeLocked = WriteLocked(&vnp->lock);
1506 #endif
1507 if (!writeLocked) {
1508 return 0;
1509 }
1510
1511
1512 VNLog(301, 2, (intptr_t) vnp,
1513 ((vnp->changed_newTime) << 1) | ((vnp->
1514 changed_oldTime) << 1) | vnp->
1515 delete, 0, 0);
1516
1517 /* sanity checks */
1518 #ifdef AFS_PTHREAD_ENV
1519 thisProcess = pthread_self();
1520 #else /* AFS_PTHREAD_ENV */
1521 LWP_CurrentProcess(&thisProcess);
1522 #endif /* AFS_PTHREAD_ENV */
1523 if (thisProcess != vnp->writer)
1524 Abort("VPutVnode: Vnode at %"AFS_PTR_FMT
1525 " locked by another process!\n", vnp);
1526
1527 if (vnp->delete) {
1528 return 0;
1529 }
1530 if (vnp->changed_oldTime || vnp->changed_newTime) {
1531 Volume *vp = Vn_volume(vnp);
1532 afs_uint32 now = FT_ApproxTime();
1533 opr_Assert(Vn_cacheCheck(vnp) == vp->cacheCheck);
1534 if (vnp->changed_newTime)
1535 vnp->disk.serverModifyTime = now;
1536 if (vnp->changed_newTime)
1537 V_updateDate(vp) = vp->updateTime = now;
1538
1539 /* The inode has been changed. Write it out to disk */
1540 if (!V_inUse(vp)) {
1541 #ifdef AFS_DEMAND_ATTACH_FS
1542 VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
1543 #else
1544 opr_Assert(V_needsSalvaged(vp));
1545 *ec = VSALVAGE;
1546 #endif
1547 } else {
1548 VnStore(ec, vp, vnp, vcp, class);
1549 }
1550 vcp->writes++;
1551 vnp->changed_newTime = vnp->changed_oldTime = 0;
1552 }
1553
1554 vnp->writer = 0;
1555 #ifdef AFS_DEMAND_ATTACH_FS
1556 VnChangeState_r(vnp, VN_STATE_ONLINE);
1557 VnBeginRead_r(vnp);
1558 #else
1559 ConvertWriteToReadLock(&vnp->lock);
1560 #endif
1561 return 0;
1562 }
1563
/**
 * initial size of ihandle pointer vector (number of IHandle_t * slots).
 *
 * @see VInvalidateVnodesByVolume_r
 */
#define IH_VEC_BASE_SIZE 256

/**
 * increment amount (in slots) for growing the ihandle pointer vector.
 *
 * @see VInvalidateVnodesByVolume_r
 */
#define IH_VEC_INCREMENT 256
1577
/**
 * Compile list of ihandles to be released/reallyclosed at a later time.
 *
 * @param[in]   vp            volume object pointer
 * @param[out]  vec_out       vector of ihandle pointers to be released/reallyclosed
 * @param[out]  vec_len_out   number of valid elements in ihandle vector
 *
 * @pre - VOL_LOCK is held
 *      - volume is in appropriate exclusive state (e.g. VOL_STATE_VNODE_CLOSE,
 *        VOL_STATE_VNODE_RELEASE)
 *
 * @post - all vnodes on VVn list are invalidated
 *       - ih_vec is populated with all valid ihandles
 *
 * @return operation status
 *    @retval 0 success
 *    @retval ENOMEM out of memory
 *
 * @note NOTE(review): on the initial-malloc ENOMEM path, *vec_out and
 *       *vec_len_out are left unset — callers must not touch them unless
 *       the return value is 0.  (Current callers opr_Verify the result.)
 *
 * @todo we should handle out of memory conditions more gracefully.
 *
 * @internal vnode package internal use only
 */
static int
VInvalidateVnodesByVolume_r(Volume * vp,
			    IHandle_t *** vec_out,
			    size_t * vec_len_out)
{
    int ret = 0;
    Vnode *vnp, *nvnp;
    size_t i = 0, vec_len;
    IHandle_t **ih_vec, **ih_vec_new;

    /* DAFS: drop the glock around malloc; the volume's exclusive state
     * keeps the VVn list stable while unlocked */
#ifdef AFS_DEMAND_ATTACH_FS
    VOL_UNLOCK;
#endif /* AFS_DEMAND_ATTACH_FS */

    vec_len = IH_VEC_BASE_SIZE;
    ih_vec = malloc(sizeof(IHandle_t *) * vec_len);
#ifdef AFS_DEMAND_ATTACH_FS
    VOL_LOCK;
#endif
    if (ih_vec == NULL)
	return ENOMEM;

    /*
     * Traverse the volume's vnode list.  Pull all the ihandles out into a
     * thread-private array for later asynchronous processing.
     */
#ifdef AFS_DEMAND_ATTACH_FS
restart_traversal:
#endif
    for (queue_Scan(&vp->vnode_list, vnp, nvnp, Vnode)) {
	if (vnp->handle != NULL) {
	    if (i == vec_len) {
		/* vector full; grow it (glock dropped around realloc on DAFS) */
#ifdef AFS_DEMAND_ATTACH_FS
		VOL_UNLOCK;
#endif
		vec_len += IH_VEC_INCREMENT;
		ih_vec_new = realloc(ih_vec, sizeof(IHandle_t *) * vec_len);
#ifdef AFS_DEMAND_ATTACH_FS
		VOL_LOCK;
#endif
		if (ih_vec_new == NULL) {
		    /* original ih_vec is still valid; hand back what we have */
		    ret = ENOMEM;
		    goto done;
		}
		ih_vec = ih_vec_new;
#ifdef AFS_DEMAND_ATTACH_FS
		/*
		 * Theoretically, the volume's VVn list should not change
		 * because the volume is in an exclusive state.  For the
		 * sake of safety, we will restart the traversal from the
		 * the beginning (which is not expensive because we're
		 * deleting the items from the list as we go).
		 */
		goto restart_traversal;
#endif
	    }
	    /* take ownership of the handle; vnode no longer references it */
	    ih_vec[i++] = vnp->handle;
	    vnp->handle = NULL;
	}
	DeleteFromVVnList(vnp);
	VInvalidateVnode_r(vnp);
    }

 done:
    *vec_out = ih_vec;
    *vec_len_out = i;

    return ret;
}
1669
1670 /* VCloseVnodeFiles - called when a volume is going off line. All open
1671 * files for vnodes in that volume are closed. This might be excessive,
1672 * since we may only be taking one volume of a volume group offline.
1673 */
1674 void
1675 VCloseVnodeFiles_r(Volume * vp)
1676 {
1677 #ifdef AFS_DEMAND_ATTACH_FS
1678 VolState vol_state_save;
1679 #endif
1680 IHandle_t ** ih_vec;
1681 size_t i, vec_len;
1682
1683 #ifdef AFS_DEMAND_ATTACH_FS
1684 vol_state_save = VChangeState_r(vp, VOL_STATE_VNODE_CLOSE);
1685 #endif /* AFS_DEMAND_ATTACH_FS */
1686
1687 /* XXX need better error handling here */
1688 opr_Verify(VInvalidateVnodesByVolume_r(vp, &ih_vec,
1689 &vec_len) == 0);
1690
1691 /*
1692 * DAFS:
1693 * now we drop VOL_LOCK while we perform some potentially very
1694 * expensive operations in the background
1695 */
1696 #ifdef AFS_DEMAND_ATTACH_FS
1697 VOL_UNLOCK;
1698 #endif
1699
1700 for (i = 0; i < vec_len; i++) {
1701 IH_REALLYCLOSE(ih_vec[i]);
1702 IH_RELEASE(ih_vec[i]);
1703 }
1704
1705 free(ih_vec);
1706
1707 #ifdef AFS_DEMAND_ATTACH_FS
1708 VOL_LOCK;
1709 VChangeState_r(vp, vol_state_save);
1710 #endif /* AFS_DEMAND_ATTACH_FS */
1711 }
1712
1713
1714 /**
1715 * shut down all vnode cache state for a given volume.
1716 *
1717 * @param[in] vp volume object pointer
1718 *
1719 * @pre VOL_LOCK is held
1720 *
1721 * @post all file descriptors closed.
1722 * all inode handles released.
1723 * all vnode cache objects disassociated from volume.
1724 *
1725 * @note for DAFS, these operations are performed outside the vol glock under
1726 * volume exclusive state VOL_STATE_VNODE_RELEASE. Please further note
1727 * that it would be a bug to acquire and release a volume reservation
1728 * during this exclusive operation. This is due to the fact that we are
1729 * generally called during the refcount 1->0 transition.
1730 *
1731 * @todo we should handle failures in VInvalidateVnodesByVolume_r more
1732 * gracefully.
1733 *
1734 * @see VInvalidateVnodesByVolume_r
1735 *
1736 * @internal this routine is internal to the volume package
1737 */
1738 void
1739 VReleaseVnodeFiles_r(Volume * vp)
1740 {
1741 #ifdef AFS_DEMAND_ATTACH_FS
1742 VolState vol_state_save;
1743 #endif
1744 IHandle_t ** ih_vec;
1745 size_t i, vec_len;
1746
1747 #ifdef AFS_DEMAND_ATTACH_FS
1748 vol_state_save = VChangeState_r(vp, VOL_STATE_VNODE_RELEASE);
1749 #endif /* AFS_DEMAND_ATTACH_FS */
1750
1751 /* XXX need better error handling here */
1752 opr_Verify(VInvalidateVnodesByVolume_r(vp, &ih_vec,
1753 &vec_len) == 0);
1754
1755 /*
1756 * DAFS:
1757 * now we drop VOL_LOCK while we perform some potentially very
1758 * expensive operations in the background
1759 */
1760 #ifdef AFS_DEMAND_ATTACH_FS
1761 VOL_UNLOCK;
1762 #endif
1763
1764 for (i = 0; i < vec_len; i++) {
1765 IH_RELEASE(ih_vec[i]);
1766 }
1767
1768 free(ih_vec);
1769
1770 #ifdef AFS_DEMAND_ATTACH_FS
1771 VOL_LOCK;
1772 VChangeState_r(vp, vol_state_save);
1773 #endif /* AFS_DEMAND_ATTACH_FS */
1774 }