Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / vol / vutil.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 /*
11 System: VICE-TWO
12 Module: vutil.c
13 Institution: The Information Technology Center, Carnegie-Mellon University
14
15 */
16
17 #include <afsconfig.h>
18 #include <afs/param.h>
19
20 #include <roken.h>
21 #include <afs/opr.h>
22
23 #ifdef HAVE_SYS_FILE_H
24 #include <sys/file.h>
25 #endif
26
27 #ifdef HAVE_SYS_LOCKF_H
28 #include <sys/lockf.h>
29 #endif
30
31 #ifdef AFS_PTHREAD_ENV
32 # include <opr/lock.h>
33 #else
34 # include <opr/lockstub.h>
35 #endif
36
37 #include <rx/rx_queue.h>
38 #include <rx/xdr.h>
39 #include <afs/afsint.h>
40 #include "nfs.h"
41 #include <afs/errors.h>
42 #include "lock.h"
43 #include "lwp.h"
44 #include <afs/afssyscalls.h>
45 #include "ihandle.h"
46 #include <afs/afsutil.h>
47 #ifdef AFS_NT40_ENV
48 #include "ntops.h"
49 #endif
50 #include "vnode.h"
51 #include "volume.h"
52 #include "volume_inline.h"
53 #include "partition.h"
54 #include "viceinode.h"
55
56 #include "volinodes.h"
57 #include "vol_prototypes.h"
58 #include "common.h"
59
60 #ifndef AFS_NT40_ENV
61 # ifdef O_LARGEFILE
62 # define AFS_SETLKW F_SETLKW64
63 # define AFS_SETLK F_SETLK64
64 # define afs_st_flock flock64
65 # else
66 # define AFS_SETLKW F_SETLKW
67 # define AFS_SETLK F_SETLK
68 # define afs_st_flock flock
69 # endif
70 #endif
71
72 /* Note: the volume creation functions herein leave the destroyMe flag in the
73 volume header ON: this means that the volumes will not be attached by the
74 file server and WILL BE DESTROYED the next time a system salvage is performed */
75
76 #ifdef FSSYNC_BUILD_CLIENT
77 static void
78 RemoveInodes(struct afs_inode_info *stuff, Device dev, VolumeId parent,
79 VolumeId vid)
80 {
81 int i;
82 IHandle_t *handle;
83
84 /* This relies on the fact that IDEC only needs the device and NT only
85 * needs the dev and vid to decrement volume special files.
86 */
87 IH_INIT(handle, dev, parent, -1);
88 for (i = 0; i < MAXINODETYPE; i++) {
89 Inode inode = *stuff[i].inode;
90 if (VALID_INO(inode)) {
91 if (stuff[i].inodeType == VI_LINKTABLE) {
92 IH_DEC(handle, inode, parent);
93 } else {
94 IH_DEC(handle, inode, vid);
95 }
96 }
97 }
98 IH_RELEASE(handle);
99 }
100
101 Volume *
102 VCreateVolume(Error * ec, char *partname, VolumeId volumeId, VolumeId parentId)
103 { /* Should be the same as volumeId if there is
104 * no parent */
105 Volume *retVal;
106 VOL_LOCK;
107 retVal = VCreateVolume_r(ec, partname, volumeId, parentId);
108 VOL_UNLOCK;
109 return retVal;
110 }
111
/*
 * Create the on-disk pieces of a new volume and attach it.
 *
 * Steps, in order:
 *   1. look up the partition and check the volume is not already present on
 *      a different partition (for clones / read-onlys);
 *   2. lock the volume (DAFS) or the partition (non-DAFS);
 *   3. refuse to continue if the header file already exists;
 *   4. create each non-obsolete special inode and write its version stamp;
 *   5. write the VolumeDiskData into the volumeInfo inode;
 *   6. create the volume disk header file;
 *   7. attach the volume with VAttachVolumeByName_r(..., V_SECRETLY).
 *
 * ec        out: 0 on success; VNOVOL, EXDEV or VVOLEXISTS on the failure
 *           paths below (or whatever VAttachVolumeByName_r stores)
 * partname  name of the partition to create the volume on
 * volumeId  id of the new volume
 * parentId  should be the same as volumeId if there is no parent
 *
 * Returns the result of VAttachVolumeByName_r() on success, NULL on failure.
 *
 * The volume info is written with destroyMe set to DESTROY_ME.
 *
 * NOTE: the "bad" and "bad_noheader" cleanup labels live inside the body of
 * the inode-creation loop; later code jumps back into that block.
 */
Volume *
VCreateVolume_r(Error * ec, char *partname, VolumeId volumeId, VolumeId parentId)
{
    VolumeDiskData vol;
    int i, rc;
    char headerName[VMAXPATHLEN], volumePath[VMAXPATHLEN];
    Device device;
    struct DiskPartition64 *partition;
    struct VolumeDiskHeader diskHeader;
    IHandle_t *handle;
    FdHandle_t *fdP;
    Inode nearInode AFS_UNUSED = 0;
    char *part, *name;
    struct stat st;
    struct VolumeHeader tempHeader;
    struct afs_inode_info stuff[MAXINODETYPE];
    afs_ino_str_t stmp;
# ifdef AFS_DEMAND_ATTACH_FS
    int locktype = 0;
# endif /* AFS_DEMAND_ATTACH_FS */

    /* point each stuff[] entry at the matching field of tempHeader */
    init_inode_info(&tempHeader, stuff);

    *ec = 0;
    memset(&vol, 0, sizeof(vol));
    vol.id = volumeId;
    vol.parentId = parentId;
    vol.copyDate = time(0);	/* The only date which really means when this
				 * @i(instance) of this volume was created.
				 * Creation date does not mean this */

    /* Initialize handle for error case below. */
    handle = NULL;

    /* Verify that the partition is valid before writing to it. */
    if (!(partition = VGetPartition_r(partname, 0))) {
        Log("VCreateVolume: partition %s is not in service.\n", partname);
        *ec = VNOVOL;
        return NULL;
    }
#if defined(NEARINODE_HINT)
    nearInodeHash(volumeId, nearInode);
    nearInode %= partition->f_files;
#endif
    /* see whether this volume id is already known on some partition */
    VGetVolumePath(ec, vol.id, &part, &name);
    if (*ec == VNOVOL || !strcmp(partition->name, part)) {
        /* this case is ok */
    } else {
        /* return EXDEV if it's a clone or read-only to an alternate partition
         * otherwise assume it's a move */
        if (vol.parentId != vol.id) {
            Log("VCreateVolume: volume %" AFS_VOLID_FMT " for parent %" AFS_VOLID_FMT
                " found on %s; unable to create volume on %s.\n",
                afs_printable_VolumeId_lu(vol.id),
                afs_printable_VolumeId_lu(vol.parentId), part, partition->name);
            *ec = EXDEV;
            return NULL;
        }
    }
    /* discard whatever VGetVolumePath stored; from here on ec is ours */
    *ec = 0;

# ifdef AFS_DEMAND_ATTACH_FS
    /* volume doesn't exist yet, but we must lock it to try to prevent something
     * else from reading it when we're e.g. half way through creating it (or
     * something tries to create the same volume at the same time) */
    locktype = VVolLockType(V_VOLUPD, 1);
    rc = VLockVolumeByIdNB(volumeId, partition, locktype);
    if (rc) {
        Log("VCreateVolume: vol %lu already locked by someone else\n",
            afs_printable_uint32_lu(volumeId));
        *ec = VNOVOL;
        return NULL;
    }
# else /* AFS_DEMAND_ATTACH_FS */
    VLockPartition_r(partname);
# endif /* !AFS_DEMAND_ATTACH_FS */

    /* build the in-memory header; the inode fields are filled in by the
     * loop below through the stuff[] pointers */
    memset(&tempHeader, 0, sizeof(tempHeader));
    tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
    tempHeader.stamp.version = VOLUMEHEADERVERSION;
    tempHeader.id = vol.id;
    tempHeader.parent = vol.parentId;
    vol.stamp.magic = VOLUMEINFOMAGIC;
    vol.stamp.version = VOLUMEINFOVERSION;
    vol.destroyMe = DESTROY_ME;
    snprintf(headerName, sizeof headerName, VFORMAT,
             afs_printable_VolumeId_lu(vol.id));
    snprintf(volumePath, sizeof volumePath, "%s" OS_DIRSEP "%s",
             VPartitionPath(partition), headerName);
    /* the header file must not exist yet; only ENOENT lets us continue */
    rc = stat(volumePath, &st);
    if (rc == 0 || errno != ENOENT) {
        if (rc == 0) {
            Log("VCreateVolume: Header file %s already exists!\n",
                volumePath);
            *ec = VVOLEXISTS;
        } else {
            Log("VCreateVolume: Error %d trying to stat header file %s\n",
                errno, volumePath);
            *ec = VNOVOL;
        }
        goto bad_noheader;
    }
    device = partition->device;

    /* create each special inode and stamp it */
    for (i = 0; i < MAXINODETYPE; i++) {
        struct afs_inode_info *p = &stuff[i];
        if (p->obsolete)
            continue;
#ifdef AFS_NAMEI_ENV
        *(p->inode) =
            IH_CREATE(NULL, device, VPartitionPath(partition), nearInode,
                      (p->inodeType == VI_LINKTABLE) ? vol.parentId : vol.id,
                      INODESPECIAL, p->inodeType, vol.parentId);
        if (!(VALID_INO(*(p->inode)))) {
            if (errno == EEXIST && (p->inodeType == VI_LINKTABLE)) {
                /* Increment the reference count instead. */
                IHandle_t *lh;
                int code;

                *(p->inode) = namei_MakeSpecIno(vol.parentId, VI_LINKTABLE);
                IH_INIT(lh, device, parentId, *(p->inode));
                fdP = IH_OPEN(lh);
                if (fdP == NULL) {
                    IH_RELEASE(lh);
                    goto bad;
                }
                code = IH_INC(lh, *(p->inode), parentId);
                FDH_REALLYCLOSE(fdP);
                IH_RELEASE(lh);
                if (code < 0)
                    goto bad;
                /* existing link table reused; no stamp is written for it */
                continue;
            }
        }
#else
        *(p->inode) =
            IH_CREATE(NULL, device, VPartitionPath(partition), nearInode,
                      vol.id, INODESPECIAL, p->inodeType, vol.parentId);
#endif

        if (!VALID_INO(*(p->inode))) {
            Log("VCreateVolume: Problem creating %s file associated with volume header %s\n", p->description, volumePath);
            /* common failure exit: undo inode creation and remove any
             * header file; also reached by goto from outside this loop */
          bad:
            if (handle)
                IH_RELEASE(handle);
            RemoveInodes(stuff, device, vol.parentId, vol.id);
            if (!*ec) {
                *ec = VNOVOL;
            }
            VDestroyVolumeDiskHeader(partition, volumeId, parentId);
            /* failure exit for paths that created nothing on disk */
          bad_noheader:
# ifdef AFS_DEMAND_ATTACH_FS
            if (locktype) {
                VUnlockVolumeById(volumeId, partition);
            }
# endif /* AFS_DEMAND_ATTACH_FS */
            return NULL;
        }
        /* write the version stamp at the start of the new inode */
        IH_INIT(handle, device, vol.parentId, *(p->inode));
        fdP = IH_OPEN(handle);
        if (fdP == NULL) {
            Log("VCreateVolume: Problem iopen inode %s (err=%d)\n",
                PrintInode(stmp, *(p->inode)), errno);
            goto bad;
        }
        if (FDH_PWRITE(fdP, (char *)&p->stamp, sizeof(p->stamp), 0) !=
            sizeof(p->stamp)) {
            Log("VCreateVolume: Problem writing to inode %s (err=%d)\n",
                PrintInode(stmp, *(p->inode)), errno);
            FDH_REALLYCLOSE(fdP);
            goto bad;
        }
        FDH_REALLYCLOSE(fdP);
        IH_RELEASE(handle);
        /* use the inode just created as the hint for the next IH_CREATE */
        nearInode = *(p->inode);
    }

    /* store the VolumeDiskData in the volume info inode */
    IH_INIT(handle, device, vol.parentId, tempHeader.volumeInfo);
    fdP = IH_OPEN(handle);
    if (fdP == NULL) {
        Log("VCreateVolume: Problem iopen inode %s (err=%d)\n",
            PrintInode(stmp, tempHeader.volumeInfo), errno);
        goto bad;
    }
    if (FDH_PWRITE(fdP, (char *)&vol, sizeof(vol), 0) != sizeof(vol)) {
        Log("VCreateVolume: Problem writing to inode %s (err=%d)\n",
            PrintInode(stmp, tempHeader.volumeInfo), errno);
        FDH_REALLYCLOSE(fdP);
        goto bad;
    }
    FDH_CLOSE(fdP);
    IH_RELEASE(handle);

    /* finally create the header file that makes the volume visible */
    VolumeHeaderToDisk(&diskHeader, &tempHeader);
    rc = VCreateVolumeDiskHeader(&diskHeader, partition);
    if (rc) {
        Log("VCreateVolume: Error %d trying to write volume header for "
            "volume %" AFS_VOLID_FMT " on partition %s; volume not created\n", rc,
            afs_printable_VolumeId_lu(vol.id), VPartitionPath(partition));
        if (rc == EEXIST) {
            *ec = VVOLEXISTS;
        }
        goto bad;
    }

# ifdef AFS_DEMAND_ATTACH_FS
    if (locktype) {
        VUnlockVolumeById(volumeId, partition);
    }
# endif /* AFS_DEMAND_ATTACH_FS */
    return (VAttachVolumeByName_r(ec, partname, headerName, V_SECRETLY));
}
325 #endif /* FSSYNC_BUILD_CLIENT */
326
327
328 void
329 AssignVolumeName(VolumeDiskData * vol, char *name, char *ext)
330 {
331 VOL_LOCK;
332 AssignVolumeName_r(vol, name, ext);
333 VOL_UNLOCK;
334 }
335
336 void
337 AssignVolumeName_r(VolumeDiskData * vol, char *name, char *ext)
338 {
339 char *dot;
340 strncpy(vol->name, name, VNAMESIZE - 1);
341 vol->name[VNAMESIZE - 1] = '\0';
342 dot = strrchr(vol->name, '.');
343 if (dot && (strcmp(dot, ".backup") == 0 || strcmp(dot, ".readonly") == 0))
344 *dot = 0;
345 if (ext)
346 strncat(vol->name, ext, VNAMESIZE - 1 - strlen(vol->name));
347 }
348
349 afs_int32
350 CopyVolumeHeader_r(VolumeDiskData * from, VolumeDiskData * to)
351 {
352 /* The id and parentId fields are not copied; these are inviolate--the to volume
353 * is assumed to have already been created. The id's cannot be changed once
354 * creation has taken place, since they are embedded in the various inodes associated
355 * with the volume. The copydate is also inviolate--it always reflects the time
356 * this volume was created (compare with the creation date--the creation date of
357 * a backup volume is the creation date of the original parent, because the backup
358 * is used to backup the parent volume). */
359 Date copydate;
360 VolumeId id, parent;
361 id = to->id;
362 parent = to->parentId;
363 copydate = to->copyDate;
364 memcpy(to, from, sizeof(*from));
365 to->id = id;
366 to->parentId = parent;
367 to->copyDate = copydate;
368 to->destroyMe = DESTROY_ME; /* Caller must always clear this!!! */
369 to->stamp.magic = VOLUMEINFOMAGIC;
370 to->stamp.version = VOLUMEINFOVERSION;
371 return 0;
372 }
373
374 afs_int32
375 CopyVolumeHeader(VolumeDiskData * from, VolumeDiskData * to)
376 {
377 afs_int32 code;
378
379 VOL_LOCK;
380 code = CopyVolumeHeader_r(from, to);
381 VOL_UNLOCK;
382 return (code);
383 }
384
385 void
386 ClearVolumeStats(VolumeDiskData * vol)
387 {
388 VOL_LOCK;
389 ClearVolumeStats_r(vol);
390 VOL_UNLOCK;
391 }
392
393 void
394 ClearVolumeStats_r(VolumeDiskData * vol)
395 {
396 memset(vol->weekUse, 0, sizeof(vol->weekUse));
397 vol->dayUse = 0;
398 vol->dayUseDate = 0;
399 }
400
401 void
402 CopyVolumeStats_r(VolumeDiskData * from, VolumeDiskData * to)
403 {
404 memcpy(to->weekUse, from->weekUse, sizeof(to->weekUse));
405 to->dayUse = from->dayUse;
406 to->dayUseDate = from->dayUseDate;
407 if (from->stat_initialized) {
408 memcpy(to->stat_reads, from->stat_reads, sizeof(to->stat_reads));
409 memcpy(to->stat_writes, from->stat_writes, sizeof(to->stat_writes));
410 memcpy(to->stat_fileSameAuthor, from->stat_fileSameAuthor,
411 sizeof(to->stat_fileSameAuthor));
412 memcpy(to->stat_fileDiffAuthor, from->stat_fileDiffAuthor,
413 sizeof(to->stat_fileDiffAuthor));
414 memcpy(to->stat_dirSameAuthor, from->stat_dirSameAuthor,
415 sizeof(to->stat_dirSameAuthor));
416 memcpy(to->stat_dirDiffAuthor, from->stat_dirDiffAuthor,
417 sizeof(to->stat_dirDiffAuthor));
418 }
419 }
420
421 void
422 CopyVolumeStats(VolumeDiskData * from, VolumeDiskData * to)
423 {
424 VOL_LOCK;
425 CopyVolumeStats_r(from, to);
426 VOL_UNLOCK;
427 }
428
429 /**
430 * read an existing volume disk header.
431 *
432 * @param[in] volid volume id
433 * @param[in] dp disk partition object
434 * @param[out] hdr volume disk header or NULL
435 *
436 * @note if hdr is NULL, this is essentially an existence test for the vol
437 * header
438 *
439 * @return operation status
440 * @retval 0 success
441 * @retval -1 volume header doesn't exist
442 * @retval EIO failed to read volume header
443 *
444 * @internal
445 */
446 afs_int32
447 VReadVolumeDiskHeader(VolumeId volid,
448 struct DiskPartition64 * dp,
449 VolumeDiskHeader_t * hdr)
450 {
451 afs_int32 code = 0;
452 int fd;
453 char path[MAXPATHLEN];
454
455 snprintf(path, sizeof(path), "%s" OS_DIRSEP VFORMAT,
456 VPartitionPath(dp), afs_printable_VolumeId_lu(volid));
457 fd = open(path, O_RDONLY);
458 if (fd < 0) {
459 Log("VReadVolumeDiskHeader: Couldn't open header for volume %" AFS_VOLID_FMT " (errno %d).\n",
460 afs_printable_VolumeId_lu(volid), errno);
461 code = -1;
462
463 } else if (hdr && read(fd, hdr, sizeof(*hdr)) != sizeof(*hdr)) {
464 Log("VReadVolumeDiskHeader: Couldn't read header for volume %" AFS_VOLID_FMT ".\n",
465 afs_printable_VolumeId_lu(volid));
466 code = EIO;
467 }
468
469 if (fd >= 0) {
470 close(fd);
471 }
472 return code;
473 }
474
475 #ifdef FSSYNC_BUILD_CLIENT
476 /**
477 * write an existing volume disk header.
478 *
479 * @param[in] hdr volume disk header
480 * @param[in] dp disk partition object
481 * @param[in] cr assert if O_CREAT | O_EXCL should be passed to open()
482 *
483 * @return operation status
484 * @retval 0 success
485 * @retval -1 volume header doesn't exist
486 * @retval EIO failed to write volume header
487 *
488 * @internal
489 */
490 static afs_int32
491 _VWriteVolumeDiskHeader(VolumeDiskHeader_t * hdr,
492 struct DiskPartition64 * dp,
493 int flags)
494 {
495 afs_int32 code = 0;
496 int fd;
497 char path[MAXPATHLEN];
498
499 #ifdef AFS_DEMAND_ATTACH_FS
500 /* prevent racing with VGC scanners reading the vol header while we are
501 * writing it */
502 code = VPartHeaderLock(dp, READ_LOCK);
503 if (code) {
504 return EIO;
505 }
506 #endif /* AFS_DEMAND_ATTACH_FS */
507
508 flags |= O_RDWR;
509
510 snprintf(path, sizeof(path), "%s" OS_DIRSEP VFORMAT,
511 VPartitionPath(dp), afs_printable_VolumeId_lu(hdr->id));
512 fd = open(path, flags, 0644);
513 if (fd < 0) {
514 code = errno;
515 Log("_VWriteVolumeDiskHeader: Couldn't open header for volume %lu, "
516 "error = %d\n", afs_printable_uint32_lu(hdr->id), errno);
517 } else if (write(fd, hdr, sizeof(*hdr)) != sizeof(*hdr)) {
518 Log("_VWriteVolumeDiskHeader: Couldn't write header for volume %lu, "
519 "error = %d\n", afs_printable_uint32_lu(hdr->id), errno);
520 code = EIO;
521 }
522
523 if (fd >= 0) {
524 if (close(fd) != 0) {
525 Log("_VWriteVolumeDiskHeader: Error closing header for volume "
526 "%lu, errno %d\n", afs_printable_uint32_lu(hdr->id), errno);
527 }
528 }
529
530 #ifdef AFS_DEMAND_ATTACH_FS
531 VPartHeaderUnlock(dp, READ_LOCK);
532 #endif /* AFS_DEMAND_ATTACH_FS */
533
534 return code;
535 }
536
537 /**
538 * write an existing volume disk header.
539 *
540 * @param[in] hdr volume disk header
541 * @param[in] dp disk partition object
542 *
543 * @return operation status
544 * @retval 0 success
545 * @retval ENOENT volume header doesn't exist
546 * @retval EIO failed to write volume header
547 */
548 afs_int32
549 VWriteVolumeDiskHeader(VolumeDiskHeader_t * hdr,
550 struct DiskPartition64 * dp)
551 {
552 afs_int32 code;
553
554 #ifdef AFS_DEMAND_ATTACH_FS
555 VolumeDiskHeader_t oldhdr;
556 int delvgc = 0, addvgc = 0;
557 SYNC_response res;
558
559 /* first, see if anything with the volume IDs have changed; if so, we
560 * need to update the VGC */
561
562 code = VReadVolumeDiskHeader(hdr->id, dp, &oldhdr);
563 if (code == 0 && (oldhdr.id != hdr->id || oldhdr.parent != hdr->parent)) {
564 /* the vol id or parent vol id changed; need to delete the VGC entry
565 * for the old vol id/parent, and add the new one */
566 delvgc = 1;
567 addvgc = 1;
568
569 } else if (code) {
570 /* couldn't get the old header info; add the new header info to the
571 * VGC in case it hasn't been added yet */
572 addvgc = 1;
573 }
574
575 #endif /* AFS_DEMAND_ATTACH_FS */
576
577 code = _VWriteVolumeDiskHeader(hdr, dp, 0);
578 if (code) {
579 goto done;
580 }
581
582 #ifdef AFS_DEMAND_ATTACH_FS
583 if (delvgc) {
584 memset(&res, 0, sizeof(res));
585 code = FSYNC_VGCDel(dp->name, oldhdr.parent, oldhdr.id, FSYNC_WHATEVER, &res);
586
587 /* unknown vol id is okay; it just further suggests the old header
588 * data was bogus, which is fine since we're trying to fix it */
589 if (code && res.hdr.reason != FSYNC_UNKNOWN_VOLID) {
590 Log("VWriteVolumeDiskHeader: FSYNC_VGCDel(%s, %lu, %lu) "
591 "failed with code %ld reason %ld\n", dp->name,
592 afs_printable_uint32_lu(oldhdr.parent),
593 afs_printable_uint32_lu(oldhdr.id),
594 afs_printable_int32_ld(code),
595 afs_printable_int32_ld(res.hdr.reason));
596 }
597
598 }
599 if (addvgc) {
600 memset(&res, 0, sizeof(res));
601 code = FSYNC_VGCAdd(dp->name, hdr->parent, hdr->id, FSYNC_WHATEVER, &res);
602 if (code) {
603 Log("VWriteVolumeDiskHeader: FSYNC_VGCAdd(%s, %lu, %lu) "
604 "failed with code %ld reason %ld\n", dp->name,
605 afs_printable_uint32_lu(hdr->parent),
606 afs_printable_uint32_lu(hdr->id),
607 afs_printable_int32_ld(code),
608 afs_printable_int32_ld(res.hdr.reason));
609 }
610 }
611
612 #endif /* AFS_DEMAND_ATTACH_FS */
613
614 done:
615 return code;
616 }
617
618 /**
619 * create and write a volume disk header to disk.
620 *
621 * @param[in] hdr volume disk header
622 * @param[in] dp disk partition object
623 *
624 * @return operation status
625 * @retval 0 success
626 * @retval EEXIST volume header already exists
627 * @retval EIO failed to write volume header
628 *
629 * @internal
630 */
631 afs_int32
632 VCreateVolumeDiskHeader(VolumeDiskHeader_t * hdr,
633 struct DiskPartition64 * dp)
634 {
635 afs_int32 code = 0;
636 #ifdef AFS_DEMAND_ATTACH_FS
637 SYNC_response res;
638 #endif /* AFS_DEMAND_ATTACH_FS */
639
640 code = _VWriteVolumeDiskHeader(hdr, dp, O_CREAT | O_EXCL);
641 if (code) {
642 goto done;
643 }
644
645 #ifdef AFS_DEMAND_ATTACH_FS
646 memset(&res, 0, sizeof(res));
647 code = FSYNC_VGCAdd(dp->name, hdr->parent, hdr->id, FSYNC_WHATEVER, &res);
648 if (code) {
649 Log("VCreateVolumeDiskHeader: FSYNC_VGCAdd(%s, %lu, %lu) failed "
650 "with code %ld reason %ld\n", dp->name,
651 afs_printable_uint32_lu(hdr->parent),
652 afs_printable_uint32_lu(hdr->id),
653 afs_printable_int32_ld(code),
654 afs_printable_int32_ld(res.hdr.reason));
655 }
656 #endif /* AFS_DEMAND_ATTACH_FS */
657
658 done:
659 return code;
660 }
661
662
663 /**
664 * destroy a volume disk header.
665 *
666 * @param[in] dp disk partition object
667 * @param[in] volid volume id
668 * @param[in] parent parent's volume id, 0 if unknown
669 *
670 * @return operation status
671 * @retval 0 success
672 *
673 * @note if parent is 0, the parent volume ID will be looked up from the
674 * fileserver
675 *
676 * @note for non-DAFS, parent is currently ignored
677 */
678 afs_int32
679 VDestroyVolumeDiskHeader(struct DiskPartition64 * dp,
680 VolumeId volid,
681 VolumeId parent)
682 {
683 afs_int32 code = 0;
684 char path[MAXPATHLEN];
685 #ifdef AFS_DEMAND_ATTACH_FS
686 SYNC_response res;
687 #endif /* AFS_DEMAND_ATTACH_FS */
688
689 snprintf(path, sizeof(path), "%s" OS_DIRSEP VFORMAT,
690 VPartitionPath(dp), afs_printable_VolumeId_lu(volid));
691 code = unlink(path);
692 if (code) {
693 Log("VDestroyVolumeDiskHeader: Couldn't unlink disk header, error = %d\n", errno);
694 goto done;
695 }
696
697 #ifdef AFS_DEMAND_ATTACH_FS
698 memset(&res, 0, sizeof(res));
699 if (!parent) {
700 FSSYNC_VGQry_response_t q_res;
701
702 code = FSYNC_VGCQuery(dp->name, volid, &q_res, &res);
703 if (code) {
704 Log("VDestroyVolumeDiskHeader: FSYNC_VGCQuery(%s, %lu) failed "
705 "with code %ld, reason %ld\n", dp->name,
706 afs_printable_uint32_lu(volid), afs_printable_int32_ld(code),
707 afs_printable_int32_ld(res.hdr.reason));
708 goto done;
709 }
710
711 parent = q_res.rw;
712
713 }
714 code = FSYNC_VGCDel(dp->name, parent, volid, FSYNC_WHATEVER, &res);
715 if (code) {
716 Log("VDestroyVolumeDiskHeader: FSYNC_VGCDel(%s, %" AFS_VOLID_FMT ", %" AFS_VOLID_FMT ") failed "
717 "with code %ld reason %ld\n", dp->name,
718 afs_printable_VolumeId_lu(parent),
719 afs_printable_VolumeId_lu(volid),
720 afs_printable_int32_ld(code),
721 afs_printable_int32_ld(res.hdr.reason));
722 }
723 #endif /* AFS_DEMAND_ATTACH_FS */
724
725 done:
726 return code;
727 }
728 #endif /* FSSYNC_BUILD_CLIENT */
729
730 /**
731 * handle a single vol header as part of VWalkVolumeHeaders.
732 *
733 * @param[in] dp disk partition
734 * @param[in] volfunc function to call when a vol header is successfully read
735 * @param[in] name full path name to the .vol header
736 * @param[out] hdr header data read in from the .vol header
737 * @param[in] locked 1 if the partition headers are locked, 0 otherwise
738 * @param[in] rock the rock to pass to volfunc
739 *
740 * @return operation status
741 * @retval 0 success
742 * @retval -1 fatal error, stop scanning
743 * @retval 1 failed to read header
744 * @retval 2 volfunc callback indicated error after header read
745 */
746 static int
747 _VHandleVolumeHeader(struct DiskPartition64 *dp, VWalkVolFunc volfunc,
748 const char *name, struct VolumeDiskHeader *hdr,
749 int locked, void *rock)
750 {
751 int error = 0;
752 FD_t fd;
753
754 if ((fd = OS_OPEN(name, O_RDONLY, 0)) == INVALID_FD
755 || OS_READ(fd, hdr, sizeof(*hdr))
756 != sizeof(*hdr)
757 || hdr->stamp.magic != VOLUMEHEADERMAGIC) {
758 error = 1;
759 }
760
761 if (fd != INVALID_FD) {
762 OS_CLOSE(fd);
763 }
764
765 #ifdef AFSFS_DEMAND_ATTACH_FS
766 if (locked) {
767 VPartHeaderUnlock(dp);
768 }
769 #endif /* AFS_DEMAND_ATTACH_FS */
770
771 if (!error && volfunc) {
772 /* the volume header seems fine; call the caller-supplied
773 * 'we-found-a-volume-header' function */
774 int last = 1;
775
776 #ifdef AFS_DEMAND_ATTACH_FS
777 if (!locked) {
778 last = 0;
779 }
780 #endif /* AFS_DEMAND_ATTACH_FS */
781
782 error = (*volfunc) (dp, name, hdr, last, rock);
783 if (error < 0) {
784 return -1;
785 }
786 if (error) {
787 error = 2;
788 }
789 }
790
791 #ifdef AFS_DEMAND_ATTACH_FS
792 if (error && !locked) {
793 int code;
794 /* retry reading the volume header under the partition
795 * header lock, just to be safe and ensure we're not
796 * racing something rewriting the vol header */
797 code = VPartHeaderLock(dp, WRITE_LOCK);
798 if (code) {
799 Log("Error acquiring partition write lock when "
800 "looking at header %s\n", name);
801 return -1;
802 }
803
804 return _VHandleVolumeHeader(dp, volfunc, name, hdr, 1, rock);
805 }
806 #endif /* AFS_DEMAND_ATTACH_FS */
807
808 return error;
809 }
810
811 /**
812 * walk through the list of volume headers on a partition.
813 *
814 * This function looks through all of the .vol headers on a partition, reads in
815 * each header, and calls the supplied volfunc function on each one. If the
816 * header cannot be read (or volfunc returns a positive error code), DAFS will
817 * VPartHeaderExLock() and retry. If that fails, or if we are non-DAFS, errfunc
818 * will be called (which typically will unlink the problem volume header).
819 *
820 * If volfunc returns a negative error code, walking the partition will stop
821 * and we will return an error immediately.
822 *
823 * @param[in] dp partition to walk
824 * @param[in] partpath the path opendir()
825 * @param[in] volfunc the function to call when a header is encountered, or
826 * NULL to just skip over valid headers
827 * @param[in] errfunc the function to call when a problematic header is
828 * encountered, or NULL to just skip over bad headers
829 * @param[in] rock rock for volfunc and errfunc
830 *
831 * @see VWalkVolFunc
832 * @see VWalkErrFunc
833 *
834 * @return operation status
835 * @retval 0 success
836 * @retval negative fatal error, walk did not finish
837 */
838 int
839 VWalkVolumeHeaders(struct DiskPartition64 *dp, const char *partpath,
840 VWalkVolFunc volfunc, VWalkErrFunc errfunc, void *rock)
841 {
842 DIR *dirp;
843 struct dirent *dentry;
844 int code = 0;
845 struct VolumeDiskHeader diskHeader;
846
847 dirp = opendir(partpath);
848 if (!dirp) {
849 Log("VWalkVolumeHeaders: cannot open directory %s\n", partpath);
850 code = -1;
851 goto done;
852 }
853
854 while ((dentry = readdir(dirp)) != NULL) {
855 char *p;
856 p = strrchr(dentry->d_name, '.');
857 if (p != NULL && strcmp(p, VHDREXT) == 0) {
858 char name[VMAXPATHLEN];
859
860 snprintf(name, VMAXPATHLEN, "%s" OS_DIRSEP "%s", partpath, dentry->d_name);
861
862 code = _VHandleVolumeHeader(dp, volfunc, name, &diskHeader, -1, rock);
863 if (code < 0) {
864 /* fatal error, stop walking */
865 goto done;
866 }
867 if (code && errfunc) {
868 /* error with header; call the caller-supplied vol error
869 * function */
870
871 struct VolumeDiskHeader *hdr = &diskHeader;
872 if (code == 1) {
873 /* we failed to read the header at all, so don't pass in
874 * the header ptr */
875 hdr = NULL;
876 }
877 (*errfunc) (dp, name, hdr, rock);
878 }
879 code = 0;
880 }
881 }
882 done:
883 if (dirp) {
884 closedir(dirp);
885 dirp = NULL;
886 }
887
888 return code;
889 }
890
891 /**
892 * initialize a struct VLockFile.
893 *
894 * @param[in] lf struct VLockFile to initialize
895 * @param[in] path Full path to the file to use for locks. The string contents
896 * are copied.
897 */
898 void
899 VLockFileInit(struct VLockFile *lf, const char *path)
900 {
901 memset(lf, 0, sizeof(*lf));
902 lf->path = strdup(path);
903 lf->fd = INVALID_FD;
904 opr_mutex_init(&lf->mutex);
905 }
906
907 #ifdef AFS_NT40_ENV
908 static_inline FD_t
909 _VOpenPath(const char *path)
910 {
911 HANDLE handle;
912
913 handle = CreateFile(path,
914 GENERIC_READ | GENERIC_WRITE,
915 FILE_SHARE_READ | FILE_SHARE_WRITE,
916 NULL,
917 OPEN_ALWAYS,
918 FILE_ATTRIBUTE_HIDDEN,
919 NULL);
920 if (handle == INVALID_HANDLE_VALUE) {
921 return INVALID_FD;
922 }
923
924 return handle;
925 }
926
927 static_inline int
928 _VLockFd(FD_t handle, afs_uint32 offset, int locktype, int nonblock)
929 {
930 DWORD flags = 0;
931 OVERLAPPED lap;
932
933 if (locktype == WRITE_LOCK) {
934 flags |= LOCKFILE_EXCLUSIVE_LOCK;
935 }
936 if (nonblock) {
937 flags |= LOCKFILE_FAIL_IMMEDIATELY;
938 }
939
940 memset(&lap, 0, sizeof(lap));
941 lap.Offset = offset;
942
943 if (!LockFileEx(handle, flags, 0, 1, 0, &lap)) {
944 if (GetLastError() == ERROR_LOCK_VIOLATION) {
945 return EBUSY;
946 }
947 return EIO;
948 }
949
950 return 0;
951 }
952
953 static_inline void
954 _VUnlockFd(FD_t handle, afs_uint32 offset)
955 {
956 OVERLAPPED lap;
957
958 memset(&lap, 0, sizeof(lap));
959 lap.Offset = offset;
960
961 UnlockFileEx(handle, 0, 1, 0, &lap);
962 }
963
964 static_inline void
965 _VCloseFd(FD_t handle)
966 {
967 CloseHandle(handle);
968 }
969
970 #else /* !AFS_NT40_ENV */
971
972 /**
973 * open a file on the local filesystem suitable for locking
974 *
975 * @param[in] path abs path of the file to open
976 *
977 * @return file descriptor
978 * @retval INVALID_FD failure opening file
979 */
980 static_inline FD_t
981 _VOpenPath(const char *path)
982 {
983 int fd;
984
985 fd = open(path, O_RDWR | O_CREAT, 0660);
986 if (fd < 0) {
987 return INVALID_FD;
988 }
989 return fd;
990 }
991
992 /**
993 * lock an offset in a file descriptor.
994 *
995 * @param[in] fd file descriptor to lock
996 * @param[in] offset offset in file to lock
997 * @param[in] locktype READ_LOCK or WRITE_LOCK
998 * @param[in] nonblock 1 to fail immediately, 0 to wait to acquire lock
999 *
1000 * @return operation status
1001 * @retval 0 success
1002 * @retval EBUSY someone else is holding a conflicting lock and nonblock=1 was
1003 * specified
1004 * @retval EIO error acquiring file lock
1005 */
1006 static_inline int
1007 _VLockFd(FD_t fd, afs_uint32 offset, int locktype, int nonblock)
1008 {
1009 int l_type = F_WRLCK;
1010 int cmd = AFS_SETLKW;
1011 struct afs_st_flock sf;
1012
1013 opr_Assert(fd >= 0);
1014
1015 if (locktype == READ_LOCK) {
1016 l_type = F_RDLCK;
1017 }
1018 if (nonblock) {
1019 cmd = AFS_SETLK;
1020 }
1021
1022 sf.l_start = offset;
1023 sf.l_len = 1;
1024 sf.l_type = l_type;
1025 sf.l_whence = SEEK_SET;
1026
1027 if (fcntl(fd, cmd, &sf)) {
1028 if (nonblock && (errno == EACCES || errno == EAGAIN)) {
1029 /* We asked for a nonblocking lock, and it was already locked */
1030 sf.l_pid = 0;
1031 if (fcntl(fd, F_GETLK, &sf) != 0 || sf.l_pid == 0) {
1032 Log("_VLockFd: fcntl failed with error %d when trying to "
1033 "query the conflicting lock for fd %d (locktype=%d, "
1034 "offset=%lu)\n", errno, fd, locktype,
1035 afs_printable_uint32_lu(offset));
1036 } else {
1037 Log("_VLockFd: conflicting lock held on fd %d, offset %lu by "
1038 "pid %ld (locktype=%d)\n", fd,
1039 afs_printable_uint32_lu(offset), (long int)sf.l_pid,
1040 locktype);
1041 }
1042 return EBUSY;
1043 }
1044 Log("_VLockFd: fcntl failed with error %d when trying to lock "
1045 "fd %d (locktype=%d, offset=%lu)\n", errno, fd, locktype,
1046 afs_printable_uint32_lu(offset));
1047 return EIO;
1048 }
1049
1050 return 0;
1051 }
1052
1053 /**
1054 * close a file descriptor used for file locking.
1055 *
1056 * @param[in] fd file descriptor to close
1057 */
1058 static_inline void
1059 _VCloseFd(FD_t fd)
1060 {
1061 if (close(fd)) {
1062 Log("_VCloseFd: error %d closing fd %d\n",
1063 errno, fd);
1064 }
1065 }
1066
1067 /**
1068 * unlock a file offset in a file descriptor.
1069 *
1070 * @param[in] fd file descriptor to unlock
1071 * @param[in] offset offset to unlock
1072 */
1073 static_inline void
1074 _VUnlockFd(FD_t fd, afs_uint32 offset)
1075 {
1076 struct afs_st_flock sf;
1077
1078 sf.l_start = offset;
1079 sf.l_len = 1;
1080 sf.l_type = F_UNLCK;
1081 sf.l_whence = SEEK_SET;
1082
1083 if (fcntl(fd, AFS_SETLK, &sf)) {
1084 Log("_VUnlockFd: fcntl failed with error %d when trying to unlock "
1085 "fd %d\n", errno, fd);
1086 }
1087 }
1088 #endif /* !AFS_NT40_ENV */
1089
1090 /**
1091 * reinitialize a struct VLockFile.
1092 *
1093 * Use this to close the lock file (unlocking any locks in it), and effectively
1094 * restore lf to the state it was in when it was initialized. This is the same
1095 * as unlocking all of the locks on the file, without having to remember what
1096 * all of the locks were. Do not unlock previously held locks after calling
1097 * this.
1098 *
1099 * @param[in] lf struct VLockFile to reinit
1100 *
1101 * @pre nobody is waiting for a lock on this lockfile or otherwise using
1102 * this lockfile at all
1103 */
1104 void
1105 VLockFileReinit(struct VLockFile *lf)
1106 {
1107 opr_mutex_enter(&lf->mutex);
1108
1109 if (lf->fd != INVALID_FD) {
1110 _VCloseFd(lf->fd);
1111 lf->fd = INVALID_FD;
1112 }
1113
1114 lf->refcount = 0;
1115
1116 opr_mutex_exit(&lf->mutex);
1117 }
1118
1119 /**
1120 * lock a file on disk for the process.
1121 *
1122 * @param[in] lf the struct VLockFile representing the file to lock
1123 * @param[in] offset the offset in the file to lock
1124 * @param[in] locktype READ_LOCK or WRITE_LOCK
1125 * @param[in] nonblock 0 to wait for conflicting locks to clear before
1126 * obtaining the lock; 1 to fail immediately if a
1127 * conflicting lock is held by someone else
1128 *
1129 * @return operation status
1130 * @retval 0 success
1131 * @retval EBUSY someone else is holding a conflicting lock and nonblock=1 was
1132 * specified
1133 * @retval EIO error acquiring file lock
1134 *
1135 * @note DAFS only
1136 *
1137 * @note do not try to lock/unlock the same offset in the same file from
1138 * different threads; use VGetDiskLock to protect threads from each other in
1139 * addition to other processes
1140 */
1141 int
1142 VLockFileLock(struct VLockFile *lf, afs_uint32 offset, int locktype, int nonblock)
1143 {
1144 int code;
1145
1146 opr_Assert(locktype == READ_LOCK || locktype == WRITE_LOCK);
1147
1148 opr_mutex_enter(&lf->mutex);
1149
1150 if (lf->fd == INVALID_FD) {
1151 opr_Assert(lf->refcount == 0);
1152 lf->fd = _VOpenPath(lf->path);
1153 if (lf->fd == INVALID_FD) {
1154 opr_mutex_exit(&lf->mutex);
1155 return EIO;
1156 }
1157 }
1158
1159 lf->refcount++;
1160
1161 opr_Assert(lf->refcount > 0);
1162
1163 opr_mutex_exit(&lf->mutex);
1164
1165 code = _VLockFd(lf->fd, offset, locktype, nonblock);
1166
1167 if (code) {
1168 opr_mutex_enter(&lf->mutex);
1169 opr_Assert(lf->refcount > 0);
1170 if (--lf->refcount < 1) {
1171 _VCloseFd(lf->fd);
1172 lf->fd = INVALID_FD;
1173 }
1174 opr_mutex_exit(&lf->mutex);
1175 }
1176
1177 return code;
1178 }
1179
1180 void
1181 VLockFileUnlock(struct VLockFile *lf, afs_uint32 offset)
1182 {
1183 opr_mutex_enter(&lf->mutex);
1184
1185 opr_Assert(lf->fd != INVALID_FD);
1186 opr_Assert(lf->refcount > 0);
1187
1188 if (--lf->refcount < 1) {
1189 _VCloseFd(lf->fd);
1190 lf->fd = INVALID_FD;
1191 } else {
1192 _VUnlockFd(lf->fd, offset);
1193 }
1194
1195 opr_mutex_exit(&lf->mutex);
1196 }
1197
1198 #ifdef AFS_DEMAND_ATTACH_FS
1199
1200 /**
1201 * initialize a struct VDiskLock.
1202 *
1203 * @param[in] dl struct VDiskLock to initialize
1204 * @param[in] lf the struct VLockFile to associate with this disk lock
1205 */
1206 void
1207 VDiskLockInit(struct VDiskLock *dl, struct VLockFile *lf, afs_uint32 offset)
1208 {
1209 opr_Assert(lf);
1210 memset(dl, 0, sizeof(*dl));
1211 Lock_Init(&dl->rwlock);
1212 opr_mutex_init(&dl->mutex);
1213 opr_cv_init(&dl->cv);
1214 dl->lockfile = lf;
1215 dl->offset = offset;
1216 }
1217
1218 /**
1219 * acquire a lock on a file on local disk.
1220 *
1221 * @param[in] dl the VDiskLock structure corresponding to the file on disk
1222 * @param[in] locktype READ_LOCK if you want a read lock, or WRITE_LOCK if
1223 * you want a write lock
1224 * @param[in] nonblock 0 to wait for conflicting locks to clear before
1225 * obtaining the lock; 1 to fail immediately if a
1226 * conflicting lock is held by someone else
1227 *
1228 * @return operation status
1229 * @retval 0 success
1230 * @retval EBUSY someone else is holding a conflicting lock and nonblock=1 was
1231 * specified
1232 * @retval EIO error acquiring file lock
1233 *
1234 * @note DAFS only
1235 *
1236 * @note while normal fcntl-y locks on Unix systems generally only work per-
1237 * process, this interface also deals with locks between threads in the
1238 * process in addition to different processes acquiring the lock
1239 */
1240 int
1241 VGetDiskLock(struct VDiskLock *dl, int locktype, int nonblock)
1242 {
1243 int code = 0;
1244 opr_Assert(locktype == READ_LOCK || locktype == WRITE_LOCK);
1245
1246 if (nonblock) {
1247 if (locktype == READ_LOCK) {
1248 ObtainReadLockNoBlock(&dl->rwlock, code);
1249 } else {
1250 ObtainWriteLockNoBlock(&dl->rwlock, code);
1251 }
1252
1253 if (code) {
1254 return EBUSY;
1255 }
1256
1257 } else if (locktype == READ_LOCK) {
1258 ObtainReadLock(&dl->rwlock);
1259 } else {
1260 ObtainWriteLock(&dl->rwlock);
1261 }
1262
1263 opr_mutex_enter(&dl->mutex);
1264
1265 if ((dl->flags & VDISKLOCK_ACQUIRING)) {
1266 /* Some other thread is waiting to acquire an fs lock. If nonblock=1,
1267 * we can return immediately, since we know we'll need to wait to
1268 * acquire. Otherwise, wait for the other thread to finish acquiring
1269 * the fs lock */
1270 if (nonblock) {
1271 code = EBUSY;
1272 } else {
1273 while ((dl->flags & VDISKLOCK_ACQUIRING)) {
1274 opr_cv_wait(&dl->cv, &dl->mutex);
1275 }
1276 }
1277 }
1278
1279 if (code == 0 && !(dl->flags & VDISKLOCK_ACQUIRED)) {
1280 /* no other thread holds the lock on the actual file; so grab one */
1281
1282 /* first try, don't block on the lock to see if we can get it without
1283 * waiting */
1284 code = VLockFileLock(dl->lockfile, dl->offset, locktype, 1);
1285
1286 if (code == EBUSY && !nonblock) {
1287
1288 /* mark that we are waiting on the fs lock */
1289 dl->flags |= VDISKLOCK_ACQUIRING;
1290
1291 opr_mutex_exit(&dl->mutex);
1292 code = VLockFileLock(dl->lockfile, dl->offset, locktype, nonblock);
1293 opr_mutex_enter(&dl->mutex);
1294
1295 dl->flags &= ~VDISKLOCK_ACQUIRING;
1296
1297 if (code == 0) {
1298 dl->flags |= VDISKLOCK_ACQUIRED;
1299 }
1300
1301 opr_cv_broadcast(&dl->cv);
1302 }
1303 }
1304
1305 if (code) {
1306 if (locktype == READ_LOCK) {
1307 ReleaseReadLock(&dl->rwlock);
1308 } else {
1309 ReleaseWriteLock(&dl->rwlock);
1310 }
1311 } else {
1312 /* successfully got the lock, so inc the number of unlocks we need
1313 * to do before we can unlock the actual file */
1314 ++dl->lockers;
1315 }
1316
1317 opr_mutex_exit(&dl->mutex);
1318
1319 return code;
1320 }
1321
1322 /**
1323 * release a lock on a file on local disk.
1324 *
1325 * @param[in] dl the struct VDiskLock to release
1326 * @param[in] locktype READ_LOCK if you are unlocking a read lock, or
1327 * WRITE_LOCK if you are unlocking a write lock
1328 *
1329 * @return operation status
1330 * @retval 0 success
1331 */
1332 void
1333 VReleaseDiskLock(struct VDiskLock *dl, int locktype)
1334 {
1335 opr_Assert(locktype == READ_LOCK || locktype == WRITE_LOCK);
1336
1337 opr_mutex_enter(&dl->mutex);
1338 opr_Assert(dl->lockers > 0);
1339
1340 if (--dl->lockers < 1) {
1341 /* no threads are holding this lock anymore, so we can release the
1342 * actual disk lock */
1343 VLockFileUnlock(dl->lockfile, dl->offset);
1344 dl->flags &= ~VDISKLOCK_ACQUIRED;
1345 }
1346
1347 opr_mutex_exit(&dl->mutex);
1348
1349 if (locktype == READ_LOCK) {
1350 ReleaseReadLock(&dl->rwlock);
1351 } else {
1352 ReleaseWriteLock(&dl->rwlock);
1353 }
1354 }
1355
1356 #endif /* AFS_DEMAND_ATTACH_FS */