Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / vol / vol-salvage.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 /*
11 * System: VICE-TWO
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
14 */
15
16 /* 1.2 features:
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
21
22 1.3 features:
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
25
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
27
28 1.4 features:
29 Fixed bug which was causing inode link counts to go bad (thus leaking
30 disk blocks).
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
36
37 1.5 features:
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
44
45 1.6 features:
46 It now deletes obsolete volume inodes without complaining
47
48 1.7 features:
49 Repairs rw volume headers (again).
50
51 1.8 features:
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
55 at some other site!)
56 Some of the messages are cleaned up or made more explicit. One or two added.
57 Logging cleaned up.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
59 read/write volume.
60
61 1.9 features:
62 When a volume header is recreated, the new name will be "bogus.volume#"
63
64 2.0 features:
65 Directory salvaging turned on!!!
66
67 2.1 features:
68 Prints warning messages for setuid programs.
69
70 2.2 features:
71 Logs missing inode numbers.
72
73 2.3 features:
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
75
76 2.4 features:
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
83 */
84
85
86 #include <afsconfig.h>
87 #include <afs/param.h>
88
89 #include <afs/procmgmt.h>
90 #include <roken.h>
91
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
94 #endif
95
96 #ifdef AFS_NT40_ENV
97 #include <WINNT/afsevent.h>
98 #endif
99 #ifndef WCOREDUMP
100 #define WCOREDUMP(x) ((x) & 0200)
101 #endif
102 #include <afs/opr.h>
103 #ifdef AFS_PTHREAD_ENV
104 # include <opr/lock.h>
105 #endif
106
107 #include <afs/afsint.h>
108 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
109 #if defined(AFS_VFSINCL_ENV)
110 #include <sys/vnode.h>
111 #ifdef AFS_SUN5_ENV
112 #include <sys/fs/ufs_inode.h>
113 #else
114 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
115 #include <ufs/ufs/dinode.h>
116 #include <ufs/ffs/fs.h>
117 #else
118 #include <ufs/inode.h>
119 #endif
120 #endif
121 #else /* AFS_VFSINCL_ENV */
122 #ifdef AFS_OSF_ENV
123 #include <ufs/inode.h>
124 #else /* AFS_OSF_ENV */
125 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
126 #include <sys/inode.h>
127 #endif
128 #endif
129 #endif /* AFS_VFSINCL_ENV */
130 #endif /* AFS_SGI_ENV */
131 #ifdef AFS_AIX_ENV
132 #include <sys/vfs.h>
133 #include <sys/lockf.h>
134 #else
135 #ifdef AFS_HPUX_ENV
136 #include <checklist.h>
137 #else
138 #if defined(AFS_SGI_ENV)
139 #include <mntent.h>
140 #else
141 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
142 #ifdef AFS_SUN5_ENV
143 #include <sys/mnttab.h>
144 #include <sys/mntent.h>
145 #else
146 #include <mntent.h>
147 #endif
148 #else
149 #endif /* AFS_SGI_ENV */
150 #endif /* AFS_HPUX_ENV */
151 #endif
152 #endif
153 #ifndef AFS_NT40_ENV
154 #include <afs/osi_inode.h>
155 #endif
156 #include <afs/cmd.h>
157 #include <afs/dir.h>
158 #include <afs/afsutil.h>
159 #include <afs/fileutil.h>
160 #include <rx/rx_queue.h>
161
162 #include "nfs.h"
163 #include "lwp.h"
164 #include "lock.h"
165 #include <afs/afssyscalls.h>
166 #include "ihandle.h"
167 #include "vnode.h"
168 #include "volume.h"
169 #include "partition.h"
170 #include "daemon_com.h"
171 #include "daemon_com_inline.h"
172 #include "fssync.h"
173 #include "fssync_inline.h"
174 #include "volume_inline.h"
175 #include "salvsync.h"
176 #include "viceinode.h"
177 #include "salvage.h"
178 #include "volinodes.h" /* header magic number, etc. stuff */
179 #include "vol-salvage.h"
180 #include "common.h"
181 #include "vol_internal.h"
182 #include <afs/acl.h>
183 #include <afs/prs_fs.h>
184
185 #ifdef FSSYNC_BUILD_CLIENT
186 #include "vg_cache.h"
187 #endif
188
189 #ifdef AFS_NT40_ENV
190 #include <pthread.h>
191 #endif
192
/* Size in bytes of the line buffer that SalvageFileSysParallel() uses when
 * copying the per-job log files back into the main log. */
#define SALV_BUFFER_SIZE 1024

#ifdef AFS_OSF_ENV
/* Explicit declaration of calloc() for AFS_OSF_ENV builds. */
extern void *calloc();
#endif
/* Forward declaration of a file-local helper; its definition is not in the
 * part of the file shown here. */
static char *TimeStamp(char *buffer, size_t size, time_t clock, int precision);
199
200
/*
 * Salvager-wide option variables.  Where a trailing comment names a flag,
 * that is the command-line option associated with the variable.
 */
int debug;			/* -d flag */
extern int Testing;		/* -n flag */
int ListInodeOption;		/* -i flag */
int ShowRootFiles;		/* -r flag */
int RebuildDirs;		/* -sal flag */
int Parallel = 4;		/* -para X flag */
int PartsPerDisk = 8;		/* Salvage up to 8 partitions on same disk sequentially */
int forceR = 0;			/* -b flag */
int ShowLog = 0;		/* -showlog flag */
char *ShowLogFilename = NULL;	/* log file name for -showlog */
int ShowSuid = 0;		/* -showsuid flag */
int ShowMounts = 0;		/* -showmounts flag */
int orphans = ORPH_IGNORE;	/* -orphans option */
int Showmode = 0;
int ClientMode = 0;		/* running as salvager server client */

/* Whether SalvageFileSys() may fork a child to do the work.  Off on NT;
 * SalvageFileSys1() also clears it for single-volume salvages. */
#ifdef AFS_NT40_ENV
int canfork = 0;
#else
int canfork = 1;
#endif

/* Number of slots in the per-disk job table of SalvageFileSysParallel(). */
#define MAXPARALLEL 32

int OKToZap;			/* -o flag */
int ForceSalvage;		/* If salvage should occur despite the DONT_SALVAGE flag
				 * in the volume header */

#define ROOTINODE 2		/* Root inode of a 4.2 Unix file system
				 * partition */
/**
 * information that is 'global' to a particular salvage job.
 *
 * SalvageFileSys1() keeps one instance on its stack, zeroes it at the start
 * of every attempt, and passes a pointer to it to the routines it calls.
 */
struct SalvInfo {
    Device fileSysDevice;    /**< The device number of the current partition
			      *   being salvaged */
    char fileSysPath[9];     /**< The path of the mounted partition currently
			      *   being salvaged, i.e. the directory containing
			      *   the volume headers */
    char *fileSysPathName;   /**< NT needs this to make name pretty log. */
    IHandle_t *VGLinkH;      /**< Link handle for current volume group. */
    int VGLinkH_cnt;         /**< # of references to lnk handle. */
    struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */

#ifndef AFS_NT40_ENV
    char *fileSysDeviceName; /**< The block device where the file system being
			      *   salvaged was mounted */
    char *filesysfulldev;    /**< Directory part of the partition's device
			      *   name, as split off by get_DevName() */
#endif
    int VolumeChanged;       /**< Set by any routine which would change the
			      *   volume in a way which would require callbacks
			      *   to be broken if the volume was put back
			      *   on line by an active file server */

    VolumeDiskData VolInfo;  /**< A copy of the last good or salvaged volume
			      *   header dealt with */

    int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
    FD_t inodeFd;            /**< File descriptor for inode file */

    struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
    int nVolumes;            /**< Number of volumes (read-write and read-only)
			      *   in volume summary */
    struct InodeSummary *inodeSummary; /**< contains info on all the relevant
					*   inodes */

    struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
						*   vnodes in the volume that
						*   we are currently looking
						*   at */
    int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
		   *   to contact the fileserver over FSYNC */
};
274
/* Directory in which SalvageFileSys1() creates its temporary inode list
 * file; when NULL, the path of the partition being salvaged is used. */
char *tmpdir = NULL;
276
277
278
/* Forward declarations of file-local helpers that are used before they are
 * defined. */
static void QuietExit(int) AFS_NORETURN;
static void SalvageShowLog(void);
static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
static int AskVolumeSummary(struct SalvInfo *salvinfo,
			    VolumeId singleVolumeNumber);
static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);

#ifdef AFS_DEMAND_ATTACH_FS
/* Only declared for demand-attach builds. */
static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
#endif /* AFS_DEMAND_ATTACH_FS */
291
/*
 * Map a uniquifier to the value that is stored in the inode.
 *
 * Builds with AFS_3DISPARES keep only the low 22 bits (mask 0x3fffff),
 * builds with AFS_SGI_EXMAG apply SGI_UNIQMASK, and every other build
 * returns the uniquifier unchanged.
 */
static Unique
IUnique(Unique u)
{
#if defined(AFS_3DISPARES)
    const Unique stored = (u & 0x3fffff);
#elif defined(AFS_SGI_EXMAG)
    const Unique stored = (u & SGI_UNIQMASK);
#else
    const Unique stored = u;
#endif

    return stored;
}
306
/*
 * Classify an errno value.
 *
 * Returns 1 for EPERM, ENXIO and ENOENT, and 0 for anything else, since
 * other errors may be transient (e.g. EMFILE).
 */
static int
BadError(int aerror)
{
    switch (aerror) {
    case EPERM:
    case ENXIO:
    case ENOENT:
	return 1;
    default:
	/* may be transient, e.g. EMFILE */
	return 0;
    }
}
314
/* Capacity of the save_args[] array below. */
#define MAX_ARGS 128
#ifdef AFS_NT40_ENV
/* NT-only state.
 * NOTE(review): presumably the saved command line and child-job descriptor
 * used when the salvager re-executes itself for child jobs (see the NT
 * branch of SalvageFileSysParallel) -- confirm against the NT code. */
char *save_args[MAX_ARGS];
int n_save_args = 0;
extern pthread_t main_thread;
childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
#endif
322
323 /**
324 * Get the salvage lock if not already held. Hold until process exits.
325 *
326 * @param[in] locktype READ_LOCK or WRITE_LOCK
327 */
328 static void
329 _ObtainSalvageLock(int locktype)
330 {
331 struct VLockFile salvageLock;
332 int offset = 0;
333 int nonblock = 1;
334 int code;
335
336 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
337
338 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
339 if (code == EBUSY) {
340 fprintf(stderr,
341 "salvager: There appears to be another salvager running! "
342 "Aborted.\n");
343 Exit(1);
344 } else if (code) {
345 fprintf(stderr,
346 "salvager: Error %d trying to acquire salvage lock! "
347 "Aborted.\n", code);
348 Exit(1);
349 }
350 }
/**
 * Take the salvage lock as a WRITE_LOCK.
 *
 * Thin wrapper around _ObtainSalvageLock(), which exits the process if the
 * lock cannot be acquired.
 */
void
ObtainSalvageLock(void)
{
    _ObtainSalvageLock(WRITE_LOCK);
}
/**
 * Take the salvage lock as a READ_LOCK.
 *
 * Thin wrapper around _ObtainSalvageLock(), which exits the process if the
 * lock cannot be acquired.
 */
void
ObtainSharedSalvageLock(void)
{
    _ObtainSalvageLock(READ_LOCK);
}
361
362
#ifdef AFS_SGI_XFS_IOPS_ENV
/*
 * Check if the given partition is mounted. For XFS, the root inode is not a
 * constant. So we check the hard way: scan the table of mounted file
 * systems (MOUNTED) for an entry whose mount directory equals 'part'.
 *
 * part    - mount point path to look for.
 *
 * Returns 1 if an entry with that mount directory exists, 0 otherwise.
 * Failure to open the mount table is fatal (opr_Verify).
 */
int
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;
    int mounted = 0;

    opr_Verify((mntfp = setmntent(MOUNTED, "r")) != NULL);
    while ((mntent = getmntent(mntfp)) != NULL) {
	if (strcmp(part, mntent->mnt_dir) == 0) {
	    mounted = 1;
	    break;
	}
    }
    /* The result is captured in 'mounted' before the table is closed, so
     * the entry pointer is never looked at after endmntent(). */
    endmntent(mntfp);

    return mounted;
}
#endif
383 /* Check if the given inode is the root of the filesystem. */
384 #ifndef AFS_SGI_XFS_IOPS_ENV
385 int
386 IsRootInode(struct afs_stat_st *status)
387 {
388 /*
389 * The root inode is not a fixed value in XFS partitions. So we need to
390 * see if the partition is in the list of mounted partitions. This only
391 * affects the SalvageFileSys path, so we check there.
392 */
393 return (status->st_ino == ROOTINODE);
394 }
395 #endif
396
#ifdef AFS_AIX42_ENV
#ifndef AFS_NAMEI_ENV
/*
 * We don't want to salvage big files filesystems, since we can't put volumes
 * on them.
 *
 * mountPoint - partition mount point, used only in log messages.
 * devName    - device name; "/dev/" is prepended when it is not already
 *              there.
 *
 * Returns 1 if the partition must NOT be salvaged (device name too long,
 * superblock unreadable, or a big files filesystem), 0 if it may be
 * salvaged.
 */
int
CheckIfBigFilesFS(char *mountPoint, char *devName)
{
    struct superblock fs;
    char name[128];
    const char *prefix;
    int len;

    /* Build the device path with a bounded copy; an over-long devName would
     * otherwise overflow name[]. */
    prefix = (strncmp(devName, "/dev/", 5) != 0) ? "/dev/" : "";
    len = snprintf(name, sizeof(name), "%s%s", prefix, devName);
    if (len < 0 || (size_t)len >= sizeof(name)) {
	Log("Device name %s is too long. Not salvaging partition %s.\n",
	    devName, mountPoint);
	return 1;
    }

    if (ReadSuper(&fs, name) < 0) {
	Log("Unable to read superblock. Not salvaging partition %s.\n",
	    mountPoint);
	return 1;
    }
    if (IsBigFilesFileSystem(&fs)) {
	Log("Partition %s is a big files filesystem, not salvaging.\n",
	    mountPoint);
	return 1;
    }
    return 0;
}
#endif
#endif
428
#ifdef AFS_NT40_ENV
#define HDSTR "\\Device\\Harddisk"
#define HDLEN (sizeof(HDSTR)-1)	/* Length of "\Device\Harddisk" */
/*
 * NT: decide whether two partitions are on the same disk by comparing what
 * QueryDosDevice() returns for their device names (case-insensitively).
 *
 * Returns non-zero when the two results compare equal, and also when either
 * query fails.  A one-time warning is logged if a result does not start
 * with HDSTR.
 */
int
SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
{
#define RES_LEN 256
    static int dowarn = 1;
    struct DiskPartition64 *parts[2];
    char target[2][RES_LEN];
    int k;

    parts[0] = p1;
    parts[1] = p2;

    /* Resolve p1 first, then p2, warning at most once overall. */
    for (k = 0; k < 2; k++) {
	if (!QueryDosDevice(parts[k]->devName, target[k], RES_LEN - 1))
	    return 1;
	if (strncmp(target[k], HDSTR, HDLEN) != 0 && dowarn) {
	    dowarn = 0;
	    Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
		target[k], HDSTR, parts[k]->devName);
	}
    }

    return (_strnicmp(target[0], target[1], RES_LEN - 1) == 0);
}
#else
/* Elsewhere: two partitions are on the same disk when their device numbers
 * divided by PartsPerDisk are equal. */
#define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
#endif
465
/*
 * Salvage partitions in parallel, one running job per disk.
 *
 * This assumes that two partitions with the same device number divided by
 * PartsPerDisk are on the same disk (see SameDisk).
 *
 * partP - partition to queue for salvage, or NULL to wait for every job
 *         started so far and then merge the per-job log files into the
 *         main log.
 *
 * The job table (jobs[], numjobs, jobcount) is static, so it persists
 * across calls: callers are expected to call once per partition and then
 * once more with NULL.  When 'debug' is set or Parallel is 1, the partition
 * is salvaged synchronously through SalvageFileSys() instead.
 */
void
SalvageFileSysParallel(struct DiskPartition64 *partP)
{
    struct job {
	struct DiskPartition64 *partP;
	int pid;		/* Pid for this job */
	int jobnumb;		/* Log file job number */
	struct job *nextjob;	/* Next partition on disk to salvage */
    };
    static struct job *jobs[MAXPARALLEL] = { 0 };	/* Need to zero this */
    struct job *thisjob = 0;
    static int numjobs = 0;	/* number of disks with a running job */
    static int jobcount = 0;	/* total jobs ever created; names log files */
    int wstatus;
    struct job *oldjob;
    int startjob;
    FILE *passLog;
    int i, j, pid;

    if (partP) {
	/* We have a partition to salvage. Copy it into thisjob */
	thisjob = calloc(1, sizeof(struct job));
	if (!thisjob) {
	    Log("Can't salvage '%s'. Not enough memory\n", partP->name);
	    return;
	}
	thisjob->partP = partP;
	thisjob->jobnumb = jobcount;
	jobcount++;
    } else if (jobcount == 0) {
	/* We are asking to wait for all jobs (partp == 0), yet we never
	 * started any.
	 */
	Log("No file system partitions named %s* found; not salvaged\n",
	    VICE_PARTITION_PREFIX);
	return;
    }

    /* Sequential mode: salvage right now and skip all job bookkeeping. */
    if (debug || Parallel == 1) {
	if (thisjob) {
	    SalvageFileSys(thisjob->partP, 0);
	    free(thisjob);
	}
	return;
    }

    if (thisjob) {
	/* Check to see if thisjob is for a disk that we are already
	 * salvaging. If it is, link it in as the next job to do. The
	 * jobs array has 1 entry per disk being salvaged. numjobs is
	 * the total number of disks currently being salvaged. In
	 * order to keep the jobs array compact, when a disk is
	 * completed, the highest element in the jobs array is moved
	 * down to the now open slot.
	 */
	for (j = 0; j < numjobs; j++) {
	    if (SameDisk(jobs[j]->partP, thisjob->partP)) {
		/* On same disk, add it to this list and return */
		thisjob->nextjob = jobs[j]->nextjob;
		jobs[j]->nextjob = thisjob;
		thisjob = 0;
		break;
	    }
	}
    }

    /* Loop until we start thisjob or until all existing jobs are finished */
    while (thisjob || (!partP && (numjobs > 0))) {
	startjob = -1;		/* No new job to start */

	if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
	    /* Either the max jobs are running or we have to wait for all
	     * the jobs to finish. In either case, we wait for at least one
	     * job to finish. When it's done, clean up after it.
	     */
	    pid = wait(&wstatus);
	    opr_Assert(pid != -1);
	    for (j = 0; j < numjobs; j++) {	/* Find which job it is */
		if (pid == jobs[j]->pid)
		    break;
	    }
	    opr_Assert(j < numjobs);
	    if (WCOREDUMP(wstatus)) {	/* Say if the job core dumped */
		Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
	    }

	    numjobs--;		/* job no longer running */
	    oldjob = jobs[j];	/* remember */
	    jobs[j] = jobs[j]->nextjob;	/* Step to next part on same disk */
	    free(oldjob);	/* free the old job */

	    /* If there is another partition on the disk to salvage, then
	     * say we will start it (startjob). If not, then put thisjob there
	     * and say we will start it.
	     */
	    if (jobs[j]) {	/* Another partition to salvage */
		startjob = j;	/* Will start it */
	    } else {		/* There is not another partition to salvage */
		if (thisjob) {
		    jobs[j] = thisjob;	/* Add thisjob */
		    thisjob = 0;
		    startjob = j;	/* Will start it */
		} else {
		    jobs[j] = jobs[numjobs];	/* Move last job up to this slot */
		    startjob = -1;	/* Don't start it - already running */
		}
	    }
	} else {
	    /* We don't have to wait for a job to complete */
	    if (thisjob) {
		jobs[numjobs] = thisjob;	/* Add this job */
		thisjob = 0;
		startjob = numjobs;	/* Will start it */
	    }
	}

	/* Start up a new salvage job on a partition in job slot "startjob" */
	if (startjob != -1) {
	    if (!Showmode)
		Log("Starting salvage of file system partition %s\n",
		    jobs[startjob]->partP->name);
#ifdef AFS_NT40_ENV
	    /* For NT, we not only fork, but re-exec the salvager. Pass in the
	     * commands and pass the child job number via the data path.
	     */
	    pid =
		nt_SalvagePartition(jobs[startjob]->partP->name,
				    jobs[startjob]->jobnumb);
	    jobs[startjob]->pid = pid;
	    numjobs++;
#else
	    pid = Fork();
	    if (pid) {
		/* Parent: remember the child and count the running job. */
		jobs[startjob]->pid = pid;
		numjobs++;
	    } else {
		/* Child: detach from the inherited descriptors, log to a
		 * per-job file, salvage the partition and exit. */
		int fd;
		char *filename;
		struct logOptions logopts;

		memset(&logopts, 0, sizeof(logopts));
		logopts.lopt_dest = logDest_file;

		for (fd = 0; fd < 16; fd++)
		    close(fd);
		/* NOTE(review): with fds 0-15 just closed, this open is
		 * presumably expected to land on fd 0, which is then
		 * duplicated onto 1 and 2; none of the results are checked. */
		open(OS_DIRSEP, 0);
		dup2(0, 1);
		dup2(0, 2);

		ShowLog = 0; /* Child processes do not display. */
		if (asprintf(&filename, "%s.%d",
			     AFSDIR_SERVER_SLVGLOG_FILEPATH,
			     jobs[startjob]->jobnumb) >= 0) {
		    logopts.lopt_filename = filename;
		    OpenLog(&logopts);
		    free(filename);
		}

		SalvageFileSys1(jobs[startjob]->partP, 0);
		Exit(0);
	    }
#endif
	}
    }				/* while ( thisjob || (!partP && numjobs > 0) ) */

    /*
     * If waited for all jobs to complete, now collect log files and return.
     * No files can be collected when logging to the system log (syslog).
     */
    if (GetLogDest() == logDest_file) {
	if (!partP) {
	    char *buf = calloc(1, SALV_BUFFER_SIZE);
	    char *logFileName;

	    if (buf == NULL) {
		Log("out of memory");
	    } else {
		/* One log file per job number ever handed out. */
		for (i = 0; i < jobcount; i++) {
		    if (asprintf(&logFileName, "%s.%d",
				 AFSDIR_SERVER_SLVGLOG_FILEPATH, i) < 0) {
			Log("out of memory");
			break;
		    }
		    if ((passLog = afs_fopen(logFileName, "r"))) {
			while (fgets(buf, SALV_BUFFER_SIZE, passLog)) {
			    WriteLogBuffer(buf, strlen(buf));
			}
			fclose(passLog);
		    }
		    /* The per-job file is removed whether or not it could
		     * be opened. */
		    (void)unlink(logFileName);
		    free(logFileName);
		}
		free(buf);
	    }
	}
    }
    return;
}
667
668
669 void
670 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
671 {
672 if (!canfork || debug || Fork() == 0) {
673 SalvageFileSys1(partP, singleVolumeNumber);
674 if (canfork && !debug) {
675 QuietExit(0);
676 }
677 } else
678 Wait("SalvageFileSys");
679 }
680
/*
 * Split a device path at its last directory separator (OS_DIRSEPC).
 *
 * pbuffer - in: NUL-terminated device path, e.g. "/dev/sda1";
 *           out: only the part after the last separator ("sda1"),
 *           moved to the front of the same buffer.
 * wpath   - out: the part before the last separator ("/dev").  Must be
 *           large enough for that prefix plus a NUL and must not overlap
 *           pbuffer.
 *
 * Returns pbuffer on success.  Returns NULL, leaving both buffers
 * untouched, when pbuffer contains no separator.
 */
char *
get_DevName(char *pbuffer, char *wpath)
{
    char *sep = strrchr(pbuffer, OS_DIRSEPC);
    size_t dirlen;

    if (sep == NULL)
	return NULL;

    /* Copy the directory part straight out of pbuffer; no intermediate
     * fixed-size scratch buffer is needed. */
    dirlen = (size_t)(sep - pbuffer);
    memcpy(wpath, pbuffer, dirlen);
    wpath[dirlen] = '\0';

    /* Source and destination overlap, so this has to be memmove(); strcpy()
     * on overlapping objects is undefined behaviour. */
    memmove(pbuffer, sep + 1, strlen(sep + 1) + 1);
    return pbuffer;
}
699
700 void
701 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
702 {
703 char *name, *tdir;
704 char inodeListPath[256];
705 FD_t inodeFile = INVALID_FD;
706 static char tmpDevName[100];
707 static char wpath[100];
708 struct VolumeSummary *vsp, *esp;
709 int i, j;
710 int code;
711 int tries = 0;
712 struct SalvInfo l_salvinfo;
713 struct SalvInfo *salvinfo = &l_salvinfo;
714
715 retry:
716 memset(salvinfo, 0, sizeof(*salvinfo));
717
718 tries++;
719 if (inodeFile != INVALID_FD) {
720 OS_CLOSE(inodeFile);
721 inodeFile = INVALID_FD;
722 }
723 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
724 Abort("Raced too many times with fileserver restarts while trying to "
725 "checkout/lock volumes; Aborted\n");
726 }
727 #ifdef AFS_DEMAND_ATTACH_FS
728 if (tries > 1) {
729 /* unlock all previous volume locks, since we're about to lock them
730 * again */
731 VLockFileReinit(&partP->volLockFile);
732 }
733 #endif /* AFS_DEMAND_ATTACH_FS */
734
735 salvinfo->fileSysPartition = partP;
736 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
737 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
738
739 #ifdef AFS_NT40_ENV
740 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
741 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
742 name = partP->devName;
743 #else
744 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
745 strcpy(tmpDevName, partP->devName);
746 name = get_DevName(tmpDevName, wpath);
747 salvinfo->fileSysDeviceName = name;
748 salvinfo->filesysfulldev = wpath;
749 #endif
750
751 if (singleVolumeNumber) {
752 #ifndef AFS_DEMAND_ATTACH_FS
753 /* only non-DAFS locks the partition when salvaging a single volume;
754 * DAFS will lock the individual volumes in the VG */
755 VLockPartition(partP->name);
756 #endif /* !AFS_DEMAND_ATTACH_FS */
757
758 ForceSalvage = 1;
759
760 /* salvageserver already setup fssync conn for us */
761 if ((programType != salvageServer) && !VConnectFS()) {
762 Abort("Couldn't connect to file server\n");
763 }
764
765 salvinfo->useFSYNC = 1;
766 AskOffline(salvinfo, singleVolumeNumber);
767 #ifdef AFS_DEMAND_ATTACH_FS
768 if (LockVolume(salvinfo, singleVolumeNumber)) {
769 goto retry;
770 }
771 #endif /* AFS_DEMAND_ATTACH_FS */
772
773 } else {
774 salvinfo->useFSYNC = 0;
775 VLockPartition(partP->name);
776 if (ForceSalvage) {
777 ForceSalvage = 1;
778 } else {
779 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
780 }
781 if (!Showmode)
782 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
783 partP->name, name, (Testing ? "(READONLY mode)" : ""));
784 if (ForceSalvage)
785 Log("***Forced salvage of all volumes on this partition***\n");
786 }
787
788
789 /*
790 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
791 * files
792 */
793 {
794 DIR *dirp;
795 struct dirent *dp;
796
797 opr_Verify((dirp = opendir(salvinfo->fileSysPath)) != NULL);
798 while ((dp = readdir(dirp))) {
799 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
800 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
801 char npath[1024];
802 Log("Removing old salvager temp files %s\n", dp->d_name);
803 strcpy(npath, salvinfo->fileSysPath);
804 strcat(npath, OS_DIRSEP);
805 strcat(npath, dp->d_name);
806 OS_UNLINK(npath);
807 }
808 }
809 closedir(dirp);
810 }
811 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
812 #ifdef AFS_NT40_ENV
813 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
814 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
815 #else
816 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
817 getpid());
818 #endif
819
820 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
821 if (inodeFile == INVALID_FD) {
822 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
823 }
824 #ifdef AFS_NT40_ENV
825 /* Using nt_unlink here since we're really using the delete on close
826 * semantics of unlink. In most places in the salvager, we really do
827 * mean to unlink the file at that point. Those places have been
828 * modified to actually do that so that the NT crt can be used there.
829 *
830 * jaltman - On NT delete on close cannot be applied to a file while the
831 * process has an open file handle that does not have DELETE file
832 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
833 * delete privileges. As a result the nt_unlink() call will always
834 * fail.
835 */
836 code = nt_unlink(inodeListPath);
837 #else
838 code = unlink(inodeListPath);
839 #endif
840 if (code < 0) {
841 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
842 }
843
844 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
845 OS_CLOSE(inodeFile);
846 return;
847 }
848 salvinfo->inodeFd = inodeFile;
849 if (salvinfo->inodeFd == INVALID_FD)
850 Abort("Temporary file %s is missing...\n", inodeListPath);
851 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
852 if (ListInodeOption) {
853 PrintInodeList(salvinfo);
854 if (singleVolumeNumber) {
855 /* We've checked out the volume from the fileserver, and we need
856 * to give it back. We don't know if the volume exists or not,
857 * so we don't know whether to AskOnline or not. Try to determine
858 * if the volume exists by trying to read the volume header, and
859 * AskOnline if it is readable. */
860 MaybeAskOnline(salvinfo, singleVolumeNumber);
861 }
862 return;
863 }
864 /* enumerate volumes in the partition.
865 * figure out sets of read-only + rw volumes.
866 * salvage each set, read-only volumes first, then read-write.
867 * Fix up inodes on last volume in set (whether it is read-write
868 * or read-only).
869 */
870 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
871 goto retry;
872 }
873
874 if (singleVolumeNumber) {
875 /* If we delete a volume during the salvage, we indicate as such by
876 * setting the volsummary->deleted field. We need to know if we
877 * deleted a volume or not in order to know which volumes to bring
878 * back online after the salvage. If we fork, we will lose this
879 * information, since volsummary->deleted will not get set in the
880 * parent. So, don't fork. */
881 canfork = 0;
882 }
883
884 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
885 i < salvinfo->nVolumesInInodeFile; i = j) {
886 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
887 for (j = i;
888 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
889 j++) {
890 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
891 struct VolumeSummary *tsp;
892 /* Scan volume list (from partition root directory) looking for the
893 * current rw volume number in the volume list from the inode scan.
894 * If there is one here that is not in the inode volume list,
895 * delete it now. */
896 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
897 if (vsp->unused)
898 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
899 }
900 /* Now match up the volume summary info from the root directory with the
901 * entry in the volume list obtained from scanning inodes */
902 salvinfo->inodeSummary[j].volSummary = NULL;
903 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
904 if (tsp->header.id == vid) {
905 salvinfo->inodeSummary[j].volSummary = tsp;
906 tsp->unused = 0;
907 break;
908 }
909 }
910 }
911 /* Salvage the group of volumes (several read-only + 1 read/write)
912 * starting with the current read-only volume we're looking at.
913 */
914 #ifdef AFS_NT40_ENV
915 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
916 #else
917 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
918 #endif /* AFS_NT40_ENV */
919
920 }
921
922 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
923 for (; vsp < esp; vsp++) {
924 if (vsp->unused)
925 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
926 }
927
928 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
929 RemoveTheForce(salvinfo->fileSysPath);
930
931 if (!Testing && singleVolumeNumber) {
932 int foundSVN = 0;
933 #ifdef AFS_DEMAND_ATTACH_FS
934 /* unlock vol headers so the fs can attach them when we AskOnline */
935 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
936 #endif /* AFS_DEMAND_ATTACH_FS */
937
938 /* Step through the volumeSummary list and set all volumes on-line.
939 * Most volumes were taken off-line in GetVolumeSummary.
940 * If a volume was deleted, don't tell the fileserver anything, since
941 * we already told the fileserver the volume was deleted back when we
942 * we destroyed the volume header.
943 * Also, make sure we bring the singleVolumeNumber back online first.
944 */
945
946 for (j = 0; j < salvinfo->nVolumes; j++) {
947 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
948 foundSVN = 1;
949 if (!salvinfo->volumeSummaryp[j].deleted) {
950 AskOnline(salvinfo, singleVolumeNumber);
951 }
952 }
953 }
954
955 if (!foundSVN) {
956 /* If singleVolumeNumber is not in our volumeSummary, it means that
957 * at least one other volume in the VG is on the partition, but the
958 * RW volume is not. We've already AskOffline'd it by now, though,
959 * so make sure we don't still have the volume checked out. */
960 AskDelete(salvinfo, singleVolumeNumber);
961 }
962
963 for (j = 0; j < salvinfo->nVolumes; j++) {
964 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
965 if (!salvinfo->volumeSummaryp[j].deleted) {
966 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
967 }
968 }
969 }
970 } else {
971 if (!Showmode)
972 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
973 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
974 }
975
976 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
977 }
978
979 void
980 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
981 {
982 char path[64];
983 char filename[VMAXPATHLEN];
984
985 if (vsp->deleted) {
986 return;
987 }
988
989 VolumeExternalName_r(vsp->header.id, filename, sizeof(filename));
990 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
991
992 if (!Showmode)
993 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
994 if (!Testing) {
995 afs_int32 code;
996 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
997 if (code) {
998 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
999 afs_printable_int32_ld(code),
1000 afs_printable_VolumeId_lu(vsp->header.id));
1001 }
1002
1003 /* make sure we actually delete the header file; ENOENT
1004 * is fine, since VDestroyVolumeDiskHeader probably already
1005 * unlinked it */
1006 if (unlink(path) && errno != ENOENT) {
1007 Log("Unable to unlink %s (errno = %d)\n", path, errno);
1008 }
1009 if (salvinfo->useFSYNC) {
1010 AskDelete(salvinfo, vsp->header.id);
1011 }
1012 vsp->deleted = 1;
1013 }
1014 }
1015
1016 int
1017 CompareInodes(const void *_p1, const void *_p2)
1018 {
1019 const struct ViceInodeInfo *p1 = _p1;
1020 const struct ViceInodeInfo *p2 = _p2;
1021 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1022 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1023 VolumeId p1rwid, p2rwid;
1024 p1rwid =
1025 (p1->u.vnode.vnodeNumber ==
1026 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1027 p2rwid =
1028 (p2->u.vnode.vnodeNumber ==
1029 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1030 if (p1rwid < p2rwid)
1031 return -1;
1032 if (p1rwid > p2rwid)
1033 return 1;
1034 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1035 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1036 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1037 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1038 if (p1->u.vnode.volumeId == p1rwid)
1039 return -1;
1040 if (p2->u.vnode.volumeId == p2rwid)
1041 return 1;
1042 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1043 }
1044 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1045 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1046 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1047 }
1048 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1049 return -1;
1050 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1051 return 1;
1052 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1053 return -1;
1054 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1055 return 1;
1056 /* The following tests are reversed, so that the most desirable
1057 * of several similar inodes comes first */
1058 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1059 #ifdef AFS_3DISPARES
1060 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1061 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1062 return 1;
1063 #endif
1064 #ifdef AFS_SGI_EXMAG
1065 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1066 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1067 return 1;
1068 #endif
1069 return -1;
1070 }
1071 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1072 #ifdef AFS_3DISPARES
1073 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1074 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1075 return -1;
1076 #endif
1077 #ifdef AFS_SGI_EXMAG
1078 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1079 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1080 return 1;
1081 #endif
1082 return 1;
1083 }
1084 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1085 #ifdef AFS_3DISPARES
1086 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1087 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1088 return 1;
1089 #endif
1090 #ifdef AFS_SGI_EXMAG
1091 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1092 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1093 return 1;
1094 #endif
1095 return -1;
1096 }
1097 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1098 #ifdef AFS_3DISPARES
1099 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1100 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1101 return -1;
1102 #endif
1103 #ifdef AFS_SGI_EXMAG
1104 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1105 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1106 return 1;
1107 #endif
1108 return 1;
1109 }
1110 return 0;
1111 }
1112
1113 void
1114 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1115 struct InodeSummary *summary)
1116 {
1117 VolumeId volume = ip->u.vnode.volumeId;
1118 VolumeId rwvolume = volume;
1119 int n, nSpecial;
1120 Unique maxunique;
1121 n = nSpecial = 0;
1122 maxunique = 0;
1123 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1124 n++;
1125 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1126 nSpecial++;
1127 rwvolume = ip->u.special.parentId;
1128 /* This isn't quite right, as there could (in error) be different
1129 * parent inodes in different special vnodes */
1130 } else {
1131 if (maxunique < ip->u.vnode.vnodeUniquifier)
1132 maxunique = ip->u.vnode.vnodeUniquifier;
1133 }
1134 ip++;
1135 }
1136 summary->volumeId = volume;
1137 summary->RWvolumeId = rwvolume;
1138 summary->nInodes = n;
1139 summary->nSpecialInodes = nSpecial;
1140 summary->maxUniquifier = maxunique;
1141 }
1142
1143 int
1144 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, VolumeId singleVolumeNumber, void *rock)
1145 {
1146 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1147 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1148 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1149 }
1150
1151 /* GetInodeSummary
1152 *
1153 * Collect list of inodes in file named by path. If a truly fatal error,
1154 * unlink the file and abort. For lessor errors, return -1. The file will
1155 * be unlinked by the caller.
1156 */
1157 int
1158 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1159 {
1160 int forceSal, err;
1161 int code;
1162 struct ViceInodeInfo *ip, *ip_save;
1163 struct InodeSummary summary;
1164 char summaryFileName[50];
1165 FD_t summaryFile = INVALID_FD;
1166 #ifdef AFS_NT40_ENV
1167 char *dev = salvinfo->fileSysPath;
1168 char *wpath = salvinfo->fileSysPath;
1169 #else
1170 char *dev = salvinfo->fileSysDeviceName;
1171 char *wpath = salvinfo->filesysfulldev;
1172 #endif
1173 char *part = salvinfo->fileSysPath;
1174 char *tdir;
1175 int i;
1176 int retcode = 0;
1177 int deleted = 0;
1178 afs_sfsize_t st_size;
1179
1180 /* This file used to come from vfsck; cobble it up ourselves now... */
1181 if ((err =
1182 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1183 singleVolumeNumber ? OnlyOneVolume : 0,
1184 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1185 if (err == -2) {
1186 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1187 retcode = -1;
1188 goto error;
1189 }
1190 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1191 }
1192 if (forceSal && !ForceSalvage) {
1193 Log("***Forced salvage of all volumes on this partition***\n");
1194 ForceSalvage = 1;
1195 }
1196 OS_SEEK(inodeFile, 0L, SEEK_SET);
1197 salvinfo->inodeFd = inodeFile;
1198 if (salvinfo->inodeFd == INVALID_FD ||
1199 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1200 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1201 }
1202 tdir = (tmpdir ? tmpdir : part);
1203 #ifdef AFS_NT40_ENV
1204 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1205 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1206 #else
1207 snprintf(summaryFileName, sizeof summaryFileName,
1208 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1209 #endif
1210 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1211 if (summaryFile == INVALID_FD) {
1212 Abort("Unable to create inode summary file\n");
1213 }
1214
1215 #ifdef AFS_NT40_ENV
1216 /* Using nt_unlink here since we're really using the delete on close
1217 * semantics of unlink. In most places in the salvager, we really do
1218 * mean to unlink the file at that point. Those places have been
1219 * modified to actually do that so that the NT crt can be used there.
1220 *
1221 * jaltman - As commented elsewhere, this cannot work because fopen()
1222 * does not open files with DELETE and FILE_SHARE_DELETE.
1223 */
1224 code = nt_unlink(summaryFileName);
1225 #else
1226 code = unlink(summaryFileName);
1227 #endif
1228 if (code < 0) {
1229 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1230 }
1231
1232 if (!canfork || debug || Fork() == 0) {
1233 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1234 if (nInodes == 0) {
1235 OS_CLOSE(summaryFile);
1236 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1237 RemoveTheForce(salvinfo->fileSysPath);
1238 else {
1239 struct VolumeSummary *vsp;
1240 int i;
1241 int foundSVN = 0;
1242
1243 GetVolumeSummary(salvinfo, singleVolumeNumber);
1244
1245 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1246 if (vsp->unused) {
1247 if (vsp->header.id == singleVolumeNumber) {
1248 foundSVN = 1;
1249 }
1250 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1251 }
1252 }
1253
1254 if (!foundSVN) {
1255 if (Testing) {
1256 MaybeAskOnline(salvinfo, singleVolumeNumber);
1257 } else {
1258 /* make sure we get rid of stray .vol headers, even if
1259 * they're not in our volume summary (might happen if
1260 * e.g. something else created them and they're not in the
1261 * fileserver VGC) */
1262 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1263 singleVolumeNumber, 0 /*parent*/);
1264 AskDelete(salvinfo, singleVolumeNumber);
1265 }
1266 }
1267 }
1268 Log("%s vice inodes on %s; not salvaged\n",
1269 singleVolumeNumber ? "No applicable" : "No", dev);
1270 retcode = -1;
1271 deleted = 1;
1272 goto error;
1273 }
1274 ip = malloc(nInodes*sizeof(struct ViceInodeInfo));
1275 if (ip == NULL) {
1276 OS_CLOSE(summaryFile);
1277 Abort
1278 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1279 dev);
1280 }
1281 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1282 OS_CLOSE(summaryFile);
1283 Abort("Unable to read inode table; %s not salvaged\n", dev);
1284 }
1285 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1286 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1287 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1288 OS_CLOSE(summaryFile);
1289 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1290 }
1291 summary.index = 0;
1292 ip_save = ip;
1293 while (nInodes) {
1294 CountVolumeInodes(ip, nInodes, &summary);
1295 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1296 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1297 OS_CLOSE(summaryFile);
1298 retcode = -1;
1299 goto error;
1300 }
1301 summary.index += (summary.nInodes);
1302 nInodes -= summary.nInodes;
1303 ip += summary.nInodes;
1304 }
1305 free(ip_save);
1306 ip = ip_save = NULL;
1307 /* Following fflush is not fclose, because if it was debug mode would not work */
1308 if (OS_SYNC(summaryFile) == -1) {
1309 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1310 OS_CLOSE(summaryFile);
1311 retcode = -1;
1312 goto error;
1313 }
1314 if (canfork && !debug) {
1315 QuietExit(0);
1316 }
1317 } else {
1318 if (Wait("Inode summary") == -1) {
1319 OS_CLOSE(summaryFile);
1320 Exit(1); /* salvage of this partition aborted */
1321 }
1322 }
1323
1324 st_size = OS_SIZE(summaryFile);
1325 opr_Assert(st_size >= 0);
1326 if (st_size != 0) {
1327 int ret;
1328 salvinfo->inodeSummary = malloc(st_size);
1329 opr_Assert(salvinfo->inodeSummary != NULL);
1330 /* For GNU we need to do lseek to get the file pointer moved. */
1331 opr_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1332 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1333 opr_Assert(ret == st_size);
1334 }
1335 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1336 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1337 salvinfo->inodeSummary[i].volSummary = NULL;
1338 }
1339 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1340 OS_CLOSE(summaryFile);
1341
1342 error:
1343 if (retcode && singleVolumeNumber && !deleted) {
1344 AskError(salvinfo, singleVolumeNumber);
1345 }
1346
1347 return retcode;
1348 }
1349
1350 /* Comparison routine for volume sort.
1351 This is setup so that a read-write volume comes immediately before
1352 any read-only clones of that volume */
1353 int
1354 CompareVolumes(const void *_p1, const void *_p2)
1355 {
1356 const struct VolumeSummary *p1 = _p1;
1357 const struct VolumeSummary *p2 = _p2;
1358 if (p1->header.parent != p2->header.parent)
1359 return p1->header.parent < p2->header.parent ? -1 : 1;
1360 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1361 return -1;
1362 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1363 return 1;
1364 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1365 }
1366
1367 /**
1368 * Gleans volumeSummary information by asking the fileserver
1369 *
1370 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1371 * salvaging a whole partition
1372 *
1373 * @return whether we obtained the volume summary information or not
1374 * @retval 0 success; we obtained the volume summary information
1375 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1376 * must be retried
1377 * @retval 1 we did not get the volume summary information; either the
1378 * fileserver responded with an error, or we are not supposed to
1379 * ask the fileserver for the information (e.g. we are salvaging
1380 * the entire partition or we are not the salvageserver)
1381 *
1382 * @note for non-DAFS, always returns 1
1383 */
1384 static int
1385 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1386 {
1387 afs_int32 code = 1;
1388 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1389 if (programType == salvageServer) {
1390 if (singleVolumeNumber) {
1391 FSSYNC_VGQry_response_t q_res;
1392 SYNC_response res;
1393 struct VolumeSummary *vsp;
1394 int i;
1395 struct VolumeDiskHeader diskHdr;
1396
1397 memset(&res, 0, sizeof(res));
1398
1399 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1400
1401 /*
1402 * We must wait for the partition to finish scanning before
1403 * can continue, since we will not know if we got the entire
1404 * VG membership unless the partition is fully scanned.
1405 * We could, in theory, just scan the partition ourselves if
1406 * the VG cache is not ready, but we would be doing the exact
1407 * same scan the fileserver is doing; it will almost always
1408 * be faster to wait for the fileserver. The only exceptions
1409 * are if the partition does not take very long to scan, and
1410 * in that case it's fast either way, so who cares?
1411 */
1412 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1413 Log("waiting for fileserver to finish scanning partition %s...\n",
1414 salvinfo->fileSysPartition->name);
1415
1416 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1417 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1418 * just so small partitions don't need to wait over 10
1419 * seconds every time, and large partitions are generally
1420 * polled only once every ten seconds. */
1421 sleep((i > 10) ? (i = 10) : i);
1422
1423 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1424 }
1425 }
1426
1427 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1428 /* This can happen if there's no header for the volume
1429 * we're salvaging, or no headers exist for the VG (if
1430 * we're salvaging an RW). Act as if we got a response
1431 * with no VG members. The headers may be created during
1432 * salvaging, if there are inodes in this VG. */
1433 code = 0;
1434 memset(&q_res, 0, sizeof(q_res));
1435 q_res.rw = singleVolumeNumber;
1436 }
1437
1438 if (code) {
1439 Log("fileserver refused VGCQuery request for volume %" AFS_VOLID_FMT " on "
1440 "partition %s, code %ld reason %ld\n",
1441 afs_printable_VolumeId_lu(singleVolumeNumber),
1442 salvinfo->fileSysPartition->name,
1443 afs_printable_int32_ld(code),
1444 afs_printable_int32_ld(res.hdr.reason));
1445 goto done;
1446 }
1447
1448 if (q_res.rw != singleVolumeNumber) {
1449 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1450 afs_printable_VolumeId_lu(singleVolumeNumber),
1451 afs_printable_VolumeId_lu(q_res.rw));
1452 #ifdef SALVSYNC_BUILD_CLIENT
1453 if (SALVSYNC_LinkVolume(q_res.rw,
1454 singleVolumeNumber,
1455 salvinfo->fileSysPartition->name,
1456 NULL) != SYNC_OK) {
1457 Log("schedule request failed\n");
1458 }
1459 #endif /* SALVSYNC_BUILD_CLIENT */
1460 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1461 }
1462
1463 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1464 opr_Assert(salvinfo->volumeSummaryp != NULL);
1465
1466 salvinfo->nVolumes = 0;
1467 vsp = salvinfo->volumeSummaryp;
1468
1469 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1470 char name[VMAXPATHLEN];
1471
1472 if (!q_res.children[i]) {
1473 continue;
1474 }
1475
1476 /* AskOffline for singleVolumeNumber was called much earlier */
1477 if (q_res.children[i] != singleVolumeNumber) {
1478 AskOffline(salvinfo, q_res.children[i]);
1479 if (LockVolume(salvinfo, q_res.children[i])) {
1480 /* need to retry */
1481 return -1;
1482 }
1483 }
1484
1485 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1486 if (code) {
1487 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1488 afs_printable_uint32_lu(q_res.children[i]));
1489 code = 0;
1490 continue;
1491 }
1492
1493 DiskToVolumeHeader(&vsp->header, &diskHdr);
1494 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1495 vsp->unused = 1;
1496 salvinfo->nVolumes++;
1497 vsp++;
1498 }
1499
1500 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1501 CompareVolumes);
1502 }
1503 done:
1504 if (code) {
1505 Log("Cannot get volume summary from fileserver; falling back to scanning "
1506 "entire partition\n");
1507 }
1508 }
1509 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1510 return code;
1511 }
1512
1513 /**
1514 * count how many volume headers are found by VWalkVolumeHeaders.
1515 *
1516 * @param[in] dp the disk partition (unused)
1517 * @param[in] name full path to the .vol header (unused)
1518 * @param[in] hdr the header data (unused)
1519 * @param[in] last whether this is the last try or not (unused)
1520 * @param[in] rock actually an afs_int32*; the running count of how many
1521 * volumes we have found
1522 *
1523 * @retval 0 always
1524 */
1525 static int
1526 CountHeader(struct DiskPartition64 *dp, const char *name,
1527 struct VolumeDiskHeader *hdr, int last, void *rock)
1528 {
1529 afs_int32 *nvols = (afs_int32 *)rock;
1530 (*nvols)++;
1531 return 0;
1532 }
1533
1534 /**
1535 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1536 * data.
1537 */
1538 struct SalvageScanParams {
1539 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1540 * vol id of the VG we're salvaging */
1541 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1542 * we're filling in */
1543 afs_int32 nVolumes; /**< # of vols we've encountered */
1544 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1545 * # of vols we've alloc'd memory for) */
1546 int retry; /**< do we need to retry vol lock/checkout? */
1547 struct SalvInfo *salvinfo; /**< salvage job info */
1548 };
1549
1550 /**
1551 * records volume summary info found from VWalkVolumeHeaders.
1552 *
1553 * Found volumes are also taken offline if they are in the specific volume
1554 * group we are looking for.
1555 *
1556 * @param[in] dp the disk partition
1557 * @param[in] name full path to the .vol header
1558 * @param[in] hdr the header data
1559 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1560 * @param[in] rock actually a struct SalvageScanParams*, containing the
1561 * information needed to record the volume summary data
1562 *
1563 * @return operation status
1564 * @retval 0 success
1565 * @retval -1 volume locking raced with fileserver restart; checking out
1566 * and locking volumes needs to be retried
1567 * @retval 1 volume header is mis-named and should be deleted
1568 */
1569 static int
1570 RecordHeader(struct DiskPartition64 *dp, const char *name,
1571 struct VolumeDiskHeader *hdr, int last, void *rock)
1572 {
1573 char nameShouldBe[64];
1574 struct SalvageScanParams *params;
1575 struct VolumeSummary summary;
1576 VolumeId singleVolumeNumber;
1577 struct SalvInfo *salvinfo;
1578
1579 params = (struct SalvageScanParams *)rock;
1580
1581 memset(&summary, 0, sizeof(summary));
1582
1583 singleVolumeNumber = params->singleVolumeNumber;
1584 salvinfo = params->salvinfo;
1585
1586 DiskToVolumeHeader(&summary.header, hdr);
1587
1588 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1589 && summary.header.parent != singleVolumeNumber) {
1590
1591 if (programType == salvageServer) {
1592 #ifdef SALVSYNC_BUILD_CLIENT
1593 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1594 afs_printable_VolumeId_lu(summary.header.id),
1595 afs_printable_VolumeId_lu(summary.header.parent));
1596 if (SALVSYNC_LinkVolume(summary.header.parent,
1597 summary.header.id,
1598 dp->name,
1599 NULL) != SYNC_OK) {
1600 Log("schedule request failed\n");
1601 }
1602 #endif
1603 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1604
1605 } else {
1606 Log("%" AFS_VOLID_FMT " is a read-only volume; not salvaged\n",
1607 afs_printable_VolumeId_lu(singleVolumeNumber));
1608 Exit(1);
1609 }
1610 }
1611
1612 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1613 || summary.header.parent == singleVolumeNumber) {
1614
1615 /* check if the header file is incorrectly named */
1616 int badname = 0;
1617 const char *base = strrchr(name, OS_DIRSEPC);
1618 if (base) {
1619 base++;
1620 } else {
1621 base = name;
1622 }
1623
1624 snprintf(nameShouldBe, sizeof nameShouldBe,
1625 VFORMAT, afs_printable_VolumeId_lu(summary.header.id));
1626
1627
1628 if (strcmp(nameShouldBe, base)) {
1629 /* .vol file has wrong name; retry/delete */
1630 badname = 1;
1631 }
1632
1633 if (!badname || last) {
1634 /* only offline the volume if the header is good, or if this is
1635 * the last try looking at it; avoid AskOffline'ing the same vol
1636 * multiple times */
1637
1638 if (singleVolumeNumber
1639 && summary.header.id != singleVolumeNumber) {
1640 /* don't offline singleVolumeNumber; we already did that
1641 * earlier */
1642
1643 AskOffline(salvinfo, summary.header.id);
1644
1645 #ifdef AFS_DEMAND_ATTACH_FS
1646 if (!badname) {
1647 /* don't lock the volume if the header is bad, since we're
1648 * about to delete it anyway. */
1649 if (LockVolume(salvinfo, summary.header.id)) {
1650 params->retry = 1;
1651 return -1;
1652 }
1653 }
1654 #endif /* AFS_DEMAND_ATTACH_FS */
1655 }
1656 }
1657 if (badname) {
1658 if (last && !Showmode) {
1659 Log("Volume header file %s is incorrectly named (should be %s "
1660 "not %s); %sdeleted (it will be recreated later, if "
1661 "necessary)\n", name, nameShouldBe, base,
1662 (Testing ? "it would have been " : ""));
1663 }
1664 return 1;
1665 }
1666
1667 summary.unused = 1;
1668 params->nVolumes++;
1669
1670 if (params->nVolumes > params->totalVolumes) {
1671 /* We found more volumes than we found on the first partition walk;
1672 * apparently something created a volume while we were
1673 * partition-salvaging, or we found more than 20 vols when salvaging a
1674 * particular volume. Abort if we detect this, since other programs
1675 * supposed to not touch the partition while it is partition-salvaging,
1676 * and we shouldn't find more than 20 vols in a VG.
1677 */
1678 Abort("Found %ld vol headers, but should have found at most %ld! "
1679 "Make sure the volserver/fileserver are not running at the "
1680 "same time as a partition salvage\n",
1681 afs_printable_int32_ld(params->nVolumes),
1682 afs_printable_int32_ld(params->totalVolumes));
1683 }
1684
1685 memcpy(params->vsp, &summary, sizeof(summary));
1686 params->vsp++;
1687 }
1688
1689 return 0;
1690 }
1691
1692 /**
1693 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1694 *
1695 * If the header could not be read in at all, the header is always unlinked.
1696 * If instead RecordHeader said the header was bad (that is, the header file
1697 * is mis-named), we only unlink if we are doing a partition salvage, as
1698 * opposed to salvaging a specific volume group.
1699 *
1700 * @param[in] dp the disk partition
1701 * @param[in] name full path to the .vol header
1702 * @param[in] hdr header data, or NULL if the header could not be read
1703 * @param[in] rock actually a struct SalvageScanParams*, with some information
1704 * about the scan
1705 */
1706 static void
1707 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1708 struct VolumeDiskHeader *hdr, void *rock)
1709 {
1710 struct SalvageScanParams *params;
1711 int dounlink = 0;
1712
1713 params = (struct SalvageScanParams *)rock;
1714
1715 if (!hdr) {
1716 /* no header; header is too bogus to read in at all */
1717 if (!Showmode) {
1718 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1719 }
1720 if (!Testing) {
1721 dounlink = 1;
1722 }
1723
1724 } else if (!params->singleVolumeNumber) {
1725 /* We were able to read in a header, but RecordHeader said something
1726 * was wrong with it. We only unlink those if we are doing a partition
1727 * salvage. */
1728 if (!Testing) {
1729 dounlink = 1;
1730 }
1731 }
1732
1733 if (dounlink && unlink(name)) {
1734 Log("Error %d while trying to unlink %s\n", errno, name);
1735 }
1736 }
1737
1738 /**
1739 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1740 * the fileserver for VG information, or by scanning the /vicepX partition.
1741 *
1742 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1743 * are salvaging, or 0 if this is a partition
1744 * salvage
1745 *
1746 * @return operation status
1747 * @retval 0 success
1748 * @retval -1 we raced with a fileserver restart; checking out and locking
1749 * volumes must be retried
1750 */
1751 int
1752 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1753 {
1754 afs_int32 nvols = 0;
1755 struct SalvageScanParams params;
1756 int code;
1757
1758 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1759 if (code == 0) {
1760 /* we successfully got the vol information from the fileserver; no
1761 * need to scan the partition */
1762 return 0;
1763 }
1764 if (code < 0) {
1765 /* we need to retry volume checkout */
1766 return code;
1767 }
1768
1769 if (!singleVolumeNumber) {
1770 /* Count how many volumes we have in /vicepX */
1771 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1772 NULL, &nvols);
1773 if (code < 0) {
1774 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1775 }
1776 if (!nvols)
1777 nvols = 1;
1778 } else {
1779 nvols = VOL_VG_MAX_VOLS;
1780 }
1781
1782 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1783 opr_Assert(salvinfo->volumeSummaryp != NULL);
1784
1785 params.singleVolumeNumber = singleVolumeNumber;
1786 params.vsp = salvinfo->volumeSummaryp;
1787 params.nVolumes = 0;
1788 params.totalVolumes = nvols;
1789 params.retry = 0;
1790 params.salvinfo = salvinfo;
1791
1792 /* walk the partition directory of volume headers and record the info
1793 * about them; unlinking invalid headers */
1794 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1795 UnlinkHeader, &params);
1796 if (params.retry) {
1797 /* we apparently need to retry checking-out/locking volumes */
1798 return -1;
1799 }
1800 if (code < 0) {
1801 Abort("Failed to get volume header summary\n");
1802 }
1803 salvinfo->nVolumes = params.nVolumes;
1804
1805 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1806 CompareVolumes);
1807
1808 return 0;
1809 }
1810
1811 #ifdef AFS_NAMEI_ENV
1812 /* Find the link table. This should be associated with the RW volume, even
1813 * if there is only an RO volume at this site.
1814 */
1815 static Inode
1816 FindLinkHandle(struct InodeSummary *isp, int nVols,
1817 struct ViceInodeInfo *allInodes)
1818 {
1819 int i, j;
1820 struct ViceInodeInfo *ip;
1821
1822 for (i = 0; i < nVols; i++) {
1823 ip = allInodes + isp[i].index;
1824 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1825 if (ip[j].u.special.volumeId == isp->RWvolumeId &&
1826 ip[j].u.special.parentId == isp->RWvolumeId &&
1827 ip[j].u.special.type == VI_LINKTABLE) {
1828 return ip[j].inodeNumber;
1829 }
1830 }
1831 }
1832 return (Inode) - 1;
1833 }
1834
1835 static int
1836 CheckDupLinktable(struct SalvInfo *salvinfo, struct InodeSummary *isp, struct ViceInodeInfo *ip)
1837 {
1838 afs_ino_str_t stmp;
1839 if (ip->u.vnode.vnodeNumber != INODESPECIAL) {
1840 /* not a linktable; process as a normal file */
1841 return 0;
1842 }
1843 if (ip->u.special.type != VI_LINKTABLE) {
1844 /* not a linktable; process as a normal file */
1845 return 0;
1846 }
1847
1848 /* make sure nothing inc/decs it */
1849 ip->linkCount = 0;
1850
1851 if (ip->u.special.volumeId == ip->u.special.parentId) {
1852 /* This is a little weird, but shouldn't break anything, and there is
1853 * no known way that this can happen; just do nothing, in case deleting
1854 * it would screw something up. */
1855 Log("Inode %s appears to be a valid linktable for id (%u), but it's not\n",
1856 PrintInode(stmp, ip->inodeNumber), ip->u.special.parentId);
1857 Log("the linktable for our volume group (%u). This is unusual, since\n",
1858 isp->RWvolumeId);
1859 Log("there should only be one linktable per volume group. I'm leaving\n");
1860 Log("it alone, just to be safe.\n");
1861 return -1;
1862 }
1863
1864 Log("Linktable %s appears to be invalid (parentid/volumeid mismatch: %u != %u)\n",
1865 PrintInode(stmp, ip->inodeNumber), ip->u.special.parentId, ip->u.special.volumeId);
1866 if (Testing) {
1867 Log("Would have deleted linktable inode %s\n", PrintInode(stmp, ip->inodeNumber));
1868 } else {
1869 IHandle_t *tmpH;
1870 namei_t ufs_name;
1871
1872 Log("Deleting linktable inode %s\n", PrintInode(stmp, ip->inodeNumber));
1873 IH_INIT(tmpH, salvinfo->fileSysDevice, isp->RWvolumeId, ip->inodeNumber);
1874 namei_HandleToName(&ufs_name, tmpH);
1875 if (unlink(ufs_name.n_path) < 0) {
1876 Log("Error %d unlinking path %s\n", errno, ufs_name.n_path);
1877 }
1878 }
1879
1880 return -1;
1881 }
1882 #endif
1883
1884 int
1885 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1886 {
1887 struct versionStamp version;
1888 FdHandle_t *fdP;
1889
1890 if (!VALID_INO(ino))
1891 ino =
1892 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->RWvolumeId,
1893 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1894 if (!VALID_INO(ino))
1895 Abort
1896 ("Unable to allocate link table inode for volume %" AFS_VOLID_FMT " (error = %d)\n",
1897 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1898 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1899 fdP = IH_OPEN(salvinfo->VGLinkH);
1900 if (fdP == NULL)
1901 Abort("Can't open link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1902 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1903
1904 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1905 Abort("Can't truncate link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1906 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1907
1908 version.magic = LINKTABLEMAGIC;
1909 version.version = LINKTABLEVERSION;
1910
1911 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1912 != sizeof(version))
1913 Abort("Can't truncate link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1914 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1915
1916 FDH_REALLYCLOSE(fdP);
1917
1918 /* If the volume summary exits (i.e., the V*.vol header file exists),
1919 * then set this inode there as well.
1920 */
1921 if (isp->volSummary)
1922 isp->volSummary->header.linkTable = ino;
1923
1924 return 0;
1925 }
1926
1927 #ifdef AFS_NT40_ENV
1928 void *
1929 nt_SVG(void *arg)
1930 {
1931 SVGParms_t *parms = (SVGParms_t *) arg;
1932 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
1933 return NULL;
1934 }
1935
1936 void
1937 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1938 {
1939 pthread_t tid;
1940 pthread_attr_t tattr;
1941 int code;
1942 SVGParms_t parms;
1943
1944 /* Initialize per volume global variables, even if later code does so */
1945 salvinfo->VolumeChanged = 0;
1946 salvinfo->VGLinkH = NULL;
1947 salvinfo->VGLinkH_cnt = 0;
1948 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1949
1950 parms.svgp_inodeSummaryp = isp;
1951 parms.svgp_count = nVols;
1952 parms.svgp_salvinfo = salvinfo;
1953 code = pthread_attr_init(&tattr);
1954 if (code) {
1955 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1956 isp->RWvolumeId);
1957 return;
1958 }
1959 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1960 if (code) {
1961 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1962 return;
1963 }
1964 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1965 if (code) {
1966 Log("Failed to create thread to salvage volume group %u\n",
1967 isp->RWvolumeId);
1968 return;
1969 }
1970 (void)pthread_join(tid, NULL);
1971 }
1972 #endif /* AFS_NT40_ENV */
1973
1974 void
1975 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1976 {
1977 struct ViceInodeInfo *inodes, *allInodes, *ip;
1978 int i, totalInodes, size, salvageTo;
1979 int haveRWvolume;
1980 int check;
1981 Inode ino;
1982 int dec_VGLinkH = 0;
1983 int VGLinkH_p1 =0;
1984 FdHandle_t *fdP = NULL;
1985
1986 salvinfo->VGLinkH_cnt = 0;
1987 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1988 && isp->nSpecialInodes > 0);
1989 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1990 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1991 return;
1992 }
1993 if (ShowMounts && !haveRWvolume)
1994 return;
1995 if (canfork && !debug && Fork() != 0) {
1996 (void)Wait("Salvage volume group");
1997 return;
1998 }
1999 for (i = 0, totalInodes = 0; i < nVols; i++)
2000 totalInodes += isp[i].nInodes;
2001 size = totalInodes * sizeof(struct ViceInodeInfo);
2002 inodes = malloc(size);
2003 allInodes = inodes - isp->index; /* this would the base of all the inodes
2004 * for the partition, if all the inodes
2005 * had been read into memory */
2006 opr_Verify(OS_SEEK
2007 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
2008 SEEK_SET) != -1);
2009 opr_Verify(OS_READ(salvinfo->inodeFd, inodes, size) == size);
2010
2011 /* Don't try to salvage a read write volume if there isn't one on this
2012 * partition */
2013 salvageTo = haveRWvolume ? 0 : 1;
2014
2015 #ifdef AFS_NAMEI_ENV
2016 ino = FindLinkHandle(isp, nVols, allInodes);
2017 if (VALID_INO(ino)) {
2018 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
2019 fdP = IH_OPEN(salvinfo->VGLinkH);
2020 }
2021 if (VALID_INO(ino) && fdP != NULL) {
2022 struct versionStamp header;
2023 afs_sfsize_t nBytes;
2024
2025 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
2026 if (nBytes != sizeof(struct versionStamp)
2027 || header.magic != LINKTABLEMAGIC) {
2028 Log("Bad linktable header for volume %" AFS_VOLID_FMT ".\n", afs_printable_VolumeId_lu(isp->RWvolumeId));
2029 FDH_REALLYCLOSE(fdP);
2030 fdP = NULL;
2031 }
2032 }
2033 if (!VALID_INO(ino) || fdP == NULL) {
2034 Log("%s link table for volume %" AFS_VOLID_FMT ".\n",
2035 Testing ? "Would have recreated" : "Recreating", afs_printable_VolumeId_lu(isp->RWvolumeId));
2036 if (Testing) {
2037 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
2038 } else {
2039 int i, j;
2040 struct ViceInodeInfo *ip;
2041 CreateLinkTable(salvinfo, isp, ino);
2042 fdP = IH_OPEN(salvinfo->VGLinkH);
2043 /* Sync fake 1 link counts to the link table, now that it exists */
2044 if (fdP) {
2045 for (i = 0; i < nVols; i++) {
2046 ip = allInodes + isp[i].index;
2047 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
2048 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 0);
2049 ip[j].linkCount = 1;
2050 }
2051 }
2052 }
2053 }
2054 }
2055 if (fdP)
2056 FDH_REALLYCLOSE(fdP);
2057 #else
2058 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
2059 #endif
2060
2061 /* Salvage in reverse order--read/write volume last; this way any
2062 * Inodes not referenced by the time we salvage the read/write volume
2063 * can be picked up by the read/write volume */
2064 /* ACTUALLY, that's not done right now--the inodes just vanish */
2065 for (i = nVols - 1; i >= salvageTo; i--) {
2066 int rw = (i == 0);
2067 struct InodeSummary *lisp = &isp[i];
2068 #ifdef AFS_NAMEI_ENV
2069 if (rw && (nVols > 1 || isp[i].nSpecialInodes == isp[i].nInodes)) {
2070 /* If nVols > 1, we have more than one vol in this volgroup, so
2071 * the RW inodes we detected may just be for the linktable, and
2072 * there is no actual RW volume.
2073 *
2074 * Additionally, if we only have linktable inodes (no other
2075 * special inodes, no data inodes), there is also no actual RW
2076 * volume to salvage; this is just cruft left behind by something
2077 * else. In that case nVols will only be 1, though, so also
2078 * perform this linktables-only check if we don't have any
2079 * non-special inodes. */
2080 int inode_i;
2081 int all_linktables = 1;
2082 for (inode_i = 0; inode_i < isp[i].nSpecialInodes; inode_i++) {
2083 if (inodes[inode_i].u.special.type != VI_LINKTABLE) {
2084 all_linktables = 0;
2085 break;
2086 }
2087 }
2088 if (all_linktables) {
2089 /* All we have are linktable special inodes, so skip salvaging
2090 * the RW; there was never an RW volume here. If we don't do
2091 * this, we risk creating a new "phantom" RW that the VLDB
2092 * doesn't know about, which is confusing and can cause
2093 * problems. */
2094 haveRWvolume = 0;
2095 continue;
2096 }
2097 }
2098 #endif
2099 if (!Showmode)
2100 Log("%s VOLUME %" AFS_VOLID_FMT "%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2101 afs_printable_VolumeId_lu(lisp->volumeId), (Testing ? "(READONLY mode)" : ""));
2102 /* Check inodes twice. The second time do things seriously. This
2103 * way the whole RO volume can be deleted, below, if anything goes wrong */
2104 for (check = 1; check >= 0; check--) {
2105 int deleteMe;
2106 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2107 == -1) {
2108 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2109 if (rw && deleteMe) {
2110 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2111 * volume won't be called */
2112 break;
2113 }
2114 if (!rw)
2115 break;
2116 }
2117 if (rw && check == 1)
2118 continue;
2119 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2120 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2121 break;
2122 }
2123 }
2124 }
2125
2126 /* Fix actual inode counts */
2127 if (!Showmode) {
2128 afs_ino_str_t stmp;
2129 Log("totalInodes %d\n",totalInodes);
2130 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2131 static int TraceBadLinkCounts = 0;
2132 #ifdef AFS_NAMEI_ENV
2133 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2134 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2135 VGLinkH_p1 = ip->u.param[0];
2136 continue; /* Deal with this last. */
2137 } else if (CheckDupLinktable(salvinfo, isp, ip)) {
2138 /* Don't touch this inode; CheckDupLinktable has handled it */
2139 continue;
2140 }
2141 #endif
2142 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2143 TraceBadLinkCounts--; /* Limit reports, per volume */
2144 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]); /* VolumeId in param */
2145 }
2146
2147 /* If ip->linkCount is non-zero at this point, then the linkcount
2148 * for the inode on disk is wrong. Initially linkCount is set to
2149 * the actual link count of the inode on disk, and then we (the
2150 * salvager) decrement it for every reference to that inode that we
2151 * find. So if linkCount is still positive by this point, it means
2152 * that the linkcount on disk is too high, so we should DEC the
2153 * inode. If linkCount is negative, it means the linkcount is too
2154 * low, so we should INC the inode.
2155 *
2156 * If we get an error while INC'ing or DEC'ing, that's a little
2157 * odd and indicates a bug, but try to continue anyway, so the
2158 * volume may still be made accessible. */
2159 while (ip->linkCount > 0) {
2160 if (!Testing) {
2161 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2162 Log("idec failed. inode %s errno %d\n",
2163 PrintInode(stmp, ip->inodeNumber), errno);
2164 break;
2165 }
2166 }
2167 ip->linkCount--;
2168 }
2169 while (ip->linkCount < 0) {
2170 if (!Testing) {
2171 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2172 Log("iinc failed. inode %s errno %d\n",
2173 PrintInode(stmp, ip->inodeNumber), errno);
2174 break;
2175 }
2176 }
2177 ip->linkCount++;
2178 }
2179 }
2180 #ifdef AFS_NAMEI_ENV
2181 while (dec_VGLinkH > 0) {
2182 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2183 Log("idec failed on link table, errno = %d\n", errno);
2184 }
2185 dec_VGLinkH--;
2186 }
2187 while (dec_VGLinkH < 0) {
2188 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2189 Log("iinc failed on link table, errno = %d\n", errno);
2190 }
2191 dec_VGLinkH++;
2192 }
2193 #endif
2194 }
2195 free(inodes);
2196 /* Directory consistency checks on the rw volume */
2197 if (haveRWvolume)
2198 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2199 IH_RELEASE(salvinfo->VGLinkH);
2200
2201 if (canfork && !debug) {
2202 QuietExit(0);
2203 }
2204 }
2205
2206 int
2207 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2208 {
2209 /* Check headers BEFORE forking */
2210 int i;
2211 IHandle_t *h;
2212
2213 for (i = 0; i < nVols; i++) {
2214 struct VolumeSummary *vs = isp[i].volSummary;
2215 VolumeDiskData volHeader;
2216 if (!vs) {
2217 /* Don't salvage just because phantom rw volume is there... */
2218 /* (If a read-only volume exists, read/write inodes must also exist) */
2219 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2220 continue;
2221 return 0;
2222 }
2223 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2224 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2225 == sizeof(volHeader)
2226 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2227 && volHeader.dontSalvage == DONT_SALVAGE
2228 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2229 if (volHeader.inUse != 0) {
2230 volHeader.inUse = 0;
2231 volHeader.inService = 1;
2232 if (!Testing) {
2233 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2234 != sizeof(volHeader)) {
2235 IH_RELEASE(h);
2236 return 0;
2237 }
2238 }
2239 }
2240 IH_RELEASE(h);
2241 } else {
2242 IH_RELEASE(h);
2243 return 0;
2244 }
2245 }
2246 return 1;
2247 }
2248
2249
2250 /* SalvageVolumeHeaderFile
2251 *
2252 * Salvage the top level V*.vol header file. Make sure the special files
2253 * exist and that there are no duplicates.
2254 *
2255 * Calls SalvageHeader for each possible type of volume special file.
2256 */
2257
2258 int
2259 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2260 struct ViceInodeInfo *inodes, int RW,
2261 int check, int *deleteMe)
2262 {
2263 int i;
2264 struct ViceInodeInfo *ip;
2265 int allinodesobsolete = 1;
2266 struct VolumeDiskHeader diskHeader;
2267 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2268 int *skip;
2269 struct VolumeHeader tempHeader;
2270 struct afs_inode_info stuff[MAXINODETYPE];
2271
2272 /* keeps track of special inodes that are probably 'good'; they are
2273 * referenced in the vol header, and are included in the given inodes
2274 * array */
2275 struct {
2276 int valid;
2277 Inode inode;
2278 } goodspecial[MAXINODETYPE];
2279
2280 if (deleteMe)
2281 *deleteMe = 0;
2282
2283 memset(goodspecial, 0, sizeof(goodspecial));
2284
2285 skip = calloc(isp->nSpecialInodes, sizeof(*skip));
2286 if (skip == NULL) {
2287 Log("cannot allocate memory for inode skip array when salvaging "
2288 "volume %lu; not performing duplicate special inode recovery\n",
2289 afs_printable_uint32_lu(isp->volumeId));
2290 /* still try to perform the salvage; the skip array only does anything
2291 * if we detect duplicate special inodes */
2292 }
2293
2294 init_inode_info(&tempHeader, stuff);
2295
2296 /*
2297 * First, look at the special inodes and see if any are referenced by
2298 * the existing volume header. If we find duplicate special inodes, we
2299 * can use this information to use the referenced inode (it's more
2300 * likely to be the 'good' one), and throw away the duplicates.
2301 */
2302 if (isp->volSummary && skip) {
2303 /* use tempHeader, so we can use the stuff[] array to easily index
2304 * into the isp->volSummary special inodes */
2305 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2306
2307 for (i = 0; i < isp->nSpecialInodes; i++) {
2308 ip = &inodes[isp->index + i];
2309 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2310 /* will get taken care of in a later loop */
2311 continue;
2312 }
2313 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2314 goodspecial[ip->u.special.type-1].valid = 1;
2315 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2316 }
2317 }
2318 }
2319
2320 memset(&tempHeader, 0, sizeof(tempHeader));
2321 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2322 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2323 tempHeader.id = isp->volumeId;
2324 tempHeader.parent = isp->RWvolumeId;
2325
2326 /* Check for duplicates (inodes are sorted by type field) */
2327 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2328 ip = &inodes[isp->index + i];
2329 if (ip->u.special.type == (ip + 1)->u.special.type) {
2330 afs_ino_str_t stmp1, stmp2;
2331
2332 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2333 /* Will be caught in the loop below */
2334 continue;
2335 }
2336 if (!Showmode) {
2337 Log("Duplicate special %d inodes for volume %" AFS_VOLID_FMT " found (%s, %s);\n",
2338 ip->u.special.type, afs_printable_VolumeId_lu(isp->volumeId),
2339 PrintInode(stmp1, ip->inodeNumber),
2340 PrintInode(stmp2, (ip+1)->inodeNumber));
2341 }
2342 if (skip && goodspecial[ip->u.special.type-1].valid) {
2343 Inode gi = goodspecial[ip->u.special.type-1].inode;
2344
2345 if (!Showmode) {
2346 Log("using special inode referenced by vol header (%s)\n",
2347 PrintInode(stmp1, gi));
2348 }
2349
2350 /* the volume header references some special inode of
2351 * this type in the inodes array; are we it? */
2352 if (ip->inodeNumber != gi) {
2353 skip[i] = 1;
2354 } else if ((ip+1)->inodeNumber != gi) {
2355 /* in case this is the last iteration; we need to
2356 * make sure we check ip+1, too */
2357 skip[i+1] = 1;
2358 }
2359 } else {
2360 if (!Showmode)
2361 Log("cannot determine which is correct; salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
2362 if (skip) {
2363 free(skip);
2364 }
2365 return -1;
2366 }
2367 }
2368 }
2369 for (i = 0; i < isp->nSpecialInodes; i++) {
2370 afs_ino_str_t stmp;
2371 ip = &inodes[isp->index + i];
2372 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2373 if (check) {
2374 Log("Rubbish header inode %s of type %d\n",
2375 PrintInode(stmp, ip->inodeNumber),
2376 ip->u.special.type);
2377 if (skip) {
2378 free(skip);
2379 }
2380 return -1;
2381 }
2382 Log("Rubbish header inode %s of type %d; deleted\n",
2383 PrintInode(stmp, ip->inodeNumber),
2384 ip->u.special.type);
2385 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2386 if (skip && skip[i]) {
2387 if (orphans == ORPH_REMOVE) {
2388 Log("Removing orphan special inode %s of type %d\n",
2389 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2390 continue;
2391 } else {
2392 Log("Ignoring orphan special inode %s of type %d\n",
2393 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2394 /* fall through to the ip->linkCount--; line below */
2395 }
2396 } else {
2397 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2398 allinodesobsolete = 0;
2399 }
2400 if (!check && ip->u.special.type != VI_LINKTABLE)
2401 ip->linkCount--; /* Keep the inode around */
2402 }
2403 }
2404 if (skip) {
2405 free(skip);
2406 }
2407 skip = NULL;
2408
2409 if (allinodesobsolete) {
2410 if (deleteMe)
2411 *deleteMe = 1;
2412 return -1;
2413 }
2414
2415 if (!check)
2416 salvinfo->VGLinkH_cnt++; /* one for every header. */
2417
2418 if (!RW && !check && isp->volSummary) {
2419 ClearROInUseBit(isp->volSummary);
2420 return 0;
2421 }
2422
2423 for (i = 0; i < MAXINODETYPE; i++) {
2424 if (stuff[i].inodeType == VI_LINKTABLE) {
2425 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2426 * And we may have recreated the link table earlier, so set the
2427 * RW header as well. The header magic was already checked.
2428 */
2429 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2430 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2431 }
2432 continue;
2433 }
2434 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2435 return -1;
2436 }
2437
2438 if (isp->volSummary == NULL) {
2439 char path[64];
2440 char headerName[64];
2441 snprintf(headerName, sizeof headerName, VFORMAT,
2442 afs_printable_VolumeId_lu(isp->volumeId));
2443 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2444 salvinfo->fileSysPath, headerName);
2445 if (check) {
2446 Log("No header file for volume %" AFS_VOLID_FMT "\n", afs_printable_VolumeId_lu(isp->volumeId));
2447 return -1;
2448 }
2449 if (!Showmode)
2450 Log("No header file for volume %" AFS_VOLID_FMT "; %screating %s\n",
2451 afs_printable_VolumeId_lu(isp->volumeId), (Testing ? "it would have been " : ""),
2452 path);
2453 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2454
2455 writefunc = VCreateVolumeDiskHeader;
2456 } else {
2457 char path[64];
2458 char headerName[64];
2459 /* hack: these two fields are obsolete... */
2460 isp->volSummary->header.volumeAcl = 0;
2461 isp->volSummary->header.volumeMountTable = 0;
2462
2463 if (memcmp
2464 (&isp->volSummary->header, &tempHeader,
2465 sizeof(struct VolumeHeader))) {
2466 VolumeExternalName_r(isp->volumeId, headerName, sizeof(headerName));
2467 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2468 salvinfo->fileSysPath, headerName);
2469
2470 Log("Header file %s is damaged or no longer valid%s\n", path,
2471 (check ? "" : "; repairing"));
2472 if (check)
2473 return -1;
2474
2475 writefunc = VWriteVolumeDiskHeader;
2476 }
2477 }
2478 if (writefunc) {
2479 memcpy(&isp->volSummary->header, &tempHeader,
2480 sizeof(struct VolumeHeader));
2481 if (Testing) {
2482 if (!Showmode)
2483 Log("It would have written a new header file for volume %" AFS_VOLID_FMT "\n",
2484 afs_printable_VolumeId_lu(isp->volumeId));
2485 } else {
2486 afs_int32 code;
2487 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2488 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2489 if (code) {
2490 Log("Error %ld writing volume header file for volume %" AFS_VOLID_FMT "\n",
2491 afs_printable_int32_ld(code),
2492 afs_printable_VolumeId_lu(diskHeader.id));
2493 return -1;
2494 }
2495 }
2496 }
2497 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2498 isp->volSummary->header.volumeInfo);
2499 return 0;
2500 }
2501
2502 int
2503 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2504 struct InodeSummary *isp, int check, int *deleteMe)
2505 {
2506 union {
2507 VolumeDiskData volumeInfo;
2508 struct versionStamp fileHeader;
2509 } header;
2510 IHandle_t *specH;
2511 int recreate = 0;
2512 ssize_t nBytes;
2513 FdHandle_t *fdP;
2514
2515 if (sp->obsolete)
2516 return 0;
2517 #ifndef AFS_NAMEI_ENV
2518 if (sp->inodeType == VI_LINKTABLE)
2519 return 0; /* header magic was already checked */
2520 #endif
2521 if (*(sp->inode) == 0) {
2522 if (check) {
2523 Log("Missing inode in volume header (%s)\n", sp->description);
2524 return -1;
2525 }
2526 if (!Showmode)
2527 Log("Missing inode in volume header (%s); %s\n", sp->description,
2528 (Testing ? "it would have recreated it" : "recreating"));
2529 if (!Testing) {
2530 *(sp->inode) =
2531 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2532 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2533 if (!VALID_INO(*(sp->inode)))
2534 Abort
2535 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2536 sp->description, errno);
2537 }
2538 recreate = 1;
2539 }
2540
2541 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2542 fdP = IH_OPEN(specH);
2543 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2544 /* bail out early and destroy the volume */
2545 if (!Showmode)
2546 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2547 if (deleteMe)
2548 *deleteMe = 1;
2549 IH_RELEASE(specH);
2550 return -1;
2551 }
2552 if (fdP == NULL)
2553 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2554 sp->description, errno);
2555
2556 if (!recreate
2557 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2558 || header.fileHeader.magic != sp->stamp.magic)) {
2559 if (check) {
2560 Log("Part of the header (%s) is corrupted\n", sp->description);
2561 FDH_REALLYCLOSE(fdP);
2562 IH_RELEASE(specH);
2563 return -1;
2564 }
2565 Log("Part of the header (%s) is corrupted; recreating\n",
2566 sp->description);
2567 recreate = 1;
2568 /* header can be garbage; make sure we don't read garbage data from
2569 * it below */
2570 memset(&header, 0, sizeof(header));
2571 }
2572 #ifdef AFS_NAMEI_ENV
2573 if (namei_FixSpecialOGM(fdP, check)) {
2574 Log("Error with namei header OGM data (%s)\n", sp->description);
2575 FDH_REALLYCLOSE(fdP);
2576 IH_RELEASE(specH);
2577 return -1;
2578 }
2579 #endif
2580 if (sp->inodeType == VI_VOLINFO
2581 && header.volumeInfo.destroyMe == DESTROY_ME) {
2582 if (deleteMe)
2583 *deleteMe = 1;
2584 FDH_REALLYCLOSE(fdP);
2585 IH_RELEASE(specH);
2586 return -1;
2587 }
2588 if (recreate && !Testing) {
2589 if (check)
2590 Abort
2591 ("Internal error: recreating volume header (%s) in check mode\n",
2592 sp->description);
2593 nBytes = FDH_TRUNC(fdP, 0);
2594 if (nBytes == -1)
2595 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2596 sp->description, errno);
2597
2598 /* The following code should be moved into vutil.c */
2599 if (sp->inodeType == VI_VOLINFO) {
2600 struct timeval tp;
2601 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2602 header.volumeInfo.stamp = sp->stamp;
2603 header.volumeInfo.id = isp->volumeId;
2604 header.volumeInfo.parentId = isp->RWvolumeId;
2605 sprintf(header.volumeInfo.name, "bogus.%" AFS_VOLID_FMT, afs_printable_VolumeId_lu(isp->volumeId));
2606 Log("Warning: the name of volume %" AFS_VOLID_FMT " is now \"bogus.%" AFS_VOLID_FMT "\"\n",
2607 afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->volumeId));
2608 header.volumeInfo.inService = 0;
2609 header.volumeInfo.blessed = 0;
2610 /* The + 1000 is a hack in case there are any files out in venus caches */
2611 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2612 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2613 header.volumeInfo.needsCallback = 0;
2614 gettimeofday(&tp, NULL);
2615 header.volumeInfo.creationDate = tp.tv_sec;
2616 nBytes =
2617 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2618 sizeof(header.volumeInfo), 0);
2619 if (nBytes != sizeof(header.volumeInfo)) {
2620 if (nBytes < 0)
2621 Abort
2622 ("Unable to write volume header file (%s) (errno = %d)\n",
2623 sp->description, errno);
2624 Abort("Unable to write entire volume header file (%s)\n",
2625 sp->description);
2626 }
2627 } else {
2628 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2629 if (nBytes != sizeof(sp->stamp)) {
2630 if (nBytes < 0)
2631 Abort
2632 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2633 sp->description, errno);
2634 Abort
2635 ("Unable to write entire version stamp in volume header file (%s)\n",
2636 sp->description);
2637 }
2638 }
2639 }
2640 FDH_REALLYCLOSE(fdP);
2641 IH_RELEASE(specH);
2642 if (sp->inodeType == VI_VOLINFO) {
2643 salvinfo->VolInfo = header.volumeInfo;
2644 if (check) {
2645 char update[64];
2646 char buffer[64];
2647
2648 if (salvinfo->VolInfo.updateDate) {
2649 strcpy(update, TimeStamp(buffer, sizeof(buffer), salvinfo->VolInfo.updateDate, 0));
2650 if (!Showmode)
2651 Log("%s (%" AFS_VOLID_FMT ") %supdated %s\n", salvinfo->VolInfo.name,
2652 afs_printable_VolumeId_lu(salvinfo->VolInfo.id),
2653 (Testing ? "it would have been " : ""), update);
2654 } else {
2655 strcpy(update, TimeStamp(buffer, sizeof(buffer), salvinfo->VolInfo.creationDate, 0));
2656 if (!Showmode)
2657 Log("%s (%" AFS_VOLID_FMT ") not updated (created %s)\n",
2658 salvinfo->VolInfo.name, afs_printable_VolumeId_lu(salvinfo->VolInfo.id), update);
2659 }
2660
2661 }
2662 }
2663
2664 return 0;
2665 }
2666
2667 int
2668 SalvageVnodes(struct SalvInfo *salvinfo,
2669 struct InodeSummary *rwIsp,
2670 struct InodeSummary *thisIsp,
2671 struct ViceInodeInfo *inodes, int check)
2672 {
2673 int ilarge, ismall, ioffset, RW, nInodes;
2674 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2675 if (Showmode)
2676 return 0;
2677 RW = (rwIsp == thisIsp);
2678 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2679 ismall =
2680 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2681 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2682 if (check && ismall == -1)
2683 return -1;
2684 ilarge =
2685 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2686 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2687 return (ilarge == 0 && ismall == 0 ? 0 : -1);
2688 }
2689
2690 int
2691 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2692 struct ViceInodeInfo *ip, int nInodes,
2693 struct VolumeSummary *volSummary, int check)
2694 {
2695 char buf[SIZEOF_LARGEDISKVNODE];
2696 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2697 int err = 0;
2698 StreamHandle_t *file;
2699 struct VnodeClassInfo *vcp;
2700 afs_sfsize_t size;
2701 afs_sfsize_t nVnodes;
2702 afs_fsize_t vnodeLength;
2703 int vnodeIndex;
2704 afs_ino_str_t stmp1, stmp2;
2705 IHandle_t *handle;
2706 FdHandle_t *fdP;
2707
2708 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2709 fdP = IH_OPEN(handle);
2710 opr_Assert(fdP != NULL);
2711 file = FDH_FDOPEN(fdP, "r+");
2712 opr_Assert(file != NULL);
2713 vcp = &VnodeClassInfo[class];
2714 size = OS_SIZE(fdP->fd_fd);
2715 opr_Assert(size != -1);
2716 nVnodes = (size / vcp->diskSize) - 1;
2717 if (nVnodes > 0) {
2718 opr_Assert((nVnodes + 1) * vcp->diskSize == size);
2719 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
2720 } else {
2721 nVnodes = 0;
2722 }
2723 for (vnodeIndex = 0;
2724 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2725 nVnodes--, vnodeIndex++) {
2726 if (vnode->type != vNull) {
2727 int vnodeChanged = 0;
2728 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2729 if (VNDISK_GET_INO(vnode) == 0) {
2730 if (RW) {
2731 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2732 memset(vnode, 0, vcp->diskSize);
2733 vnodeChanged = 1;
2734 }
2735 } else {
2736 if (vcp->magic != vnode->vnodeMagic) {
2737 /* bad magic #, probably partially created vnode */
2738 if (check) {
2739 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2740 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2741 afs_printable_uint32_lu(vcp->magic));
2742 memset(vnode, 0, vcp->diskSize);
2743 err = -1;
2744 goto zooks;
2745 }
2746 Log("Partially allocated vnode %d deleted.\n",
2747 vnodeNumber);
2748 memset(vnode, 0, vcp->diskSize);
2749 vnodeChanged = 1;
2750 goto vnodeDone;
2751 }
2752 /* ****** Should do a bit more salvage here: e.g. make sure
2753 * vnode type matches what it should be given the index */
2754 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2755 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2756 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2757 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2758 * }
2759 */
2760 ip++;
2761 nInodes--;
2762 }
2763 if (!RW) {
2764 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2765 /* The following doesn't work, because the version number
2766 * is not maintained correctly by the file server */
2767 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2768 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2769 * break; */
2770 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2771 break;
2772 ip++;
2773 nInodes--;
2774 }
2775 } else {
2776 /* For RW volume, look for vnode with matching inode number;
2777 * if no such match, take the first determined by our sort
2778 * order */
2779 struct ViceInodeInfo *lip = ip;
2780 int lnInodes = nInodes;
2781 while (lnInodes
2782 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2783 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2784 ip = lip;
2785 nInodes = lnInodes;
2786 break;
2787 }
2788 lip++;
2789 lnInodes--;
2790 }
2791 }
2792 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2793 /* "Matching" inode */
2794 if (RW) {
2795 Unique vu, iu;
2796 FileVersion vd, id;
2797 vu = vnode->uniquifier;
2798 iu = ip->u.vnode.vnodeUniquifier;
2799 vd = vnode->dataVersion;
2800 id = ip->u.vnode.inodeDataVersion;
2801 /*
2802 * Because of the possibility of the uniquifier overflows (> 4M)
2803 * we compare them modulo the low 22-bits; we shouldn't worry
2804 * about mismatching since they shouldn't to many old
2805 * uniquifiers of the same vnode...
2806 */
2807 if (IUnique(vu) != IUnique(iu)) {
2808 if (!Showmode) {
2809 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2810 }
2811
2812 vnode->uniquifier = iu;
2813 #ifdef AFS_3DISPARES
2814 vnode->dataVersion = (id >= vd ?
2815 /* 90% of 2.1M */
2816 ((id - vd) >
2817 1887437 ? vd : id) :
2818 /* 90% of 2.1M */
2819 ((vd - id) >
2820 1887437 ? id : vd));
2821 #else
2822 #if defined(AFS_SGI_EXMAG)
2823 vnode->dataVersion = (id >= vd ?
2824 /* 90% of 16M */
2825 ((id - vd) >
2826 15099494 ? vd : id) :
2827 /* 90% of 16M */
2828 ((vd - id) >
2829 15099494 ? id : vd));
2830 #else
2831 vnode->dataVersion = (id > vd ? id : vd);
2832 #endif /* AFS_SGI_EXMAG */
2833 #endif /* AFS_3DISPARES */
2834 vnodeChanged = 1;
2835 } else {
2836 /* don't bother checking for vd > id any more, since
2837 * partial file transfers always result in this state,
2838 * and you can't do much else anyway (you've already
2839 * found the best data you can) */
2840 #ifdef AFS_3DISPARES
2841 if (!vnodeIsDirectory(vnodeNumber)
2842 && ((vd < id && (id - vd) < 1887437)
2843 || ((vd > id && (vd - id) > 1887437)))) {
2844 #else
2845 #if defined(AFS_SGI_EXMAG)
2846 if (!vnodeIsDirectory(vnodeNumber)
2847 && ((vd < id && (id - vd) < 15099494)
2848 || ((vd > id && (vd - id) > 15099494)))) {
2849 #else
2850 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2851 #endif /* AFS_SGI_EXMAG */
2852 #endif
2853 if (!Showmode)
2854 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2855 vnode->dataVersion = id;
2856 vnodeChanged = 1;
2857 }
2858 }
2859 }
2860 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2861 if (check) {
2862 if (!Showmode) {
2863 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2864 }
2865 VNDISK_SET_INO(vnode, ip->inodeNumber);
2866 err = -1;
2867 goto zooks;
2868 }
2869 if (!Showmode) {
2870 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2871 }
2872 VNDISK_SET_INO(vnode, ip->inodeNumber);
2873 vnodeChanged = 1;
2874 }
2875 VNDISK_GET_LEN(vnodeLength, vnode);
2876 if (ip->byteCount != vnodeLength) {
2877 if (check) {
2878 if (!Showmode)
2879 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2880 err = -1;
2881 goto zooks;
2882 }
2883 if (!Showmode)
2884 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2885 VNDISK_SET_LEN(vnode, ip->byteCount);
2886 vnodeChanged = 1;
2887 }
2888 if (!check)
2889 ip->linkCount--; /* Keep the inode around */
2890 ip++;
2891 nInodes--;
2892 } else { /* no matching inode */
2893 afs_ino_str_t stmp;
2894 if (VNDISK_GET_INO(vnode) != 0
2895 || vnode->type == vDirectory) {
2896 /* No matching inode--get rid of the vnode */
2897 if (check) {
2898 if (VNDISK_GET_INO(vnode)) {
2899 if (!Showmode) {
2900 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2901 }
2902 } else {
2903 if (!Showmode)
2904 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2905 }
2906 err = -1;
2907 goto zooks;
2908 }
2909 if (VNDISK_GET_INO(vnode)) {
2910 if (!Showmode) {
2911 time_t serverModifyTime = vnode->serverModifyTime;
2912 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2913 }
2914 } else {
2915 if (!Showmode) {
2916 time_t serverModifyTime = vnode->serverModifyTime;
2917 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2918 }
2919 }
2920 memset(vnode, 0, vcp->diskSize);
2921 vnodeChanged = 1;
2922 } else {
2923 /* Should not reach here becuase we checked for
2924 * (inodeNumber == 0) above. And where we zero the vnode,
2925 * we also goto vnodeDone.
2926 */
2927 }
2928 }
2929 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2930 ip++;
2931 nInodes--;
2932 }
2933 } /* VNDISK_GET_INO(vnode) != 0 */
2934 vnodeDone:
2935 opr_Assert(!(vnodeChanged && check));
2936 if (vnodeChanged && !Testing) {
2937 opr_Verify(IH_IWRITE(handle,
2938 vnodeIndexOffset(vcp, vnodeNumber),
2939 (char *)vnode, vcp->diskSize)
2940 == vcp->diskSize);
2941 salvinfo->VolumeChanged = 1; /* For break call back */
2942 }
2943 }
2944 }
2945 zooks:
2946 STREAM_CLOSE(file);
2947 FDH_CLOSE(fdP);
2948 IH_RELEASE(handle);
2949 return err;
2950 }
2951
2952 struct VnodeEssence *
2953 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2954 {
2955 VnodeClass class;
2956 struct VnodeInfo *vip;
2957 int offset;
2958
2959 class = vnodeIdToClass(vnodeNumber);
2960 vip = &salvinfo->vnodeInfo[class];
2961 offset = vnodeIdToBitNumber(vnodeNumber);
2962 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
2963 }
2964
2965 void
2966 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2967 {
2968 /* Copy the directory unconditionally if we are going to change it:
2969 * not just if was cloned.
2970 */
2971 struct VnodeDiskObject vnode;
2972 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2973 Inode oldinode, newinode;
2974 afs_sfsize_t code;
2975
2976 if (dir->copied || Testing)
2977 return;
2978 DFlush(); /* Well justified paranoia... */
2979
2980 code =
2981 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2982 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2983 sizeof(vnode));
2984 opr_Assert(code == sizeof(vnode));
2985 oldinode = VNDISK_GET_INO(&vnode);
2986 /* Increment the version number by a whole lot to avoid problems with
2987 * clients that were promised new version numbers--but the file server
2988 * crashed before the versions were written to disk.
2989 */
2990 newinode =
2991 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2992 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2993 200);
2994 opr_Assert(VALID_INO(newinode));
2995 opr_Verify(CopyInode(salvinfo->fileSysDevice, oldinode, newinode,
2996 dir->rwVid) == 0);
2997 vnode.cloned = 0;
2998 VNDISK_SET_INO(&vnode, newinode);
2999 code =
3000 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3001 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3002 sizeof(vnode));
3003 opr_Assert(code == sizeof(vnode));
3004
3005 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
3006 salvinfo->fileSysDevice, newinode,
3007 &salvinfo->VolumeChanged);
3008 /* Don't delete the original inode right away, because the directory is
3009 * still being scanned.
3010 */
3011 dir->copied = 1;
3012 }
3013
3014 /*
3015 * This function should either successfully create a new dir, or give up
3016 * and leave things the way they were. In particular, if it fails to write
3017 * the new dir properly, it should return w/o changing the reference to the
3018 * old dir.
3019 */
3020 void
3021 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
3022 {
3023 struct VnodeDiskObject vnode;
3024 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
3025 Inode oldinode, newinode;
3026 DirHandle newdir;
3027 FdHandle_t *fdP;
3028 afs_int32 code;
3029 afs_sfsize_t lcode;
3030 afs_int32 parentUnique = 1;
3031 struct VnodeEssence *vnodeEssence;
3032 afs_fsize_t length;
3033
3034 if (Testing)
3035 return;
3036 Log("Salvaging directory %u...\n", dir->vnodeNumber);
3037 lcode =
3038 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
3039 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3040 sizeof(vnode));
3041 opr_Assert(lcode == sizeof(vnode));
3042 oldinode = VNDISK_GET_INO(&vnode);
3043 /* Increment the version number by a whole lot to avoid problems with
3044 * clients that were promised new version numbers--but the file server
3045 * crashed before the versions were written to disk.
3046 */
3047 newinode =
3048 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
3049 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
3050 200);
3051 opr_Assert(VALID_INO(newinode));
3052 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
3053 &salvinfo->VolumeChanged);
3054
3055 /* Assign . and .. vnode numbers from dir and vnode.parent.
3056 * The uniquifier for . is in the vnode.
3057 * The uniquifier for .. might be set to a bogus value of 1 and
3058 * the salvager will later clean it up.
3059 */
3060 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
3061 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
3062 }
3063 code =
3064 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
3065 vnode.uniquifier,
3066 (vnode.parent ? vnode.parent : dir->vnodeNumber),
3067 parentUnique);
3068 if (code == 0)
3069 code = DFlush();
3070 if (code) {
3071 /* didn't really build the new directory properly, let's just give up. */
3072 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3073 Log("Directory salvage returned code %d, continuing.\n", code);
3074 if (code) {
3075 Log("also failed to decrement link count on new inode");
3076 }
3077 opr_Assert(0);
3078 }
3079 Log("Checking the results of the directory salvage...\n");
3080 if (!DirOK(&newdir)) {
3081 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
3082 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3083 opr_Assert(code == 0);
3084 opr_Assert(0);
3085 }
3086 vnode.cloned = 0;
3087 VNDISK_SET_INO(&vnode, newinode);
3088 length = afs_dir_Length(&newdir);
3089 VNDISK_SET_LEN(&vnode, length);
3090 lcode =
3091 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3092 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3093 sizeof(vnode));
3094 opr_Assert(lcode == sizeof(vnode));
3095 IH_CONDSYNC(salvinfo->vnodeInfo[vLarge].handle);
3096
3097 /* make sure old directory file is really closed */
3098 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
3099 FDH_REALLYCLOSE(fdP);
3100
3101 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
3102 opr_Assert(code == 0);
3103 dir->dirHandle = newdir;
3104 }
3105
3106 /**
3107 * arguments for JudgeEntry.
3108 */
3109 struct judgeEntry_params {
3110 struct DirSummary *dir; /**< directory we're examining entries in */
3111 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
3112 };
3113
3114 int
3115 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3116 afs_int32 unique)
3117 {
3118 struct judgeEntry_params *params = arock;
3119 struct DirSummary *dir = params->dir;
3120 struct SalvInfo *salvinfo = params->salvinfo;
3121 struct VnodeEssence *vnodeEssence;
3122 afs_int32 dirOrphaned, todelete;
3123
3124 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
3125
3126 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3127 if (vnodeEssence == NULL) {
3128 if (!Showmode) {
3129 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3130 }
3131 if (!Testing) {
3132 CopyOnWrite(salvinfo, dir);
3133 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3134 }
3135 return 0;
3136 }
3137 #ifdef AFS_AIX_ENV
3138 #ifndef AFS_NAMEI_ENV
3139 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3140 * mount inode for the partition. If this inode were deleted, it would crash
3141 * the machine.
3142 */
3143 if (vnodeEssence->InodeNumber == 0) {
3144 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3145 if (!Testing) {
3146 CopyOnWrite(salvinfo, dir);
3147 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3148 }
3149 return 0;
3150 }
3151 #endif
3152 #endif
3153
3154 if (!(vnodeNumber & 1) && !Showmode
3155 && !(vnodeEssence->count || vnodeEssence->unique
3156 || vnodeEssence->modeBits)) {
3157 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3158 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3159 vnodeNumber, unique,
3160 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3161 ""));
3162 if (!unique) {
3163 if (!Testing) {
3164 CopyOnWrite(salvinfo, dir);
3165 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3166 }
3167 return 0;
3168 }
3169 }
3170
3171 /* Check if the Uniquifiers match. If not, change the directory entry
3172 * so its unique matches the vnode unique. Delete if the unique is zero
3173 * or if the directory is orphaned.
3174 */
3175 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3176 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3177
3178 if (todelete
3179 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3180 if (dirOrphaned) {
3181 /* This is an orphaned directory. Don't delete the . or ..
3182 * entry. Otherwise, it will get created in the next
3183 * salvage and deleted again here. So Just skip it.
3184 * */
3185 return 0;
3186 }
3187 /* (vnodeEssence->unique == 0 && ('.' || '..'));
3188 * Entries arriving here should be deleted, but the directory
3189 * is not orphaned. Therefore, the entry must be pointing at
3190 * the wrong vnode. Skip the 'else' clause and fall through;
3191 * the code below will repair the entry so it correctly points
3192 * at the vnode of the current directory (if '.') or the parent
3193 * directory (if '..'). */
3194 } else {
3195 if (!Showmode) {
3196 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n",
3197 dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique,
3198 vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3199 }
3200 if (!Testing) {
3201 AFSFid fid;
3202 fid.Vnode = vnodeNumber;
3203 fid.Unique = vnodeEssence->unique;
3204 CopyOnWrite(salvinfo, dir);
3205 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3206 if (!todelete)
3207 opr_Verify(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3208 }
3209 if (todelete)
3210 return 0; /* no need to continue */
3211 }
3212 }
3213
3214 if (strcmp(name, ".") == 0) {
3215 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3216 if (!Showmode)
3217 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3218 if (!Testing) {
3219 AFSFid fid;
3220 CopyOnWrite(salvinfo, dir);
3221 opr_Verify(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3222 fid.Vnode = dir->vnodeNumber;
3223 fid.Unique = dir->unique;
3224 opr_Verify(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3225 vnodeNumber = fid.Vnode; /* Get the new Essence */
3226 unique = fid.Unique;
3227 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3228 }
3229 }
3230 dir->haveDot = 1;
3231 } else if (strcmp(name, "..") == 0) {
3232 AFSFid pa;
3233 if (dir->parent) {
3234 struct VnodeEssence *dotdot;
3235 pa.Vnode = dir->parent;
3236 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3237 opr_Assert(dotdot != NULL); /* XXX Should not be assert */
3238 pa.Unique = dotdot->unique;
3239 } else {
3240 pa.Vnode = dir->vnodeNumber;
3241 pa.Unique = dir->unique;
3242 }
3243 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3244 if (!Showmode)
3245 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3246 if (!Testing) {
3247 CopyOnWrite(salvinfo, dir);
3248 opr_Verify(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3249 opr_Verify(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3250 }
3251
3252 vnodeNumber = pa.Vnode; /* Get the new Essence */
3253 unique = pa.Unique;
3254 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3255 }
3256 dir->haveDotDot = 1;
3257 } else if (strncmp(name, ".__afs", 6) == 0) {
3258 if (!Showmode) {
3259 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3260 }
3261 if (!Testing) {
3262 CopyOnWrite(salvinfo, dir);
3263 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3264 }
3265 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3266 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3267 return 0;
3268 } else {
3269 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3270 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3271 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3272 && !(vnodeEssence->modeBits & 0111)) {
3273 afs_sfsize_t nBytes;
3274 afs_sfsize_t size;
3275 char buf[1025];
3276 IHandle_t *ihP;
3277 FdHandle_t *fdP;
3278
3279 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3280 vnodeEssence->InodeNumber);
3281 fdP = IH_OPEN(ihP);
3282 if (fdP == NULL) {
3283 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3284 IH_RELEASE(ihP);
3285 return 0;
3286 }
3287 size = FDH_SIZE(fdP);
3288 if (size < 0) {
3289 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3290 FDH_REALLYCLOSE(fdP);
3291 IH_RELEASE(ihP);
3292 return 0;
3293 }
3294
3295 if (size > 1024)
3296 size = 1024;
3297 nBytes = FDH_PREAD(fdP, buf, size, 0);
3298 if (nBytes == size) {
3299 buf[size] = '\0';
3300 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3301 Log("Volume %" AFS_VOLID_FMT " (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3302 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid), dir->vname, dir->name ? dir->name : "??", name, buf,
3303 Testing ? "would convert" : "converted");
3304 vnodeEssence->modeBits |= 0111;
3305 vnodeEssence->changed = 1;
3306 } else if (ShowMounts)
3307 Log("In volume %" AFS_VOLID_FMT " (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3308 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid),
3309 dir->vname, dir->name ? dir->name : "??", name, buf);
3310 } else {
3311 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3312 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3313 }
3314 FDH_REALLYCLOSE(fdP);
3315 IH_RELEASE(ihP);
3316 }
3317 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3318 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3319 if (vnodeIdToClass(vnodeNumber) == vLarge
3320 && vnodeEssence->name == NULL) {
3321 vnodeEssence->name = strdup(name);
3322 }
3323
3324 /* The directory entry points to the vnode. Check to see if the
3325 * vnode points back to the directory. If not, then let the
3326 * directory claim it (else it might end up orphaned). Vnodes
3327 * already claimed by another directory are deleted from this
3328 * directory: hardlinks to the same vnode are not allowed
3329 * from different directories.
3330 */
3331 if (vnodeEssence->parent != dir->vnodeNumber) {
3332 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3333 /* Vnode does not point back to this directory.
3334 * Orphaned dirs cannot claim a file (it may belong to
3335 * another non-orphaned dir).
3336 */
3337 if (!Showmode) {
3338 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3339 }
3340 vnodeEssence->parent = dir->vnodeNumber;
3341 vnodeEssence->changed = 1;
3342 } else {
3343 /* Vnode was claimed by another directory */
3344 if (!Showmode) {
3345 if (dirOrphaned) {
3346 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3347 } else if (vnodeNumber == 1) {
3348 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3349 } else {
3350 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3351 }
3352 }
3353 if (!Testing) {
3354 CopyOnWrite(salvinfo, dir);
3355 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3356 }
3357 return 0;
3358 }
3359 }
3360 /* This directory claims the vnode */
3361 vnodeEssence->claimed = 1;
3362 }
3363 vnodeEssence->count--;
3364 return 0;
3365 }
3366
3367 void
3368 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3369 VnodeClass class, Inode ino, Unique * maxu)
3370 {
3371 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3372 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3373 char buf[SIZEOF_LARGEDISKVNODE];
3374 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3375 afs_sfsize_t size;
3376 StreamHandle_t *file;
3377 int vnodeIndex;
3378 int nVnodes;
3379 FdHandle_t *fdP;
3380
3381 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3382 fdP = IH_OPEN(vip->handle);
3383 opr_Assert(fdP != NULL);
3384 file = FDH_FDOPEN(fdP, "r+");
3385 opr_Assert(file != NULL);
3386 size = OS_SIZE(fdP->fd_fd);
3387 opr_Assert(size != -1);
3388 vip->nVnodes = (size / vcp->diskSize) - 1;
3389 if (vip->nVnodes > 0) {
3390 opr_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3391 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
3392 opr_Verify((vip->vnodes = calloc(vip->nVnodes,
3393 sizeof(struct VnodeEssence)))
3394 != NULL);
3395 if (class == vLarge) {
3396 opr_Verify((vip->inodes = calloc(vip->nVnodes, sizeof(Inode)))
3397 != NULL);
3398 } else {
3399 vip->inodes = NULL;
3400 }
3401 } else {
3402 vip->nVnodes = 0;
3403 vip->vnodes = NULL;
3404 vip->inodes = NULL;
3405 }
3406 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3407 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3408 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3409 nVnodes--, vnodeIndex++) {
3410 if (vnode->type != vNull) {
3411 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3412 afs_fsize_t vnodeLength;
3413 vip->nAllocatedVnodes++;
3414 vep->count = vnode->linkCount;
3415 VNDISK_GET_LEN(vnodeLength, vnode);
3416 vep->blockCount = nBlocks(vnodeLength);
3417 vip->volumeBlockCount += vep->blockCount;
3418 vep->parent = vnode->parent;
3419 vep->unique = vnode->uniquifier;
3420 if (*maxu < vnode->uniquifier)
3421 *maxu = vnode->uniquifier;
3422 vep->modeBits = vnode->modeBits;
3423 vep->InodeNumber = VNDISK_GET_INO(vnode);
3424 vep->type = vnode->type;
3425 vep->author = vnode->author;
3426 vep->owner = vnode->owner;
3427 vep->group = vnode->group;
3428 if (vnode->type == vDirectory) {
3429 if (class != vLarge) {
3430 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3431 vip->nAllocatedVnodes--;
3432 memset(vnode, 0, sizeof(*vnode));
3433 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3434 vnodeIndexOffset(vcp, vnodeNumber),
3435 (char *)&vnode, sizeof(vnode));
3436 salvinfo->VolumeChanged = 1;
3437 } else
3438 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
3439 }
3440 }
3441 }
3442 STREAM_CLOSE(file);
3443 FDH_CLOSE(fdP);
3444 }
3445
3446 static char *
3447 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3448 char *path)
3449 {
3450 struct VnodeEssence *parentvp;
3451
3452 if (vnode == 1) {
3453 strcpy(path, ".");
3454 return path;
3455 }
3456 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3457 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3458 strcat(path, OS_DIRSEP);
3459 strcat(path, vp->name);
3460 return path;
3461 }
3462 return 0;
3463 }
3464
3465 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
3466 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3467 */
3468 static int
3469 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3470 {
3471 struct VnodeEssence *vep;
3472
3473 if (vnode == 0)
3474 return (1); /* Vnode zero does not exist */
3475 if (vnode == 1)
3476 return (0); /* The root dir vnode is always claimed */
3477 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3478 if (!vep || !vep->claimed)
3479 return (1); /* Vnode is not claimed - it is orphaned */
3480
3481 return (IsVnodeOrphaned(salvinfo, vep->parent));
3482 }
3483
3484 void
3485 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3486 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3487 struct DirSummary *rootdir, int *rootdirfound)
3488 {
3489 static struct DirSummary dir;
3490 static struct DirHandle dirHandle;
3491 struct VnodeEssence *parent;
3492 static char path[MAXPATHLEN];
3493 int dirok, code;
3494
3495 if (dirVnodeInfo->vnodes[i].salvaged)
3496 return; /* already salvaged */
3497
3498 dir.rwVid = rwVid;
3499 dirVnodeInfo->vnodes[i].salvaged = 1;
3500
3501 if (dirVnodeInfo->inodes[i] == 0)
3502 return; /* Not allocated to a directory */
3503
3504 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3505 if (dirVnodeInfo->vnodes[i].parent) {
3506 Log("Bad parent, vnode 1; %s...\n",
3507 (Testing ? "skipping" : "salvaging"));
3508 dirVnodeInfo->vnodes[i].parent = 0;
3509 dirVnodeInfo->vnodes[i].changed = 1;
3510 }
3511 } else {
3512 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3513 if (parent && parent->salvaged == 0)
3514 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3515 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3516 rootdir, rootdirfound);
3517 }
3518
3519 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3520 dir.unique = dirVnodeInfo->vnodes[i].unique;
3521 dir.copied = 0;
3522 dir.vname = name;
3523 dir.parent = dirVnodeInfo->vnodes[i].parent;
3524 dir.haveDot = dir.haveDotDot = 0;
3525 dir.ds_linkH = alinkH;
3526 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3527 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3528
3529 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3530 if (!dirok) {
3531 if (!RebuildDirs) {
3532 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3533 (Testing ? "skipping" : "salvaging"));
3534 }
3535 if (!Testing) {
3536 CopyAndSalvage(salvinfo, &dir);
3537 dirok = 1;
3538 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3539 }
3540 }
3541 dirHandle = dir.dirHandle;
3542
3543 dir.name =
3544 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3545 &dirVnodeInfo->vnodes[i], path);
3546
3547 if (dirok) {
3548 /* If enumeration failed for random reasons, we will probably delete
3549 * too much stuff, so we guard against this instead.
3550 */
3551 struct judgeEntry_params judge_params;
3552 judge_params.salvinfo = salvinfo;
3553 judge_params.dir = &dir;
3554
3555 opr_Verify(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3556 &judge_params) == 0);
3557 }
3558
3559 /* Delete the old directory if it was copied in order to salvage.
3560 * CopyOnWrite has written the new inode # to the disk, but we still
3561 * have the old one in our local structure here. Thus, we idec the
3562 * local dude.
3563 */
3564 DFlush();
3565 if (dir.copied && !Testing) {
3566 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3567 opr_Assert(code == 0);
3568 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3569 }
3570
3571 /* Remember rootdir DirSummary _after_ it has been judged */
3572 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3573 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3574 *rootdirfound = 1;
3575 }
3576
3577 return;
3578 }
3579
3580 /**
3581 * Get a new FID that can be used to create a new file.
3582 *
3583 * @param[in] volHeader vol header for the volume
3584 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3585 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3586 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3587 * updated to the new max unique if we create a new
3588 * vnode
3589 */
3590 static void
3591 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3592 VnodeClass class, AFSFid *afid, Unique *maxunique)
3593 {
3594 int i;
3595 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3596 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3597 break;
3598 }
3599 }
3600 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3601 /* no free vnodes; make a new one */
3602 salvinfo->vnodeInfo[class].nVnodes++;
3603 salvinfo->vnodeInfo[class].vnodes =
3604 realloc(salvinfo->vnodeInfo[class].vnodes,
3605 sizeof(struct VnodeEssence) * (i+1));
3606
3607 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3608 }
3609
3610 afid->Vnode = bitNumberToVnodeNumber(i, class);
3611
3612 if (volHeader->uniquifier < (*maxunique + 1)) {
3613 /* header uniq is bad; it will get bumped by 2000 later */
3614 afid->Unique = *maxunique + 1 + 2000;
3615 (*maxunique)++;
3616 } else {
3617 /* header uniq seems okay; just use that */
3618 afid->Unique = *maxunique = volHeader->uniquifier++;
3619 }
3620 }
3621
3622 /**
3623 * Create a vnode for a README file explaining not to use a recreated-root vol.
3624 *
3625 * @param[in] volHeader vol header for the volume
3626 * @param[in] alinkH ihandle for i/o for the volume
3627 * @param[in] vid volume id
3628 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3629 * updated to the new max unique if we create a new
3630 * vnode
3631 * @param[out] afid FID for the new readme vnode
3632 * @param[out] ainode the inode for the new readme file
3633 *
3634 * @return operation status
3635 * @retval 0 success
3636 * @retval -1 error
3637 */
3638 static int
3639 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3640 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3641 Inode *ainode)
3642 {
3643 Inode readmeinode;
3644 struct VnodeDiskObject *rvnode = NULL;
3645 afs_sfsize_t bytes;
3646 IHandle_t *readmeH = NULL;
3647 struct VnodeEssence *vep;
3648 afs_fsize_t length;
3649 time_t now = time(NULL);
3650
3651 /* Try to make the note brief, but informative. Only administrators should
3652 * be able to read this file at first, so we can hopefully assume they
3653 * know what AFS is, what a volume is, etc. */
3654 char readme[] =
3655 "This volume has been salvaged, but has lost its original root directory.\n"
3656 "The root directory that exists now has been recreated from orphan files\n"
3657 "from the rest of the volume. This recreated root directory may interfere\n"
3658 "with old cached data on clients, and there is no way the salvager can\n"
3659 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3660 "use this volume, but only copy the salvaged data to a new volume.\n"
3661 "Continuing to use this volume as it exists now may cause some clients to\n"
3662 "behave oddly when accessing this volume.\n"
3663 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3664 /* ^ the person reading this probably just lost some data, so they could
3665 * use some cheering up. */
3666
3667 /* -1 for the trailing NUL */
3668 length = sizeof(readme) - 1;
3669
3670 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3671
3672 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3673
3674 /* create the inode and write the contents */
3675 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3676 salvinfo->fileSysPath, 0, vid,
3677 afid->Vnode, afid->Unique, 1);
3678 if (!VALID_INO(readmeinode)) {
3679 Log("CreateReadme: readme IH_CREATE failed\n");
3680 goto error;
3681 }
3682
3683 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3684 bytes = IH_IWRITE(readmeH, 0, readme, length);
3685 IH_RELEASE(readmeH);
3686
3687 if (bytes != length) {
3688 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3689 (int)sizeof(readme));
3690 goto error;
3691 }
3692
3693 /* create the vnode and write it out */
3694 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3695 if (!rvnode) {
3696 Log("CreateRootDir: error alloc'ing memory\n");
3697 goto error;
3698 }
3699
3700 rvnode->type = vFile;
3701 rvnode->cloned = 0;
3702 rvnode->modeBits = 0777;
3703 rvnode->linkCount = 1;
3704 VNDISK_SET_LEN(rvnode, length);
3705 rvnode->uniquifier = afid->Unique;
3706 rvnode->dataVersion = 1;
3707 VNDISK_SET_INO(rvnode, readmeinode);
3708 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3709 rvnode->author = 0;
3710 rvnode->owner = 0;
3711 rvnode->parent = 1;
3712 rvnode->group = 0;
3713 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3714
3715 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3716 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3717 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3718
3719 if (bytes != SIZEOF_SMALLDISKVNODE) {
3720 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3721 (int)SIZEOF_SMALLDISKVNODE);
3722 goto error;
3723 }
3724
3725 /* update VnodeEssence for new readme vnode */
3726 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3727 vep->count = 0;
3728 vep->blockCount = nBlocks(length);
3729 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3730 vep->parent = rvnode->parent;
3731 vep->unique = rvnode->uniquifier;
3732 vep->modeBits = rvnode->modeBits;
3733 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3734 vep->type = rvnode->type;
3735 vep->author = rvnode->author;
3736 vep->owner = rvnode->owner;
3737 vep->group = rvnode->group;
3738
3739 free(rvnode);
3740 rvnode = NULL;
3741
3742 vep->claimed = 1;
3743 vep->changed = 0;
3744 vep->salvaged = 1;
3745 vep->todelete = 0;
3746
3747 *ainode = readmeinode;
3748
3749 return 0;
3750
3751 error:
3752 if (IH_DEC(alinkH, readmeinode, vid)) {
3753 Log("CreateReadme (recovery): IH_DEC failed\n");
3754 }
3755
3756 if (rvnode) {
3757 free(rvnode);
3758 rvnode = NULL;
3759 }
3760
3761 return -1;
3762 }
3763
3764 /**
3765 * create a root dir for a volume that lacks one.
3766 *
3767 * @param[in] volHeader vol header for the volume
3768 * @param[in] alinkH ihandle for disk access for this volume group
3769 * @param[in] vid volume id we're dealing with
3770 * @param[out] rootdir populated with info about the new root dir
3771 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3772 * updated to the new max unique if we create a new
3773 * vnode
3774 *
3775 * @return operation status
3776 * @retval 0 success
3777 * @retval -1 error
3778 */
3779 static int
3780 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3781 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3782 Unique *maxunique)
3783 {
3784 FileVersion dv;
3785 int decroot = 0, decreadme = 0;
3786 AFSFid did, readmeid;
3787 afs_fsize_t length;
3788 Inode rootinode;
3789 struct VnodeDiskObject *rootvnode = NULL;
3790 struct acl_accessList *ACL;
3791 Inode *ip;
3792 afs_sfsize_t bytes;
3793 struct VnodeEssence *vep;
3794 Inode readmeinode = 0;
3795 time_t now = time(NULL);
3796
3797 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3798 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3799 goto error;
3800 }
3801
3802 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3803 /* We don't have any large vnodes in the volume; allocate room
3804 * for one so we can recreate the root dir */
3805 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3806 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3807 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3808
3809 opr_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3810 opr_Assert(salvinfo->vnodeInfo[vLarge].inodes);
3811 }
3812
3813 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3814 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3815 if (vep->type != vNull) {
3816 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3817 goto error;
3818 }
3819
3820 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3821 &readmeinode) != 0) {
3822 goto error;
3823 }
3824 decreadme = 1;
3825
3826 /* set the DV to a very high number, so it is unlikely that we collide
3827 * with a cached DV */
3828 dv = 1 << 30;
3829
3830 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3831 0, vid, 1, 1, dv);
3832 if (!VALID_INO(rootinode)) {
3833 Log("CreateRootDir: IH_CREATE failed\n");
3834 goto error;
3835 }
3836 decroot = 1;
3837
3838 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3839 rootinode, &salvinfo->VolumeChanged);
3840 did.Volume = vid;
3841 did.Vnode = 1;
3842 did.Unique = 1;
3843 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3844 Log("CreateRootDir: MakeDir failed\n");
3845 goto error;
3846 }
3847 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3848 Log("CreateRootDir: Create failed\n");
3849 goto error;
3850 }
3851 DFlush();
3852 length = afs_dir_Length(&rootdir->dirHandle);
3853 DZap(&rootdir->dirHandle);
3854
3855 /* create the new root dir vnode */
3856 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3857 if (!rootvnode) {
3858 Log("CreateRootDir: malloc failed\n");
3859 goto error;
3860 }
3861
3862 /* only give 'rl' permissions to 'system:administrators'. We do this to
3863 * try to catch the attention of an administrator, that they should not
3864 * be writing to this directory or continue to use it. */
3865 ACL = VVnodeDiskACL(rootvnode);
3866 ACL->size = sizeof(struct acl_accessList);
3867 ACL->version = ACL_ACLVERSION;
3868 ACL->total = 1;
3869 ACL->positive = 1;
3870 ACL->negative = 0;
3871 ACL->entries[0].id = -204; /* system:administrators */
3872 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3873
3874 rootvnode->type = vDirectory;
3875 rootvnode->cloned = 0;
3876 rootvnode->modeBits = 0777;
3877 rootvnode->linkCount = 2;
3878 VNDISK_SET_LEN(rootvnode, length);
3879 rootvnode->uniquifier = 1;
3880 rootvnode->dataVersion = dv;
3881 VNDISK_SET_INO(rootvnode, rootinode);
3882 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3883 rootvnode->author = 0;
3884 rootvnode->owner = 0;
3885 rootvnode->parent = 0;
3886 rootvnode->group = 0;
3887 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3888
3889 /* write it out to disk */
3890 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3891 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3892 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3893
3894 if (bytes != SIZEOF_LARGEDISKVNODE) {
3895 /* just cast to int and don't worry about printing real 64-bit ints;
3896 * a large disk vnode isn't anywhere near the 32-bit limit */
3897 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3898 (int)SIZEOF_LARGEDISKVNODE);
3899 goto error;
3900 }
3901
3902 /* update VnodeEssence for the new root vnode */
3903 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3904 vep->count = 0;
3905 vep->blockCount = nBlocks(length);
3906 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3907 vep->parent = rootvnode->parent;
3908 vep->unique = rootvnode->uniquifier;
3909 vep->modeBits = rootvnode->modeBits;
3910 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3911 vep->type = rootvnode->type;
3912 vep->author = rootvnode->author;
3913 vep->owner = rootvnode->owner;
3914 vep->group = rootvnode->group;
3915
3916 free(rootvnode);
3917 rootvnode = NULL;
3918
3919 vep->claimed = 0;
3920 vep->changed = 0;
3921 vep->salvaged = 1;
3922 vep->todelete = 0;
3923
3924 /* update DirSummary for the new root vnode */
3925 rootdir->vnodeNumber = 1;
3926 rootdir->unique = 1;
3927 rootdir->haveDot = 1;
3928 rootdir->haveDotDot = 1;
3929 rootdir->rwVid = vid;
3930 rootdir->copied = 0;
3931 rootdir->parent = 0;
3932 rootdir->name = strdup(".");
3933 rootdir->vname = volHeader->name;
3934 rootdir->ds_linkH = alinkH;
3935
3936 *ip = rootinode;
3937
3938 return 0;
3939
3940 error:
3941 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3942 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3943 }
3944 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3945 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3946 }
3947 if (rootvnode) {
3948 free(rootvnode);
3949 rootvnode = NULL;
3950 }
3951 return -1;
3952 }
3953
3954 /**
3955 * salvage a volume group.
3956 *
3957 * @param[in] salvinfo information for the curent salvage job
3958 * @param[in] rwIsp inode summary for rw volume
3959 * @param[in] alinkH link table inode handle
3960 *
3961 * @return operation status
3962 * @retval 0 success
3963 */
3964 int
3965 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3966 {
3967 /* This routine, for now, will only be called for read-write volumes */
3968 int i, j, code;
3969 int BlocksInVolume = 0, FilesInVolume = 0;
3970 VnodeClass class;
3971 struct DirSummary rootdir, oldrootdir;
3972 struct VnodeInfo *dirVnodeInfo;
3973 struct VnodeDiskObject vnode;
3974 VolumeDiskData volHeader;
3975 VolumeId vid;
3976 int orphaned, rootdirfound = 0;
3977 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3978 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3979 struct VnodeEssence *vep;
3980 afs_int32 v, pv;
3981 IHandle_t *h;
3982 afs_sfsize_t nBytes;
3983 AFSFid pa;
3984 VnodeId LFVnode, ThisVnode;
3985 Unique LFUnique, ThisUnique;
3986 char npath[128];
3987 int newrootdir = 0;
3988
3989 vid = rwIsp->volSummary->header.id;
3990 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3991 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3992 opr_Assert(nBytes == sizeof(volHeader));
3993 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3994 opr_Assert(volHeader.destroyMe != DESTROY_ME);
3995 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3996
3997 DistilVnodeEssence(salvinfo, vid, vLarge,
3998 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3999 DistilVnodeEssence(salvinfo, vid, vSmall,
4000 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
4001
4002 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
4003 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
4004 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
4005 &rootdir, &rootdirfound);
4006 }
4007 #ifdef AFS_NT40_ENV
4008 nt_sync(salvinfo->fileSysDevice);
4009 #else
4010 sync(); /* This used to be done lower level, for every dir */
4011 #endif
4012 if (Showmode) {
4013 IH_RELEASE(h);
4014 return 0;
4015 }
4016
4017 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
4018
4019 Log("Cannot find root directory for volume %lu; attempting to create "
4020 "a new one\n", afs_printable_uint32_lu(vid));
4021
4022 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
4023 &maxunique);
4024 if (code == 0) {
4025 rootdirfound = 1;
4026 newrootdir = 1;
4027 salvinfo->VolumeChanged = 1;
4028 }
4029 }
4030
4031 /* Parse each vnode looking for orphaned vnodes and
4032 * connect them to the tree as orphaned (if requested).
4033 */
4034 oldrootdir = rootdir;
4035 for (class = 0; class < nVNODECLASSES; class++) {
4036 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
4037 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
4038 ThisVnode = bitNumberToVnodeNumber(v, class);
4039 ThisUnique = vep->unique;
4040
4041 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
4042 continue; /* Ignore unused, claimed, and root vnodes */
4043
4044 /* This vnode is orphaned. If it is a directory vnode, then the '..'
4045 * entry in this vnode had incremented the parent link count (In
4046 * JudgeEntry()). We need to go to the parent and decrement that
4047 * link count. But if the parent's unique is zero, then the parent
4048 * link count was not incremented in JudgeEntry().
4049 */
4050 if (class == vLarge) { /* directory vnode */
4051 pv = vnodeIdToBitNumber(vep->parent);
4052 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
4053 if (vep->parent == 1 && newrootdir) {
4054 /* this vnode's parent was the volume root, and
4055 * we just created the volume root. So, the parent
4056 * dir didn't exist during JudgeEntry, so the link
4057 * count was not inc'd there, so don't dec it here.
4058 */
4059
4060 /* noop */
4061
4062 } else {
4063 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
4064 }
4065 }
4066 }
4067
4068 if (!rootdirfound)
4069 continue; /* If no rootdir, can't attach orphaned files */
4070
4071 /* Here we attach orphaned files and directories into the
4072 * root directory, LVVnode, making sure link counts stay correct.
4073 */
4074 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
4075 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
4076 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
4077
4078 /* Update this orphaned vnode's info. Its parent info and
4079 * link count (do for orphaned directories and files).
4080 */
4081 vep->parent = LFVnode; /* Parent is the root dir */
4082 vep->unique = LFUnique;
4083 vep->changed = 1;
4084 vep->claimed = 1;
4085 vep->count--; /* Inc link count (root dir will pt to it) */
4086
4087 /* If this orphaned vnode is a directory, change '..'.
4088 * The name of the orphaned dir/file is unknown, so we
4089 * build a unique name. No need to CopyOnWrite the directory
4090 * since it is not connected to tree in BK or RO volume and
4091 * won't be visible there.
4092 */
4093 if (class == vLarge) {
4094 AFSFid pa;
4095 DirHandle dh;
4096
4097 /* Remove and recreate the ".." entry in this orphaned directory */
4098 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
4099 salvinfo->vnodeInfo[class].inodes[v],
4100 &salvinfo->VolumeChanged);
4101 pa.Vnode = LFVnode;
4102 pa.Unique = LFUnique;
4103 opr_Verify(afs_dir_Delete(&dh, "..") == 0);
4104 opr_Verify(afs_dir_Create(&dh, "..", &pa) == 0);
4105
4106 /* The original parent's link count was decremented above.
4107 * Here we increment the new parent's link count.
4108 */
4109 pv = vnodeIdToBitNumber(LFVnode);
4110 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4111
4112 }
4113
4114 /* Go to the root dir and add this entry. The link count of the
4115 * root dir was incremented when ".." was created. Try 10 times.
4116 */
4117 for (j = 0; j < 10; j++) {
4118 pa.Vnode = ThisVnode;
4119 pa.Unique = ThisUnique;
4120
4121 snprintf(npath, sizeof npath, "%s.%u.%u",
4122 ((class == vLarge) ? "__ORPHANDIR__"
4123 : "__ORPHANFILE__"),
4124 ThisVnode, ThisUnique);
4125
4126 CopyOnWrite(salvinfo, &rootdir);
4127 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4128 if (!code)
4129 break;
4130
4131 ThisUnique += 50; /* Try creating a different file */
4132 }
4133 opr_Assert(code == 0);
4134 Log("Attaching orphaned %s to volume's root dir as %s\n",
4135 ((class == vLarge) ? "directory" : "file"), npath);
4136 }
4137 } /* for each vnode in the class */
4138 } /* for each class of vnode */
4139
4140 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4141 DFlush();
4142 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4143 code =
4144 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4145 oldrootdir.rwVid);
4146 opr_Assert(code == 0);
4147 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4148 }
4149
4150 DFlush(); /* Flush the changes */
4151 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4152 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4153 orphans = ORPH_IGNORE;
4154 }
4155
4156 /* Write out all changed vnodes. Orphaned files and directories
4157 * will get removed here also (if requested).
4158 */
4159 for (class = 0; class < nVNODECLASSES; class++) {
4160 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4161 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4162 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4163 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4164 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4165 for (i = 0; i < nVnodes; i++) {
4166 struct VnodeEssence *vnp = &vnodes[i];
4167 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4168
4169 /* If the vnode is good but is unclaimed (not listed in
4170 * any directory entries), then it is orphaned.
4171 */
4172 orphaned = -1;
4173 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4174 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4175 vnp->changed = 1;
4176 }
4177
4178 if (vnp->changed || vnp->count) {
4179 int oldCount;
4180 nBytes =
4181 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4182 vnodeIndexOffset(vcp, vnodeNumber),
4183 (char *)&vnode, sizeof(vnode));
4184 opr_Assert(nBytes == sizeof(vnode));
4185
4186 vnode.parent = vnp->parent;
4187 oldCount = vnode.linkCount;
4188 vnode.linkCount = vnode.linkCount - vnp->count;
4189
4190 if (orphaned == -1)
4191 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4192 if (orphaned) {
4193 if (!vnp->todelete) {
4194 /* Orphans should have already been attached (if requested) */
4195 opr_Assert(orphans != ORPH_ATTACH);
4196 oblocks += vnp->blockCount;
4197 ofiles++;
4198 }
4199 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4200 && !Testing) {
4201 BlocksInVolume -= vnp->blockCount;
4202 FilesInVolume--;
4203 if (VNDISK_GET_INO(&vnode)) {
4204 code =
4205 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4206 opr_Assert(code == 0);
4207 }
4208 memset(&vnode, 0, sizeof(vnode));
4209 }
4210 } else if (vnp->count) {
4211 if (!Showmode) {
4212 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4213 }
4214 } else {
4215 vnode.modeBits = vnp->modeBits;
4216 }
4217
4218 vnode.dataVersion++;
4219 if (!Testing) {
4220 nBytes =
4221 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4222 vnodeIndexOffset(vcp, vnodeNumber),
4223 (char *)&vnode, sizeof(vnode));
4224 opr_Assert(nBytes == sizeof(vnode));
4225 }
4226 salvinfo->VolumeChanged = 1;
4227 }
4228 }
4229 }
4230 if (!Showmode && ofiles) {
4231 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4232 (!Testing
4233 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4234 oblocks);
4235 }
4236
4237 for (class = 0; class < nVNODECLASSES; class++) {
4238 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4239 for (i = 0; i < vip->nVnodes; i++)
4240 if (vip->vnodes[i].name)
4241 free(vip->vnodes[i].name);
4242 if (vip->vnodes)
4243 free(vip->vnodes);
4244 if (vip->inodes)
4245 free(vip->inodes);
4246 }
4247
4248 /* Set correct resource utilization statistics */
4249 volHeader.filecount = FilesInVolume;
4250 volHeader.diskused = BlocksInVolume;
4251
4252 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
4253 if (volHeader.uniquifier < (maxunique + 1)) {
4254 if (!Showmode)
4255 Log("Volume uniquifier %u is too low (max uniq %u); fixed\n", volHeader.uniquifier, maxunique);
4256 /* Plus 2,000 in case there are workstations out there with
4257 * cached vnodes that have since been deleted
4258 */
4259 volHeader.uniquifier = (maxunique + 1 + 2000);
4260 }
4261
4262 if (newrootdir) {
4263 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4264 "Only use this salvaged volume to copy data to another volume; "
4265 "do not continue to use this volume (%lu) as-is.\n",
4266 afs_printable_uint32_lu(vid));
4267 }
4268
4269 if (!Testing && salvinfo->VolumeChanged) {
4270 #ifdef FSSYNC_BUILD_CLIENT
4271 if (salvinfo->useFSYNC) {
4272 afs_int32 fsync_code;
4273
4274 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4275 if (fsync_code) {
4276 Log("Error trying to tell the fileserver to break callbacks for "
4277 "changed volume %lu; error code %ld\n",
4278 afs_printable_uint32_lu(vid),
4279 afs_printable_int32_ld(fsync_code));
4280 } else {
4281 salvinfo->VolumeChanged = 0;
4282 }
4283 }
4284 #endif /* FSSYNC_BUILD_CLIENT */
4285
4286 #ifdef AFS_DEMAND_ATTACH_FS
4287 if (!salvinfo->useFSYNC) {
4288 /* A volume's contents have changed, but the fileserver will not
4289 * break callbacks on the volume until it tries to load the vol
4290 * header. So, to reduce the amount of time a client could have
4291 * stale data, remove fsstate.dat, so the fileserver will init
4292 * callback state with all clients. This is a very coarse hammer,
4293 * and in the future we should just record which volumes have
4294 * changed. */
4295 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4296 if (code && errno != ENOENT) {
4297 Log("Error %d when trying to unlink FS state file %s\n", errno,
4298 AFSDIR_SERVER_FSSTATE_FILEPATH);
4299 }
4300 }
4301 #endif
4302 }
4303
4304 /* Turn off the inUse bit; the volume's been salvaged! */
4305 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4306 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4307 volHeader.inService = 1; /* allow service again */
4308 if (salvinfo->VolumeChanged) {
4309 volHeader.needsCallback = 1;
4310 volHeader.updateDate = time(NULL);
4311 } else {
4312 volHeader.needsCallback = 0;
4313 }
4314 volHeader.dontSalvage = DONT_SALVAGE;
4315 salvinfo->VolumeChanged = 0;
4316 if (!Testing) {
4317 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4318 opr_Assert(nBytes == sizeof(volHeader));
4319 }
4320 if (!Showmode) {
4321 Log("%sSalvaged %s (%" AFS_VOLID_FMT "): %d files, %d blocks\n",
4322 (Testing ? "It would have " : ""), volHeader.name, afs_printable_VolumeId_lu(volHeader.id),
4323 FilesInVolume, BlocksInVolume);
4324 }
4325
4326 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4327 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
4328 IH_RELEASE(h);
4329 return 0;
4330 }
4331
4332 void
4333 ClearROInUseBit(struct VolumeSummary *summary)
4334 {
4335 IHandle_t *h = summary->volumeInfoHandle;
4336 afs_sfsize_t nBytes;
4337
4338 VolumeDiskData volHeader;
4339
4340 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4341 opr_Assert(nBytes == sizeof(volHeader));
4342 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4343 volHeader.inUse = 0;
4344 volHeader.needsSalvaged = 0;
4345 volHeader.inService = 1;
4346 volHeader.dontSalvage = DONT_SALVAGE;
4347 if (!Testing) {
4348 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4349 opr_Assert(nBytes == sizeof(volHeader));
4350 }
4351 }
4352
4353 /* MaybeZapVolume
4354 * Possible delete the volume.
4355 *
4356 * deleteMe - Always do so, only a partial volume.
4357 */
4358 void
4359 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4360 char *message, int deleteMe, int check)
4361 {
4362 if (readOnly(isp) || deleteMe) {
4363 if (isp->volSummary && !isp->volSummary->deleted) {
4364 if (deleteMe) {
4365 if (!Showmode)
4366 Log("Volume %" AFS_VOLID_FMT " (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", afs_printable_VolumeId_lu(isp->volumeId));
4367 if (!Showmode)
4368 Log("It will be deleted on this server (you may find it elsewhere)\n");
4369 } else {
4370 if (!Showmode)
4371 Log("Volume %" AFS_VOLID_FMT " needs to be salvaged. Since it is read-only, however,\n", afs_printable_VolumeId_lu(isp->volumeId));
4372 if (!Showmode)
4373 Log("it will be deleted instead. It should be recloned.\n");
4374 }
4375 if (!Testing) {
4376 afs_int32 code;
4377 char path[64];
4378 char filename[VMAXPATHLEN];
4379 VolumeExternalName_r(isp->volumeId, filename, sizeof(filename));
4380 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
4381
4382 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4383 if (code) {
4384 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
4385 afs_printable_int32_ld(code),
4386 afs_printable_VolumeId_lu(isp->volumeId));
4387 }
4388
4389 /* make sure we actually delete the header file; ENOENT
4390 * is fine, since VDestroyVolumeDiskHeader probably already
4391 * unlinked it */
4392 if (unlink(path) && errno != ENOENT) {
4393 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4394 }
4395 if (salvinfo->useFSYNC) {
4396 AskDelete(salvinfo, isp->volumeId);
4397 }
4398 isp->volSummary->deleted = 1;
4399 }
4400 }
4401 } else if (!check) {
4402 Log("%s salvage was unsuccessful: read-write volume %" AFS_VOLID_FMT "\n", message,
4403 afs_printable_VolumeId_lu(isp->volumeId));
4404 Abort("Salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
4405 }
4406 }
4407
#ifdef AFS_DEMAND_ATTACH_FS
/**
 * Locks a volume on disk for salvaging.
 *
 * After taking the lock and verifying our checkout with the fileserver,
 * the volume header's inUse field is set to programType (unless Testing),
 * provided the header can be read.
 *
 * @param[in] salvinfo  information for the current salvage job
 * @param[in] volumeId  volume ID to lock
 *
 * @return operation status
 *  @retval 0  success
 *  @retval -1 volume lock raced with a fileserver restart; all volumes must
 *             checked out and locked again
 *
 * @note DAFS only
 */
static int
LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
{
    struct VolumeDiskData volData;
    struct VolumeDiskHeader rawHeader;
    struct VolumeHeader hdr;
    IHandle_t *infoH;
    afs_int32 rc;
    int mode;
    int readable;

    /* should always be WRITE_LOCK, but keep the lock-type logic all
     * in one place, in VVolLockType. Params will be ignored, but
     * try to provide what we're logically doing. */
    mode = VVolLockType(V_VOLUPD, 1);

    rc = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, mode);
    if (rc == EBUSY) {
        Abort("Someone else appears to be using volume %lu; Aborted\n",
              afs_printable_uint32_lu(volumeId));
    }
    if (rc) {
        Abort("Error %ld trying to lock volume %lu; Aborted\n",
              afs_printable_int32_ld(rc),
              afs_printable_uint32_lu(volumeId));
    }

    rc = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
    if (rc == SYNC_DENIED) {
        /* need to retry checking out volumes */
        return -1;
    }
    if (rc != SYNC_OK) {
        Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
              afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(rc));
    }

    if (Testing) {
        /* dry run: never touch the on-disk header */
        return 0;
    }

    /* set inUse = programType in the volume header to ensure that nobody
     * tries to use this volume again without salvaging, if we somehow crash
     * or otherwise exit before finishing the salvage.
     */
    if (VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &rawHeader)) {
        return 0;
    }

    DiskToVolumeHeader(&hdr, &rawHeader);

    IH_INIT(infoH, salvinfo->fileSysDevice, hdr.parent, hdr.volumeInfo);

    readable = (IH_IREAD(infoH, 0, (char*)&volData, sizeof(volData)) == sizeof(volData)
                && volData.stamp.magic == VOLUMEINFOMAGIC);

    if (readable) {
        volData.inUse = programType;

        /* If we can't re-write the header, bail out and error. We don't
         * assert when reading the header, since it's possible the
         * header isn't really there (when there's no data associated
         * with the volume; we just delete the vol header file in that
         * case). But if it's there enough that we can read it, but
         * somehow we cannot write to it to signify we're salvaging it,
         * we've got a big problem and we cannot continue. */
        opr_Verify(IH_IWRITE(infoH, 0, (char*)&volData, sizeof(volData))
                   == sizeof(volData));
    }

    IH_RELEASE(infoH);
    return 0;
}
#endif /* AFS_DEMAND_ATTACH_FS */
4496
4497 static void
4498 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4499 {
4500 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4501 afs_int32 code;
4502 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4503 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4504 if (code != SYNC_OK) {
4505 Log("AskError: failed to force volume %lu into error state; "
4506 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4507 (long)code, SYNC_res2string(code));
4508 }
4509 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
4510 }
4511
4512 void
4513 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4514 {
4515 afs_int32 code, i;
4516 SYNC_response res;
4517
4518 memset(&res, 0, sizeof(res));
4519
4520 for (i = 0; i < 3; i++) {
4521 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4522 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4523
4524 if (code == SYNC_OK) {
4525 break;
4526 } else if (code == SYNC_DENIED) {
4527 if (AskDAFS())
4528 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4529 else
4530 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4531 Abort("Salvage aborted\n");
4532 } else if (code == SYNC_BAD_COMMAND) {
4533 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4534 FSYNC_VOL_OFF);
4535 if (AskDAFS()) {
4536 #ifdef AFS_DEMAND_ATTACH_FS
4537 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4538 #else
4539 Log("AskOffline: fileserver is DAFS but we are not.\n");
4540 #endif
4541 } else {
4542 #ifdef AFS_DEMAND_ATTACH_FS
4543 Log("AskOffline: fileserver is not DAFS but we are.\n");
4544 #else
4545 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4546 #endif
4547 }
4548 Abort("Salvage aborted\n");
4549 } else if (i < 2) {
4550 /* try it again */
4551 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4552 FSYNC_clientFinis();
4553 FSYNC_clientInit();
4554 }
4555 }
4556 if (code != SYNC_OK) {
4557 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4558 Abort("Salvage aborted\n");
4559 }
4560 }
4561
4562 /* don't want to pass around state; remember it here */
4563 static int isDAFS = -1;
4564 int
4565 AskDAFS(void)
4566 {
4567 SYNC_response res;
4568 afs_int32 code = 1, i;
4569
4570 /* we don't care if we race. the answer shouldn't change */
4571 if (isDAFS != -1)
4572 return isDAFS;
4573
4574 memset(&res, 0, sizeof(res));
4575
4576 for (i = 0; code && i < 3; i++) {
4577 code = FSYNC_VolOp(0, NULL, FSYNC_VOL_LISTVOLUMES, FSYNC_SALVAGE, &res);
4578 if (code) {
4579 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4580 "%ld (%s); trying again...\n", (long)code, (long)res.hdr.reason,
4581 FSYNC_reason2string(res.hdr.reason));
4582 FSYNC_clientFinis();
4583 FSYNC_clientInit();
4584 }
4585 }
4586
4587 if (code) {
4588 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4589 res.hdr.flags = 0;
4590 }
4591
4592 if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
4593 isDAFS = 1;
4594 } else {
4595 isDAFS = 0;
4596 }
4597
4598 return isDAFS;
4599 }
4600
4601 static void
4602 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4603 {
4604 struct VolumeDiskHeader diskHdr;
4605 int code;
4606 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4607 if (code) {
4608 /* volume probably does not exist; no need to bring back online */
4609 return;
4610 }
4611 AskOnline(salvinfo, volumeId);
4612 }
4613
4614 void
4615 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4616 {
4617 afs_int32 code, i;
4618
4619 for (i = 0; i < 3; i++) {
4620 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4621 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4622
4623 if (code == SYNC_OK) {
4624 break;
4625 } else if (code == SYNC_DENIED) {
4626 Log("AskOnline: file server denied online request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4627 } else if (code == SYNC_BAD_COMMAND) {
4628 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4629 FSYNC_VOL_ON);
4630 Log("AskOnline: please make sure file server binaries are same version.\n");
4631 break;
4632 } else if (i < 2) {
4633 /* try it again */
4634 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4635 FSYNC_clientFinis();
4636 FSYNC_clientInit();
4637 }
4638 }
4639 }
4640
4641 void
4642 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4643 {
4644 afs_int32 code, i;
4645 SYNC_response res;
4646
4647 for (i = 0; i < 3; i++) {
4648 memset(&res, 0, sizeof(res));
4649 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4650 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4651
4652 if (code == SYNC_OK) {
4653 break;
4654 } else if (code == SYNC_DENIED) {
4655 Log("AskOnline: file server denied DONE request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4656 } else if (code == SYNC_BAD_COMMAND) {
4657 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4658 FSYNC_VOL_DONE);
4659 if (AskDAFS()) {
4660 #ifdef AFS_DEMAND_ATTACH_FS
4661 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4662 #else
4663 Log("AskOnline: fileserver is DAFS but we are not.\n");
4664 #endif
4665 } else {
4666 #ifdef AFS_DEMAND_ATTACH_FS
4667 Log("AskOnline: fileserver is not DAFS but we are.\n");
4668 #else
4669 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4670 #endif
4671 }
4672 break;
4673 } else if (code == SYNC_FAILED &&
4674 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4675 res.hdr.reason == FSYNC_WRONG_PART)) {
4676 /* volume is already effectively 'deleted' */
4677 break;
4678 } else if (i < 2) {
4679 /* try it again */
4680 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4681 FSYNC_clientFinis();
4682 FSYNC_clientInit();
4683 }
4684 }
4685 }
4686
4687 int
4688 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4689 {
4690 /* Volume parameter is passed in case iopen is upgraded in future to
4691 * require a volume Id to be passed
4692 */
4693 char buf[4096];
4694 IHandle_t *srcH, *destH;
4695 FdHandle_t *srcFdP, *destFdP;
4696 ssize_t nBytes = 0;
4697 afs_foff_t size = 0;
4698
4699 IH_INIT(srcH, device, rwvolume, inode1);
4700 srcFdP = IH_OPEN(srcH);
4701 opr_Assert(srcFdP != NULL);
4702 IH_INIT(destH, device, rwvolume, inode2);
4703 destFdP = IH_OPEN(destH);
4704 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4705 opr_Verify(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4706 size += nBytes;
4707 }
4708 opr_Assert(nBytes == 0);
4709 FDH_REALLYCLOSE(srcFdP);
4710 FDH_REALLYCLOSE(destFdP);
4711 IH_RELEASE(srcH);
4712 IH_RELEASE(destH);
4713 return 0;
4714 }
4715
4716 void
4717 PrintInodeList(struct SalvInfo *salvinfo)
4718 {
4719 struct ViceInodeInfo *ip;
4720 struct ViceInodeInfo *buf;
4721 int nInodes;
4722 afs_ino_str_t stmp;
4723 afs_sfsize_t st_size;
4724
4725 st_size = OS_SIZE(salvinfo->inodeFd);
4726 opr_Assert(st_size >= 0);
4727 buf = malloc(st_size);
4728 opr_Assert(buf != NULL);
4729 nInodes = st_size / sizeof(struct ViceInodeInfo);
4730 opr_Verify(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4731 for (ip = buf; nInodes--; ip++) {
4732 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%" AFS_VOLID_FMT ",%u,%u,%u)\n", /* VolumeId in param */
4733 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4734 (afs_uintmax_t) ip->byteCount,
4735 afs_printable_VolumeId_lu(ip->u.param[0]), ip->u.param[1],
4736 ip->u.param[2], ip->u.param[3]);
4737 }
4738 free(buf);
4739 }
4740
4741 void
4742 PrintInodeSummary(struct SalvInfo *salvinfo)
4743 {
4744 int i;
4745 struct InodeSummary *isp;
4746
4747 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4748 isp = &salvinfo->inodeSummary[i];
4749 Log("VID:%" AFS_VOLID_FMT ", RW:%" AFS_VOLID_FMT ", index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->RWvolumeId), isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
4750 }
4751 }
4752
/*
 * Fork - fork() wrapper that asserts the fork succeeded.
 *
 * Returns what fork() returned.  In a demand-attach build, a child of the
 * salvageserver re-establishes its FSSYNC and SALVSYNC client connections
 * (when those clients are compiled in) before returning.  On NT the function
 * asserts unconditionally.
 */
int
Fork(void)
{
    int childPid;

#ifdef AFS_NT40_ENV
    childPid = 0;
    opr_Assert(0);	/* Fork is never executed in the NT code path */
#else
    childPid = fork();
    opr_Assert(childPid >= 0);
# ifdef AFS_DEMAND_ATTACH_FS
    if (childPid == 0 && programType == salvageServer) {
	/* we are a salvageserver child */
#  ifdef FSSYNC_BUILD_CLIENT
	VChildProcReconnectFS_r();
#  endif
#  ifdef SALVSYNC_BUILD_CLIENT
	VReconnectSALV_r();
#  endif
    }
# endif /* AFS_DEMAND_ATTACH_FS */
#endif /* !AFS_NT40_ENV */

    return childPid;
}
4777
/*
 * QuietExit - terminate with the given exit code without showing the log.
 *
 * In a demand-attach salvageserver, the volume lock file of every known
 * partition is reinitialized and the SYNC channels are disconnected first.
 * On NT, a thread other than main_thread exits only itself.
 */
static void
QuietExit(int code)
{
#ifdef AFS_DEMAND_ATTACH_FS
    if (programType == salvageServer) {
	/* release all volume locks before closing down our SYNC channels.
	 * the fileserver may try to online volumes we have checked out when
	 * we close down FSSYNC, so we should make sure we don't have those
	 * volumes locked when it does */
	int partId = 0;

	while (partId <= VOLMAXPARTS) {
	    struct DiskPartition64 *part = VGetPartitionById(partId, 0);

	    if (part != NULL) {
		VLockFileReinit(&part->volLockFile);
	    }
	    partId++;
	}
# ifdef SALVSYNC_BUILD_CLIENT
	VDisconnectSALV();
# endif
# ifdef FSSYNC_BUILD_CLIENT
	VDisconnectFS();
# endif
    }
#endif /* AFS_DEMAND_ATTACH_FS */

#ifdef AFS_NT40_ENV
    if (main_thread == pthread_self()) {
	exit(code);
    } else {
	pthread_exit((void *)code);
    }
#else
    exit(code);
#endif
}
4813
/*
 * Exit - show the salvage log (when SalvageShowLog decides it should be
 * shown) and then terminate through QuietExit with the given code.
 */
void
Exit(int code)
{
    SalvageShowLog();

    QuietExit(code);
}
4820
4821
/*
 * Wait - wait for any child process to terminate.
 *
 * prog is used only to name the child in the "core dumped" log message.
 * Returns the child's pid when it exited with status 0, or -1 when it was
 * killed by a signal or exited with a non-zero status.  A failing wait()
 * trips an assertion.
 */
int
Wait(char *prog)
{
    int status;
    int childPid = wait(&status);

    opr_Assert(childPid != -1);

    if (WCOREDUMP(status)) {
	Log("\"%s\" core dumped!\n", prog);
    }

    /* success only for a normal exit with status 0 */
    return (!WIFSIGNALED(status) && WEXITSTATUS(status) == 0) ? childPid : -1;
}
4835
/*
 * TimeStamp - format a time as local "MM/DD/YYYY HH:MM[:SS]" text.
 *
 * buffer     destination for the formatted text
 * size       capacity of buffer in bytes
 * clock      time to format
 * precision  non-zero to include seconds, zero to stop at minutes
 *
 * Returns buffer.  If the time cannot be converted to local time, or the
 * formatted text does not fit, the whole buffer is zero-filled so the caller
 * always receives a valid (empty) string.
 */
static char *
TimeStamp(char *buffer, size_t size, time_t clock, int precision)
{
    const char *format = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    /* localtime() returns NULL for unrepresentable times; handing that to
     * strftime() would be undefined behaviour, so treat it as a failure. */
    if (lt == NULL || strftime(buffer, size, format, lt) == 0)
	memset(buffer, 0, size);
    return buffer;
}
4851
4852 static void
4853 SalvageShowLog(void)
4854 {
4855 char line[256];
4856 char *filename;
4857 FILE *logFile;
4858
4859 if (ShowLog == 0 || ClientMode) {
4860 return; /* nothing to do */
4861 }
4862 filename = strdup(GetLogFilename());
4863 opr_Assert(filename != NULL);
4864 CloseLog();
4865
4866 logFile = afs_fopen(filename, "r");
4867 if (!logFile)
4868 printf("Can't read %s, exiting\n", ShowLogFilename);
4869 else {
4870 while (fgets(line, sizeof(line), logFile))
4871 printf("%s", line);
4872 fflush(stdout);
4873 }
4874 free(filename);
4875 }
4876
4877 static void
4878 vLog(const char *format, va_list args)
4879 {
4880 if (!ClientMode) {
4881 vFSLog(format, args);
4882 } else {
4883 struct timeval now;
4884 char buffer[64];
4885
4886 gettimeofday(&now, NULL);
4887 fprintf(stderr, "%s ", TimeStamp(buffer, sizeof(buffer), now.tv_sec, 1));
4888 vfprintf(stderr, format, args);
4889 fflush(stderr);
4890 }
4891 }
4892
/*
 * Log - printf-style logging entry point; forwards its arguments to vLog().
 */
void
Log(const char *format, ...)
{
    va_list ap;

    va_start(ap, format);
    vLog(format, ap);
    va_end(ap);
}
4902
4903 void
4904 Abort(const char *format, ...)
4905 {
4906 va_list args;
4907
4908 va_start(args, format);
4909 vLog(format, args);
4910 va_end(args);
4911 SalvageShowLog();
4912 if (debug)
4913 abort();
4914 QuietExit(1);
4915 }
4916
/*
 * ToString - return a heap-allocated copy of s made with strdup().
 * An allocation failure trips an assertion, so the result is never NULL.
 */
char *
ToString(const char *s)
{
    char *copy = strdup(s);

    opr_Assert(copy != NULL);
    return copy;
}
4925
4926 /* Remove the FORCESALVAGE file */
4927 void
4928 RemoveTheForce(char *path)
4929 {
4930 char target[1024];
4931 struct afs_stat_st force; /* so we can use afs_stat to find it */
4932 strcpy(target,path);
4933 strcat(target,"/FORCESALVAGE");
4934 if (!Testing && ForceSalvage) {
4935 if (afs_stat(target,&force) == 0) unlink(target);
4936 }
4937 }
4938
4939 #ifndef AFS_AIX32_ENV
4940 /*
4941 * UseTheForceLuke - see if we can use the force
4942 */
4943 int
4944 UseTheForceLuke(char *path)
4945 {
4946 struct afs_stat_st force;
4947 char target[1024];
4948 strcpy(target,path);
4949 strcat(target,"/FORCESALVAGE");
4950
4951 return (afs_stat(target, &force) == 0);
4952 }
4953 #else
4954 /*
4955 * UseTheForceLuke - see if we can use the force
4956 *
4957 * NOTE:
4958 * The VRMIX fsck will not muck with the filesystem it is supposedly
4959 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4960 * muck directly with the root inode, which is within the normal
4961 * domain of fsck.
4962 * ListViceInodes() has a side effect of setting ForceSalvage if
4963 * it detects a need, based on root inode examination.
4964 */
4965 int
4966 UseTheForceLuke(char *path)
4967 {
4968
4969 return 0; /* sorry OB1 */
4970 }
4971 #endif
4972
4973 #ifdef AFS_NT40_ENV
4974 /* NT support routines */
4975
4976 static char execpathname[MAX_PATH];
4977 int
4978 nt_SalvagePartition(char *partName, int jobn)
4979 {
4980 int pid;
4981 int n;
4982 childJob_t job;
4983 if (!*execpathname) {
4984 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4985 if (!n || n == 1023)
4986 return -1;
4987 }
4988 job.cj_magic = SALVAGER_MAGIC;
4989 job.cj_number = jobn;
4990 (void)strcpy(job.cj_part, partName);
4991 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
4992 return pid;
4993 }
4994
4995 int
4996 nt_SetupPartitionSalvage(void *datap, int len)
4997 {
4998 childJob_t *jobp = (childJob_t *) datap;
4999 char *logname;
5000
5001 if (len != sizeof(childJob_t))
5002 return -1;
5003 if (jobp->cj_magic != SALVAGER_MAGIC)
5004 return -1;
5005 myjob = *jobp;
5006
5007 /* Open logFile */
5008 if (asprintf(&logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
5009 myjob.cj_number) < 0)
5010 return -1;
5011 OpenLog(logname);
5012 free(logname);
5013
5014 return 0;
5015 }
5016
5017
5018 #endif /* AFS_NT40_ENV */