Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / vol / salvaged.c
1 /*
2 * Copyright 2006-2007, Sine Nomine Associates and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 /*
11 * demand attach fs
12 * online salvager daemon
13 */
14
15 /* Main program file. Define globals. */
16 #define MAIN 1
17
18 #include <afsconfig.h>
19 #include <afs/param.h>
20
21 #include <roken.h>
22
23 #ifdef HAVE_SYS_FILE_H
24 #include <sys/file.h>
25 #endif
26
27 #ifdef AFS_NT40_ENV
28 #include <WINNT/afsevent.h>
29 #endif
30
31 #ifndef WCOREDUMP
32 #define WCOREDUMP(x) ((x) & 0200)
33 #endif
34
35 #include <afs/opr.h>
36 #include <opr/lock.h>
37 #include <afs/afsint.h>
38 #include <rx/rx_queue.h>
39
40 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
41 #if defined(AFS_VFSINCL_ENV)
42 #include <sys/vnode.h>
43 #ifdef AFS_SUN5_ENV
44 #include <sys/fs/ufs_inode.h>
45 #else
46 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
47 #include <ufs/ufs/dinode.h>
48 #include <ufs/ffs/fs.h>
49 #else
50 #include <ufs/inode.h>
51 #endif
52 #endif
53 #else /* AFS_VFSINCL_ENV */
54 #ifdef AFS_OSF_ENV
55 #include <ufs/inode.h>
56 #else /* AFS_OSF_ENV */
57 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
58 #include <sys/inode.h>
59 #endif
60 #endif
61 #endif /* AFS_VFSINCL_ENV */
62 #endif /* AFS_SGI_ENV */
63 #ifdef AFS_AIX_ENV
64 #include <sys/vfs.h>
65 #include <sys/lockf.h>
66 #else
67 #ifdef AFS_HPUX_ENV
68 #include <checklist.h>
69 #else
70 #if defined(AFS_SGI_ENV)
71 #include <mntent.h>
72 #else
73 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
74 #ifdef AFS_SUN5_ENV
75 #include <sys/mnttab.h>
76 #include <sys/mntent.h>
77 #else
78 #include <mntent.h>
79 #endif
80 #else
81 #endif /* AFS_SGI_ENV */
82 #endif /* AFS_HPUX_ENV */
83 #endif
84 #endif
85 #ifndef AFS_NT40_ENV
86 #include <afs/osi_inode.h>
87 #endif
88 #include <afs/cmd.h>
89 #include <afs/afsutil.h>
90 #include <afs/fileutil.h>
91 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
92 #include <afs/dir.h>
93
94 #include "nfs.h"
95 #include "lwp.h"
96 #include "lock.h"
97 #include <afs/afssyscalls.h>
98 #include "ihandle.h"
99 #include "vnode.h"
100 #include "volume.h"
101 #include "partition.h"
102 #include "daemon_com.h"
103 #include "fssync.h"
104 #include "salvsync.h"
105 #include "viceinode.h"
106 #include "salvage.h"
107 #include "vol-salvage.h"
108 #include "common.h"
109 #ifdef AFS_NT40_ENV
110 #include <pthread.h>
111 #endif
112
113 extern int ClientMode;
114
115 #if !defined(AFS_DEMAND_ATTACH_FS)
116 #error "online salvager only supported for demand attach fileserver"
117 #endif /* AFS_DEMAND_ATTACH_FS */
118
119 #if defined(AFS_NT40_ENV)
120 #error "online salvager not supported on NT"
121 #endif /* AFS_NT40_ENV */
122
123 /*@+fcnmacros +macrofcndecl@*/
124 #ifdef O_LARGEFILE
125 #define afs_fopen fopen64
126 #else /* !O_LARGEFILE */
127 #define afs_fopen fopen
128 #endif /* !O_LARGEFILE */
129 /*@=fcnmacros =macrofcndecl@*/
130
131
132
133 static volatile int current_workers = 0;
134 static volatile struct rx_queue pending_q;
135 static pthread_mutex_t worker_lock;
136 static pthread_cond_t worker_cv;
137
138 static void * SalvageChildReaperThread(void *);
139 static int DoSalvageVolume(struct SalvageQueueNode * node, int slot);
140
141 static void SalvageServer(int argc, char **argv, struct logOptions *logopts);
142 static void SalvageClient(VolumeId vid, char * pname);
143
144 static int Reap_Child(char * prog, int * pid, int * status);
145
146 static void * SalvageLogCleanupThread(void *);
147 static void SalvageLogCleanup(int pid);
148
149 static void * SalvageLogScanningThread(void *);
150 static void ScanLogs(struct rx_queue *log_watch_queue);
151
/*
 * Rock given to the cmd package by main(): the untouched command line,
 * which handleit() forwards to SalvageServer().
 */
struct cmdline_rock {
    int argc;       /* argument count as received by main() */
    char **argv;    /* argument vector as received by main() */
};
156
157 struct log_cleanup_node {
158 struct rx_queue q;
159 int pid;
160 };
161
162 struct {
163 struct rx_queue queue_head;
164 pthread_cond_t queue_change_cv;
165 } log_cleanup_queue;
166
167
168 #define DEFAULT_PARALLELISM 4 /* allow 4 parallel salvage workers by default */
169
/*
 * Parameter offsets handed to cmd_AddParmAtOffset() in main() and used to
 * look the options up again in handleit().
 */
enum optionsList {
    OPT_partition,      /* -partition */
    OPT_volumeid,       /* -volumeid */
    OPT_debug,          /* -debug */
    OPT_nowrite,        /* -nowrite */
    OPT_inodes,         /* -inodes */
    OPT_oktozap,        /* -oktozap */
    OPT_rootinodes,     /* -rootinodes */
    OPT_salvagedirs,    /* -salvagedirs */
    OPT_blockreads,     /* -blockreads */
    OPT_parallel,       /* -parallel */
    OPT_tmpdir,         /* -tmpdir */
    OPT_orphans,        /* -orphans */
    OPT_syslog,         /* -syslog */
    OPT_syslogfacility, /* -syslogfacility */
    OPT_logfile,        /* -logfile */
    OPT_client,         /* -client */
    OPT_transarc_logs   /* -transarc-logs */
};
189
190 static int
191 handleit(struct cmd_syndesc *opts, void *arock)
192 {
193 char pname[100];
194 afs_int32 seenpart = 0, seenvol = 0;
195 VolumeId vid = 0;
196 struct cmdline_rock *rock = (struct cmdline_rock *)arock;
197 char *optstring = NULL;
198 struct logOptions logopts;
199
200 memset(&logopts, 0, sizeof(logopts));
201
202 #ifdef AFS_SGI_VNODE_GLUE
203 if (afs_init_kernel_config(-1) < 0) {
204 printf
205 ("Can't determine NUMA configuration, not starting salvager.\n");
206 exit(1);
207 }
208 #endif
209
210 cmd_OptionAsFlag(opts, OPT_debug, &debug);
211 cmd_OptionAsFlag(opts, OPT_nowrite, &Testing);
212 cmd_OptionAsFlag(opts, OPT_inodes, &ListInodeOption);
213 cmd_OptionAsFlag(opts, OPT_oktozap, &OKToZap);
214 cmd_OptionAsFlag(opts, OPT_rootinodes, &ShowRootFiles);
215 cmd_OptionAsFlag(opts, OPT_salvagedirs, &RebuildDirs);
216 cmd_OptionAsFlag(opts, OPT_blockreads, &forceR);
217 if (cmd_OptionAsString(opts, OPT_parallel, &optstring) == 0) {
218 if (strncmp(optstring, "all", 3) == 0) {
219 PartsPerDisk = 1;
220 }
221 if (strlen(optstring) != 0) {
222 Parallel = atoi(optstring);
223 if (Parallel < 1)
224 Parallel = 1;
225 if (Parallel > MAXPARALLEL) {
226 printf("Setting parallel salvages to maximum of %d \n",
227 MAXPARALLEL);
228 Parallel = MAXPARALLEL;
229 }
230 }
231 free(optstring);
232 optstring = NULL;
233 } else {
234 Parallel = min(DEFAULT_PARALLELISM, MAXPARALLEL);
235 }
236 if (cmd_OptionAsString(opts, OPT_tmpdir, &optstring) == 0) {
237 DIR *dirp;
238 dirp = opendir(optstring);
239 if (!dirp) {
240 printf
241 ("Can't open temporary placeholder dir %s; using current partition \n",
242 optstring);
243 tmpdir = NULL;
244 } else
245 closedir(dirp);
246 free(optstring);
247 optstring = NULL;
248 }
249 if (cmd_OptionAsString(opts, OPT_orphans, &optstring) == 0) {
250 if (Testing)
251 orphans = ORPH_IGNORE;
252 else if (strcmp(optstring, "remove") == 0
253 || strcmp(optstring, "r") == 0)
254 orphans = ORPH_REMOVE;
255 else if (strcmp(optstring, "attach") == 0
256 || strcmp(optstring, "a") == 0)
257 orphans = ORPH_ATTACH;
258 free(optstring);
259 optstring = NULL;
260 }
261
262 #ifdef HAVE_SYSLOG
263 if (cmd_OptionPresent(opts, OPT_syslog)) {
264 if (cmd_OptionPresent(opts, OPT_logfile)) {
265 fprintf(stderr, "Invalid options: -syslog and -logfile are exclusive.\n");
266 return -1;
267 }
268 if (cmd_OptionPresent(opts, OPT_transarc_logs)) {
269 fprintf(stderr, "Invalid options: -syslog and -transarc-logs are exclusive.\n");
270 return -1;
271 }
272 logopts.lopt_dest = logDest_syslog;
273 logopts.lopt_facility = LOG_DAEMON;
274 logopts.lopt_tag = "salvageserver";
275 cmd_OptionAsInt(opts, OPT_syslogfacility, &logopts.lopt_facility);
276 } else
277 #endif
278 {
279 logopts.lopt_dest = logDest_file;
280 if (cmd_OptionPresent(opts, OPT_transarc_logs)) {
281 logopts.lopt_rotateOnOpen = 1;
282 logopts.lopt_rotateStyle = logRotate_old;
283 }
284 if (cmd_OptionPresent(opts, OPT_logfile))
285 cmd_OptionAsString(opts, OPT_logfile, (char**)&logopts.lopt_filename);
286 else
287 logopts.lopt_filename = AFSDIR_SERVER_SALSRVLOG_FILEPATH;
288 }
289
290 if (cmd_OptionPresent(opts, OPT_client)) {
291 if (cmd_OptionAsString(opts, OPT_partition, &optstring) == 0) {
292 seenpart = 1;
293 strlcpy(pname, optstring, sizeof(pname));
294 free(optstring);
295 optstring = NULL;
296 }
297 if (cmd_OptionAsString(opts, OPT_volumeid, &optstring) == 0) {
298 char *end;
299 unsigned long vid_l;
300 seenvol = 1;
301 vid_l = strtoul(optstring, &end, 10);
302 if (vid_l >= MAX_AFS_UINT32 || vid_l == ULONG_MAX || *end != '\0') {
303 printf("Invalid volume id specified; salvage aborted\n");
304 exit(-1);
305 }
306 vid = (VolumeId)vid_l;
307 }
308
309 if (!seenpart || !seenvol) {
310 printf("You must specify '-partition' and '-volumeid' with the '-client' option\n");
311 exit(-1);
312 }
313
314 SalvageClient(vid, pname);
315
316 } else { /* salvageserver mode */
317 SalvageServer(rock->argc, rock->argv, &logopts);
318 }
319 return (0);
320 }
321
322
323 #ifndef AFS_NT40_ENV
324 #include "AFS_component_version_number.c"
325 #endif
326 #define MAX_ARGS 128
327 #ifdef AFS_NT40_ENV
328 char *save_args[MAX_ARGS];
329 int n_save_args = 0;
330 pthread_t main_thread;
331 #endif
332
333 int
334 main(int argc, char **argv)
335 {
336 struct cmd_syndesc *ts;
337 int err = 0;
338 struct cmdline_rock arock;
339
340 #ifdef AFS_AIX32_ENV
341 /*
342 * The following signal action for AIX is necessary so that in case of a
343 * crash (i.e. core is generated) we can include the user's data section
344 * in the core dump. Unfortunately, by default, only a partial core is
345 * generated which, in many cases, isn't too useful.
346 */
347 struct sigaction nsa;
348
349 sigemptyset(&nsa.sa_mask);
350 nsa.sa_handler = SIG_DFL;
351 nsa.sa_flags = SA_FULLDUMP;
352 sigaction(SIGABRT, &nsa, NULL);
353 sigaction(SIGSEGV, &nsa, NULL);
354 #endif
355
356 /* Initialize directory paths */
357 if (!(initAFSDirPath() & AFSDIR_SERVER_PATHS_OK)) {
358 #ifdef AFS_NT40_ENV
359 ReportErrorEventAlt(AFSEVT_SVR_NO_INSTALL_DIR, 0, argv[0], 0);
360 #endif
361 fprintf(stderr, "%s: Unable to obtain AFS server directory.\n",
362 argv[0]);
363 exit(2);
364 }
365 #ifdef AFS_NT40_ENV
366 /* Default to binary mode for fopen() */
367 _set_fmode(_O_BINARY);
368
369 main_thread = pthread_self();
370 if (spawnDatap && spawnDataLen) {
371 /* This is a child per partition salvager. Don't setup log or
372 * try to lock the salvager lock.
373 */
374 if (nt_SetupPartitionSalvage(spawnDatap, spawnDataLen) < 0)
375 exit(3);
376 } else {
377 #endif
378
379 #ifndef AFS_NT40_ENV
380 if (geteuid() != 0) {
381 printf("Salvager must be run as root.\n");
382 fflush(stdout);
383 Exit(0);
384 }
385 #endif
386
387 /* bad for normal help flag processing, but can do nada */
388
389 #ifdef AFS_NT40_ENV
390 }
391 #endif
392
393 arock.argc = argc;
394 arock.argv = argv;
395
396
397 ts = cmd_CreateSyntax("initcmd", handleit, &arock, 0, "initialize the program");
398 cmd_AddParmAtOffset(ts, OPT_partition, "-partition", CMD_SINGLE,
399 CMD_OPTIONAL, "Name of partition to salvage");
400 cmd_AddParmAtOffset(ts, OPT_volumeid, "-volumeid", CMD_SINGLE, CMD_OPTIONAL,
401 "Volume Id to salvage");
402 cmd_AddParmAtOffset(ts, OPT_debug, "-debug", CMD_FLAG, CMD_OPTIONAL,
403 "Run in Debugging mode");
404 cmd_AddParmAtOffset(ts, OPT_nowrite, "-nowrite", CMD_FLAG, CMD_OPTIONAL,
405 "Run readonly/test mode");
406 cmd_AddParmAtOffset(ts, OPT_inodes, "-inodes", CMD_FLAG, CMD_OPTIONAL,
407 "Just list affected afs inodes - debugging flag");
408 cmd_AddParmAtOffset(ts, OPT_oktozap, "-oktozap", CMD_FLAG, CMD_OPTIONAL,
409 "Give permission to destroy bogus inodes/volumes - debugging flag");
410 cmd_AddParmAtOffset(ts, OPT_rootinodes, "-rootinodes", CMD_FLAG,
411 CMD_OPTIONAL, "Show inodes owned by root - debugging flag");
412 cmd_AddParmAtOffset(ts, OPT_salvagedirs, "-salvagedirs", CMD_FLAG,
413 CMD_OPTIONAL, "Force rebuild/salvage of all directories");
414 cmd_AddParmAtOffset(ts, OPT_blockreads, "-blockreads", CMD_FLAG,
415 CMD_OPTIONAL, "Read smaller blocks to handle IO/bad blocks");
416 cmd_AddParmAtOffset(ts, OPT_parallel, "-parallel", CMD_SINGLE, CMD_OPTIONAL,
417 "# of max parallel partition salvaging");
418 cmd_AddParmAtOffset(ts, OPT_tmpdir, "-tmpdir", CMD_SINGLE, CMD_OPTIONAL,
419 "Name of dir to place tmp files ");
420 cmd_AddParmAtOffset(ts, OPT_orphans, "-orphans", CMD_SINGLE, CMD_OPTIONAL,
421 "ignore | remove | attach");
422
423 #ifdef HAVE_SYSLOG
424 cmd_AddParmAtOffset(ts, OPT_syslog, "-syslog", CMD_FLAG, CMD_OPTIONAL,
425 "Write salvage log to syslogs");
426 cmd_AddParmAtOffset(ts, OPT_syslogfacility, "-syslogfacility", CMD_SINGLE,
427 CMD_OPTIONAL, "Syslog facility number to use");
428 #endif
429
430 cmd_AddParmAtOffset(ts, OPT_client, "-client", CMD_FLAG, CMD_OPTIONAL,
431 "Use SALVSYNC to ask salvageserver to salvage a volume");
432
433 cmd_AddParmAtOffset(ts, OPT_logfile, "-logfile", CMD_SINGLE, CMD_OPTIONAL,
434 "Location of log file ");
435
436 cmd_AddParmAtOffset(ts, OPT_transarc_logs, "-transarc-logs", CMD_FLAG,
437 CMD_OPTIONAL, "enable Transarc style logging");
438
439 err = cmd_Dispatch(argc, argv);
440 Exit(err);
441 return 0; /* not reached */
442 }
443
444 static void
445 SalvageClient(VolumeId vid, char * pname)
446 {
447 int done = 0;
448 afs_int32 code;
449 SYNC_response res;
450 SALVSYNC_response_hdr sres;
451 VolumePackageOptions opts;
452
453 /* Send Log() messages to stderr in client mode. */
454 ClientMode = 1;
455
456 VOptDefaults(volumeUtility, &opts);
457 if (VInitVolumePackage2(volumeUtility, &opts)) {
458 /* VInitVolumePackage2 can fail on e.g. partition attachment errors,
459 * but we don't really care, since all we're doing is trying to use
460 * SALVSYNC */
461 fprintf(stderr, "errors encountered initializing volume package, but "
462 "trying to continue anyway\n");
463 }
464 SALVSYNC_clientInit();
465
466 code = SALVSYNC_SalvageVolume(vid, pname, SALVSYNC_SALVAGE, SALVSYNC_OPERATOR, 0, NULL);
467 if (code != SYNC_OK) {
468 goto sync_error;
469 }
470
471 res.payload.buf = (void *) &sres;
472 res.payload.len = sizeof(sres);
473
474 while(!done) {
475 sleep(2);
476 code = SALVSYNC_SalvageVolume(vid, pname, SALVSYNC_QUERY, SALVSYNC_WHATEVER, 0, &res);
477 if (code != SYNC_OK) {
478 goto sync_error;
479 }
480 switch (sres.state) {
481 case SALVSYNC_STATE_ERROR:
482 printf("salvageserver reports salvage ended in an error; check log files for more details\n");
483 case SALVSYNC_STATE_DONE:
484 case SALVSYNC_STATE_UNKNOWN:
485 done = 1;
486 }
487 }
488 SALVSYNC_clientFinis();
489 return;
490
491 sync_error:
492 if (code == SYNC_DENIED) {
493 printf("salvageserver refused to salvage volume %u on partition %s\n",
494 vid, pname);
495 } else if (code == SYNC_BAD_COMMAND) {
496 printf("SALVSYNC protocol mismatch; please make sure fileserver, volserver, salvageserver and salvager are same version\n");
497 } else if (code == SYNC_COM_ERROR) {
498 printf("SALVSYNC communications error\n");
499 }
500 SALVSYNC_clientFinis();
501 exit(-1);
502 }
503
504 static int * child_slot;
505
506 static void
507 SalvageServer(int argc, char **argv, struct logOptions *logopts)
508 {
509 int pid, ret;
510 struct SalvageQueueNode * node;
511 pthread_t tid;
512 pthread_attr_t attrs;
513 int slot;
514 VolumePackageOptions opts;
515
516 /* All entries to the log will be appended. Useful if there are
517 * multiple salvagers appending to the log.
518 */
519 OpenLog(logopts);
520 SetupLogSignals();
521
522 Log("%s\n", cml_version_number);
523 LogCommandLine(argc, argv, "Online Salvage Server",
524 SalvageVersion, "Starting OpenAFS", Log);
525 /* Get and hold a lock for the duration of the salvage to make sure
526 * that no other salvage runs at the same time. The routine
527 * VInitVolumePackage2 (called below) makes sure that a file server or
528 * other volume utilities don't interfere with the salvage.
529 */
530
531 /* even demand attach online salvager
532 * still needs this because we don't want
533 * a stand-alone salvager to conflict with
534 * the salvager daemon */
535 ObtainSharedSalvageLock();
536
537 child_slot = calloc(Parallel, sizeof(int));
538 opr_Assert(child_slot != NULL);
539
540 /* initialize things */
541 VOptDefaults(salvageServer, &opts);
542 if (VInitVolumePackage2(salvageServer, &opts)) {
543 Log("Shutting down: errors encountered initializing volume package\n");
544 Exit(1);
545 }
546 DInit(10);
547 queue_Init(&pending_q);
548 queue_Init(&log_cleanup_queue);
549 opr_mutex_init(&worker_lock);
550 opr_cv_init(&worker_cv);
551 opr_cv_init(&log_cleanup_queue.queue_change_cv);
552 opr_Verify(pthread_attr_init(&attrs) == 0);
553
554 /* start up the reaper and log cleaner threads */
555 opr_Verify(pthread_attr_setdetachstate(&attrs,
556 PTHREAD_CREATE_DETACHED) == 0);
557 opr_Verify(pthread_create(&tid, &attrs,
558 &SalvageChildReaperThread, NULL) == 0);
559 opr_Verify(pthread_create(&tid, &attrs,
560 &SalvageLogCleanupThread, NULL) == 0);
561 opr_Verify(pthread_create(&tid, &attrs,
562 &SalvageLogScanningThread, NULL) == 0);
563
564 /* loop forever serving requests */
565 while (1) {
566 node = SALVSYNC_getWork();
567 opr_Assert(node != NULL);
568
569 Log("dispatching child to salvage volume %u...\n",
570 node->command.sop.parent);
571
572 VOL_LOCK;
573 /* find a slot */
574 for (slot = 0; slot < Parallel; slot++) {
575 if (!child_slot[slot])
576 break;
577 }
578 opr_Assert (slot < Parallel);
579
580 do_fork:
581 pid = Fork();
582 if (pid == 0) {
583 VOL_UNLOCK;
584 ret = DoSalvageVolume(node, slot);
585 Exit(ret);
586 } else if (pid < 0) {
587 Log("failed to fork child worker process\n");
588 sleep(1);
589 goto do_fork;
590 } else {
591 child_slot[slot] = pid;
592 node->pid = pid;
593 VOL_UNLOCK;
594
595 opr_mutex_enter(&worker_lock);
596 current_workers++;
597
598 /* let the reaper thread know another worker was spawned */
599 opr_cv_broadcast(&worker_cv);
600
601 /* if we're overquota, wait for the reaper */
602 while (current_workers >= Parallel) {
603 opr_cv_wait(&worker_cv, &worker_lock);
604 }
605 opr_mutex_exit(&worker_lock);
606 }
607 }
608 }
609
610 static int
611 DoSalvageVolume(struct SalvageQueueNode * node, int slot)
612 {
613 char *filename = NULL;
614 struct logOptions logopts;
615 struct DiskPartition64 * partP;
616
617 /* do not allow further forking inside salvager */
618 canfork = 0;
619
620 /*
621 * Do not attempt to close parent's log file handle as
622 * another thread may have held the lock when fork was
623 * called!
624 */
625 memset(&logopts, 0, sizeof(logopts));
626 logopts.lopt_dest = logDest_file;
627 logopts.lopt_rotateStyle = logRotate_none;
628 if (asprintf(&filename, "%s.%d",
629 AFSDIR_SERVER_SLVGLOG_FILEPATH, getpid()) < 0) {
630 fprintf(stderr, "out of memory\n");
631 return ENOMEM;
632 }
633 logopts.lopt_filename = filename;
634 OpenLog(&logopts);
635 free(filename);
636
637 if (node->command.sop.parent <= 0) {
638 Log("salvageServer: invalid volume id specified; salvage aborted\n");
639 return 1;
640 }
641
642 partP = VGetPartition(node->command.sop.partName, 0);
643 if (!partP) {
644 Log("salvageServer: Unknown or unmounted partition %s; salvage aborted\n",
645 node->command.sop.partName);
646 return 1;
647 }
648
649 /* obtain a shared salvage lock in the child worker, so if the
650 * salvageserver restarts (and we continue), we will still hold a lock and
651 * prevent standalone salvagers from interfering */
652 ObtainSharedSalvageLock();
653
654 /* Salvage individual volume; don't notify fs */
655 SalvageFileSys1(partP, node->command.sop.parent);
656
657 CloseLog();
658 return 0;
659 }
660
661
662 static void *
663 SalvageChildReaperThread(void * args)
664 {
665 int slot, pid, status;
666 struct log_cleanup_node * cleanup;
667
668 opr_mutex_enter(&worker_lock);
669
670 /* loop reaping our children */
671 while (1) {
672 /* wait() won't block unless we have children, so
673 * block on the cond var if we're childless */
674 while (current_workers == 0) {
675 opr_cv_wait(&worker_cv, &worker_lock);
676 }
677
678 opr_mutex_exit(&worker_lock);
679
680 cleanup = malloc(sizeof(struct log_cleanup_node));
681
682 while (Reap_Child("salvageserver", &pid, &status) < 0) {
683 /* try to prevent livelock if something goes wrong */
684 sleep(1);
685 }
686
687 VOL_LOCK;
688 for (slot = 0; slot < Parallel; slot++) {
689 if (child_slot[slot] == pid)
690 break;
691 }
692 opr_Assert(slot < Parallel);
693 child_slot[slot] = 0;
694 VOL_UNLOCK;
695
696 SALVSYNC_doneWorkByPid(pid, status);
697
698 opr_mutex_enter(&worker_lock);
699
700 if (cleanup) {
701 cleanup->pid = pid;
702 queue_Append(&log_cleanup_queue, cleanup);
703 opr_cv_signal(&log_cleanup_queue.queue_change_cv);
704 }
705
706 /* ok, we've reaped a child */
707 current_workers--;
708 opr_cv_broadcast(&worker_cv);
709 }
710
711 AFS_UNREACHED(return(NULL));
712 }
713
714 static int
715 Reap_Child(char *prog, int * pid, int * status)
716 {
717 int ret;
718 ret = wait(status);
719
720 if (ret >= 0) {
721 *pid = ret;
722 if (WCOREDUMP(*status))
723 Log("\"%s\" core dumped!\n", prog);
724 if ((WIFSIGNALED(*status) != 0) ||
725 ((WEXITSTATUS(*status) != 0) &&
726 (WEXITSTATUS(*status) != SALSRV_EXIT_VOLGROUP_LINK)))
727 Log("\"%s\" (pid=%d) terminated abnormally!\n", prog, ret);
728 } else {
729 Log("wait returned -1\n");
730 }
731 return ret;
732 }
733
734 /*
735 * thread to combine salvager child logs
736 * back into the main salvageserver log
737 */
738 static void *
739 SalvageLogCleanupThread(void * arg)
740 {
741 struct log_cleanup_node * cleanup;
742
743 opr_mutex_enter(&worker_lock);
744
745 while (1) {
746 while (queue_IsEmpty(&log_cleanup_queue)) {
747 opr_cv_wait(&log_cleanup_queue.queue_change_cv, &worker_lock);
748 }
749
750 while (queue_IsNotEmpty(&log_cleanup_queue)) {
751 cleanup = queue_First(&log_cleanup_queue, log_cleanup_node);
752 queue_Remove(cleanup);
753 opr_mutex_exit(&worker_lock);
754 SalvageLogCleanup(cleanup->pid);
755 free(cleanup);
756 opr_mutex_enter(&worker_lock);
757 }
758 }
759
760 AFS_UNREACHED(opr_mutex_exit(&worker_lock));
761 AFS_UNREACHED(return(NULL));
762 }
763
764 #define LOG_XFER_BUF_SIZE 65536
765 static void
766 SalvageLogCleanup(int pid)
767 {
768 int pidlog, len;
769 char *fn = NULL;
770 char *buf = NULL;
771
772 if (asprintf(&fn, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH, pid) < 0) {
773 Log("Unable to write child log: out of memory\n");
774 goto done;
775 }
776
777 buf = calloc(1, LOG_XFER_BUF_SIZE);
778 if (buf == NULL) {
779 Log("Unable to write child log: out of memory\n");
780 goto done;
781 }
782
783 pidlog = open(fn, O_RDONLY);
784 unlink(fn);
785 if (pidlog < 0)
786 goto done;
787
788 len = read(pidlog, buf, LOG_XFER_BUF_SIZE);
789 while (len) {
790 WriteLogBuffer(buf, len);
791 len = read(pidlog, buf, LOG_XFER_BUF_SIZE);
792 }
793
794 close(pidlog);
795
796 done:
797 free(fn);
798 free(buf);
799 }
800
801 /* wake up every five minutes to see if a non-child salvage has finished */
802 #define SALVAGE_SCAN_POLL_INTERVAL 300
803
804 /**
805 * Thread to look for SalvageLog.$pid files that are not from our child
806 * worker salvagers, and notify SalvageLogCleanupThread to clean them
807 * up. This can happen if we restart during salvages, or the
808 * salvageserver crashes or something.
809 *
810 * @param arg unused
811 *
812 * @return always NULL
813 */
814 static void *
815 SalvageLogScanningThread(void * arg)
816 {
817 struct rx_queue log_watch_queue;
818 char *prefix;
819 int prefix_len;
820
821 queue_Init(&log_watch_queue);
822
823 prefix_len = asprintf(&prefix, "%s.", AFSDIR_SLVGLOG_FILE);
824 if (prefix_len >= 0) {
825 DIR *dp;
826 struct dirent *dirp;
827
828 dp = opendir(AFSDIR_LOGS_DIR);
829 opr_Assert(dp);
830
831 while ((dirp = readdir(dp)) != NULL) {
832 pid_t pid;
833 struct log_cleanup_node *cleanup;
834 int i;
835
836 if (strncmp(dirp->d_name, prefix, prefix_len) != 0) {
837 /* not a salvage logfile; skip */
838 continue;
839 }
840
841 errno = 0;
842 pid = strtol(dirp->d_name + prefix_len, NULL, 10);
843
844 if (errno != 0) {
845 /* file is SalvageLog.<something> but <something> isn't
846 * a pid, so skip */
847 continue;
848 }
849
850 VOL_LOCK;
851 for (i = 0; i < Parallel; ++i) {
852 if (pid == child_slot[i]) {
853 break;
854 }
855 }
856 VOL_UNLOCK;
857 if (i < Parallel) {
858 /* this pid is one of our children, so the reaper thread
859 * will take care of it; skip */
860 continue;
861 }
862
863 cleanup = malloc(sizeof(struct log_cleanup_node));
864 cleanup->pid = pid;
865
866 queue_Append(&log_watch_queue, cleanup);
867 }
868 free(prefix);
869 closedir(dp);
870 }
871
872 ScanLogs(&log_watch_queue);
873
874 while (queue_IsNotEmpty(&log_watch_queue)) {
875 sleep(SALVAGE_SCAN_POLL_INTERVAL);
876 ScanLogs(&log_watch_queue);
877 }
878
879 return NULL;
880 }
881
882 /**
883 * look through log_watch_queue, and if any processes are not still
884 * running, hand them off to the SalvageLogCleanupThread
885 *
886 * @param log_watch_queue a queue of PIDs that we should clean up if
887 * that PID has died
888 */
889 static void
890 ScanLogs(struct rx_queue *log_watch_queue)
891 {
892 struct log_cleanup_node *cleanup, *next;
893
894 opr_mutex_enter(&worker_lock);
895
896 for (queue_Scan(log_watch_queue, cleanup, next, log_cleanup_node)) {
897 /* if a process is still running, assume it's the salvage process
898 * still going, and keep waiting for it */
899 if (kill(cleanup->pid, 0) < 0 && errno == ESRCH) {
900 queue_Remove(cleanup);
901 queue_Append(&log_cleanup_queue, cleanup);
902 opr_cv_signal(&log_cleanup_queue.queue_change_cv);
903 }
904 }
905
906 opr_mutex_exit(&worker_lock);
907 }