Commit | Line | Data |
---|---|---|
805e021f CE |
1 | /* |
2 | * Copyright 2006-2007, Sine Nomine Associates and others. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * This software has been released under the terms of the IBM Public | |
6 | * License. For details, see the LICENSE file in the top-level source | |
7 | * directory or online at http://www.openafs.org/dl/license10.html | |
8 | */ | |
9 | ||
10 | /* | |
11 | * demand attach fs | |
12 | * online salvager daemon | |
13 | */ | |
14 | ||
15 | /* Main program file. Define globals. */ | |
16 | #define MAIN 1 | |
17 | ||
18 | #include <afsconfig.h> | |
19 | #include <afs/param.h> | |
20 | ||
21 | #include <roken.h> | |
22 | ||
23 | #ifdef HAVE_SYS_FILE_H | |
24 | #include <sys/file.h> | |
25 | #endif | |
26 | ||
27 | #ifdef AFS_NT40_ENV | |
28 | #include <WINNT/afsevent.h> | |
29 | #endif | |
30 | ||
31 | #ifndef WCOREDUMP | |
32 | #define WCOREDUMP(x) ((x) & 0200) | |
33 | #endif | |
34 | ||
35 | #include <afs/opr.h> | |
36 | #include <opr/lock.h> | |
37 | #include <afs/afsint.h> | |
38 | #include <rx/rx_queue.h> | |
39 | ||
40 | #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV) | |
41 | #if defined(AFS_VFSINCL_ENV) | |
42 | #include <sys/vnode.h> | |
43 | #ifdef AFS_SUN5_ENV | |
44 | #include <sys/fs/ufs_inode.h> | |
45 | #else | |
46 | #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV) | |
47 | #include <ufs/ufs/dinode.h> | |
48 | #include <ufs/ffs/fs.h> | |
49 | #else | |
50 | #include <ufs/inode.h> | |
51 | #endif | |
52 | #endif | |
53 | #else /* AFS_VFSINCL_ENV */ | |
54 | #ifdef AFS_OSF_ENV | |
55 | #include <ufs/inode.h> | |
56 | #else /* AFS_OSF_ENV */ | |
57 | #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV) | |
58 | #include <sys/inode.h> | |
59 | #endif | |
60 | #endif | |
61 | #endif /* AFS_VFSINCL_ENV */ | |
62 | #endif /* AFS_SGI_ENV */ | |
63 | #ifdef AFS_AIX_ENV | |
64 | #include <sys/vfs.h> | |
65 | #include <sys/lockf.h> | |
66 | #else | |
67 | #ifdef AFS_HPUX_ENV | |
68 | #include <checklist.h> | |
69 | #else | |
70 | #if defined(AFS_SGI_ENV) | |
71 | #include <mntent.h> | |
72 | #else | |
73 | #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV) | |
74 | #ifdef AFS_SUN5_ENV | |
75 | #include <sys/mnttab.h> | |
76 | #include <sys/mntent.h> | |
77 | #else | |
78 | #include <mntent.h> | |
79 | #endif | |
80 | #else | |
81 | #endif /* AFS_SGI_ENV */ | |
82 | #endif /* AFS_HPUX_ENV */ | |
83 | #endif | |
84 | #endif | |
85 | #ifndef AFS_NT40_ENV | |
86 | #include <afs/osi_inode.h> | |
87 | #endif | |
88 | #include <afs/cmd.h> | |
89 | #include <afs/afsutil.h> | |
90 | #include <afs/fileutil.h> | |
91 | #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */ | |
92 | #include <afs/dir.h> | |
93 | ||
94 | #include "nfs.h" | |
95 | #include "lwp.h" | |
96 | #include "lock.h" | |
97 | #include <afs/afssyscalls.h> | |
98 | #include "ihandle.h" | |
99 | #include "vnode.h" | |
100 | #include "volume.h" | |
101 | #include "partition.h" | |
102 | #include "daemon_com.h" | |
103 | #include "fssync.h" | |
104 | #include "salvsync.h" | |
105 | #include "viceinode.h" | |
106 | #include "salvage.h" | |
107 | #include "vol-salvage.h" | |
108 | #include "common.h" | |
109 | #ifdef AFS_NT40_ENV | |
110 | #include <pthread.h> | |
111 | #endif | |
112 | ||
113 | extern int ClientMode; | |
114 | ||
115 | #if !defined(AFS_DEMAND_ATTACH_FS) | |
116 | #error "online salvager only supported for demand attach fileserver" | |
117 | #endif /* AFS_DEMAND_ATTACH_FS */ | |
118 | ||
119 | #if defined(AFS_NT40_ENV) | |
120 | #error "online salvager not supported on NT" | |
121 | #endif /* AFS_NT40_ENV */ | |
122 | ||
123 | /*@+fcnmacros +macrofcndecl@*/ | |
124 | #ifdef O_LARGEFILE | |
125 | #define afs_fopen fopen64 | |
126 | #else /* !O_LARGEFILE */ | |
127 | #define afs_fopen fopen | |
128 | #endif /* !O_LARGEFILE */ | |
129 | /*@=fcnmacros =macrofcndecl@*/ | |
130 | ||
131 | ||
132 | ||
133 | static volatile int current_workers = 0; /* forked salvage children not yet reaped; incremented by SalvageServer and decremented by the reaper thread, both with worker_lock held */ | |
134 | static volatile struct rx_queue pending_q; /* initialized in SalvageServer; no other use is visible in this file */ | |
135 | static pthread_mutex_t worker_lock; /* held while touching current_workers and log_cleanup_queue */ | |
136 | static pthread_cond_t worker_cv; /* broadcast whenever current_workers changes */ | |
137 | ||
138 | static void * SalvageChildReaperThread(void *); | |
139 | static int DoSalvageVolume(struct SalvageQueueNode * node, int slot); | |
140 | ||
141 | static void SalvageServer(int argc, char **argv, struct logOptions *logopts); | |
142 | static void SalvageClient(VolumeId vid, char * pname); | |
143 | ||
144 | static int Reap_Child(char * prog, int * pid, int * status); | |
145 | ||
146 | static void * SalvageLogCleanupThread(void *); | |
147 | static void SalvageLogCleanup(int pid); | |
148 | ||
149 | static void * SalvageLogScanningThread(void *); | |
150 | static void ScanLogs(struct rx_queue *log_watch_queue); | |
151 | ||
152 | struct cmdline_rock { /* original command line, filled in by main() and handed to handleit() as its rock */ | |
153 | int argc; | |
154 | char **argv; | |
155 | }; | |
156 | ||
157 | struct log_cleanup_node { /* one per-child salvage log waiting to be merged into the main log */ | |
158 | struct rx_queue q; /* queue linkage */ | |
159 | int pid; /* pid used as the suffix of the child's log file name */ | |
160 | }; | |
161 | ||
162 | struct { /* work queue drained by SalvageLogCleanupThread; accessed with worker_lock held */ | |
163 | struct rx_queue queue_head; /* list of struct log_cleanup_node */ | |
164 | pthread_cond_t queue_change_cv; /* signalled after a node is appended */ | |
165 | } log_cleanup_queue; | |
166 | ||
167 | ||
168 | #define DEFAULT_PARALLELISM 4 /* allow 4 parallel salvage workers by default */ | |
169 | ||
170 | enum optionsList { /* parameter offsets: registered with cmd_AddParmAtOffset() in main(), read back in handleit() */ | |
171 | OPT_partition, | |
172 | OPT_volumeid, | |
173 | OPT_debug, | |
174 | OPT_nowrite, | |
175 | OPT_inodes, | |
176 | OPT_oktozap, | |
177 | OPT_rootinodes, | |
178 | OPT_salvagedirs, | |
179 | OPT_blockreads, | |
180 | OPT_parallel, | |
181 | OPT_tmpdir, | |
182 | OPT_orphans, | |
183 | OPT_syslog, | |
184 | OPT_syslogfacility, | |
185 | OPT_logfile, | |
186 | OPT_client, | |
187 | OPT_transarc_logs | |
188 | }; | |
189 | ||
190 | static int | |
191 | handleit(struct cmd_syndesc *opts, void *arock) | |
192 | { | |
193 | char pname[100]; | |
194 | afs_int32 seenpart = 0, seenvol = 0; | |
195 | VolumeId vid = 0; | |
196 | struct cmdline_rock *rock = (struct cmdline_rock *)arock; | |
197 | char *optstring = NULL; | |
198 | struct logOptions logopts; | |
199 | ||
200 | memset(&logopts, 0, sizeof(logopts)); | |
201 | ||
202 | #ifdef AFS_SGI_VNODE_GLUE | |
203 | if (afs_init_kernel_config(-1) < 0) { | |
204 | printf | |
205 | ("Can't determine NUMA configuration, not starting salvager.\n"); | |
206 | exit(1); | |
207 | } | |
208 | #endif | |
209 | ||
210 | cmd_OptionAsFlag(opts, OPT_debug, &debug); | |
211 | cmd_OptionAsFlag(opts, OPT_nowrite, &Testing); | |
212 | cmd_OptionAsFlag(opts, OPT_inodes, &ListInodeOption); | |
213 | cmd_OptionAsFlag(opts, OPT_oktozap, &OKToZap); | |
214 | cmd_OptionAsFlag(opts, OPT_rootinodes, &ShowRootFiles); | |
215 | cmd_OptionAsFlag(opts, OPT_salvagedirs, &RebuildDirs); | |
216 | cmd_OptionAsFlag(opts, OPT_blockreads, &forceR); | |
217 | if (cmd_OptionAsString(opts, OPT_parallel, &optstring) == 0) { | |
218 | if (strncmp(optstring, "all", 3) == 0) { | |
219 | PartsPerDisk = 1; | |
220 | } | |
221 | if (strlen(optstring) != 0) { | |
222 | Parallel = atoi(optstring); | |
223 | if (Parallel < 1) | |
224 | Parallel = 1; | |
225 | if (Parallel > MAXPARALLEL) { | |
226 | printf("Setting parallel salvages to maximum of %d \n", | |
227 | MAXPARALLEL); | |
228 | Parallel = MAXPARALLEL; | |
229 | } | |
230 | } | |
231 | free(optstring); | |
232 | optstring = NULL; | |
233 | } else { | |
234 | Parallel = min(DEFAULT_PARALLELISM, MAXPARALLEL); | |
235 | } | |
236 | if (cmd_OptionAsString(opts, OPT_tmpdir, &optstring) == 0) { | |
237 | DIR *dirp; | |
238 | dirp = opendir(optstring); | |
239 | if (!dirp) { | |
240 | printf | |
241 | ("Can't open temporary placeholder dir %s; using current partition \n", | |
242 | optstring); | |
243 | tmpdir = NULL; | |
244 | } else | |
245 | closedir(dirp); | |
246 | free(optstring); | |
247 | optstring = NULL; | |
248 | } | |
249 | if (cmd_OptionAsString(opts, OPT_orphans, &optstring) == 0) { | |
250 | if (Testing) | |
251 | orphans = ORPH_IGNORE; | |
252 | else if (strcmp(optstring, "remove") == 0 | |
253 | || strcmp(optstring, "r") == 0) | |
254 | orphans = ORPH_REMOVE; | |
255 | else if (strcmp(optstring, "attach") == 0 | |
256 | || strcmp(optstring, "a") == 0) | |
257 | orphans = ORPH_ATTACH; | |
258 | free(optstring); | |
259 | optstring = NULL; | |
260 | } | |
261 | ||
262 | #ifdef HAVE_SYSLOG | |
263 | if (cmd_OptionPresent(opts, OPT_syslog)) { | |
264 | if (cmd_OptionPresent(opts, OPT_logfile)) { | |
265 | fprintf(stderr, "Invalid options: -syslog and -logfile are exclusive.\n"); | |
266 | return -1; | |
267 | } | |
268 | if (cmd_OptionPresent(opts, OPT_transarc_logs)) { | |
269 | fprintf(stderr, "Invalid options: -syslog and -transarc-logs are exclusive.\n"); | |
270 | return -1; | |
271 | } | |
272 | logopts.lopt_dest = logDest_syslog; | |
273 | logopts.lopt_facility = LOG_DAEMON; | |
274 | logopts.lopt_tag = "salvageserver"; | |
275 | cmd_OptionAsInt(opts, OPT_syslogfacility, &logopts.lopt_facility); | |
276 | } else | |
277 | #endif | |
278 | { | |
279 | logopts.lopt_dest = logDest_file; | |
280 | if (cmd_OptionPresent(opts, OPT_transarc_logs)) { | |
281 | logopts.lopt_rotateOnOpen = 1; | |
282 | logopts.lopt_rotateStyle = logRotate_old; | |
283 | } | |
284 | if (cmd_OptionPresent(opts, OPT_logfile)) | |
285 | cmd_OptionAsString(opts, OPT_logfile, (char**)&logopts.lopt_filename); | |
286 | else | |
287 | logopts.lopt_filename = AFSDIR_SERVER_SALSRVLOG_FILEPATH; | |
288 | } | |
289 | ||
290 | if (cmd_OptionPresent(opts, OPT_client)) { | |
291 | if (cmd_OptionAsString(opts, OPT_partition, &optstring) == 0) { | |
292 | seenpart = 1; | |
293 | strlcpy(pname, optstring, sizeof(pname)); | |
294 | free(optstring); | |
295 | optstring = NULL; | |
296 | } | |
297 | if (cmd_OptionAsString(opts, OPT_volumeid, &optstring) == 0) { | |
298 | char *end; | |
299 | unsigned long vid_l; | |
300 | seenvol = 1; | |
301 | vid_l = strtoul(optstring, &end, 10); | |
302 | if (vid_l >= MAX_AFS_UINT32 || vid_l == ULONG_MAX || *end != '\0') { | |
303 | printf("Invalid volume id specified; salvage aborted\n"); | |
304 | exit(-1); | |
305 | } | |
306 | vid = (VolumeId)vid_l; | |
307 | } | |
308 | ||
309 | if (!seenpart || !seenvol) { | |
310 | printf("You must specify '-partition' and '-volumeid' with the '-client' option\n"); | |
311 | exit(-1); | |
312 | } | |
313 | ||
314 | SalvageClient(vid, pname); | |
315 | ||
316 | } else { /* salvageserver mode */ | |
317 | SalvageServer(rock->argc, rock->argv, &logopts); | |
318 | } | |
319 | return (0); | |
320 | } | |
321 | ||
322 | ||
323 | #ifndef AFS_NT40_ENV | |
324 | #include "AFS_component_version_number.c" | |
325 | #endif | |
326 | #define MAX_ARGS 128 | |
327 | #ifdef AFS_NT40_ENV | |
328 | char *save_args[MAX_ARGS]; | |
329 | int n_save_args = 0; | |
330 | pthread_t main_thread; | |
331 | #endif | |
332 | ||
333 | int | |
334 | main(int argc, char **argv) | |
335 | { | |
336 | struct cmd_syndesc *ts; | |
337 | int err = 0; | |
338 | struct cmdline_rock arock; | |
339 | ||
340 | #ifdef AFS_AIX32_ENV | |
341 | /* | |
342 | * The following signal action for AIX is necessary so that in case of a | |
343 | * crash (i.e. core is generated) we can include the user's data section | |
344 | * in the core dump. Unfortunately, by default, only a partial core is | |
345 | * generated which, in many cases, isn't too useful. | |
346 | */ | |
347 | struct sigaction nsa; | |
348 | ||
349 | sigemptyset(&nsa.sa_mask); | |
350 | nsa.sa_handler = SIG_DFL; | |
351 | nsa.sa_flags = SA_FULLDUMP; | |
352 | sigaction(SIGABRT, &nsa, NULL); | |
353 | sigaction(SIGSEGV, &nsa, NULL); | |
354 | #endif | |
355 | ||
356 | /* Initialize directory paths */ | |
357 | if (!(initAFSDirPath() & AFSDIR_SERVER_PATHS_OK)) { | |
358 | #ifdef AFS_NT40_ENV | |
359 | ReportErrorEventAlt(AFSEVT_SVR_NO_INSTALL_DIR, 0, argv[0], 0); | |
360 | #endif | |
361 | fprintf(stderr, "%s: Unable to obtain AFS server directory.\n", | |
362 | argv[0]); | |
363 | exit(2); | |
364 | } | |
365 | #ifdef AFS_NT40_ENV | |
366 | /* Default to binary mode for fopen() */ | |
367 | _set_fmode(_O_BINARY); | |
368 | ||
369 | main_thread = pthread_self(); | |
370 | if (spawnDatap && spawnDataLen) { | |
371 | /* This is a child per partition salvager. Don't setup log or | |
372 | * try to lock the salvager lock. | |
373 | */ | |
374 | if (nt_SetupPartitionSalvage(spawnDatap, spawnDataLen) < 0) | |
375 | exit(3); | |
376 | } else { | |
377 | #endif | |
378 | ||
379 | #ifndef AFS_NT40_ENV | |
380 | if (geteuid() != 0) { | |
381 | printf("Salvager must be run as root.\n"); | |
382 | fflush(stdout); | |
383 | Exit(0); | |
384 | } | |
385 | #endif | |
386 | ||
387 | /* bad for normal help flag processing, but can do nada */ | |
388 | ||
389 | #ifdef AFS_NT40_ENV | |
390 | } | |
391 | #endif | |
392 | ||
393 | arock.argc = argc; | |
394 | arock.argv = argv; | |
395 | ||
396 | ||
397 | ts = cmd_CreateSyntax("initcmd", handleit, &arock, 0, "initialize the program"); | |
398 | cmd_AddParmAtOffset(ts, OPT_partition, "-partition", CMD_SINGLE, | |
399 | CMD_OPTIONAL, "Name of partition to salvage"); | |
400 | cmd_AddParmAtOffset(ts, OPT_volumeid, "-volumeid", CMD_SINGLE, CMD_OPTIONAL, | |
401 | "Volume Id to salvage"); | |
402 | cmd_AddParmAtOffset(ts, OPT_debug, "-debug", CMD_FLAG, CMD_OPTIONAL, | |
403 | "Run in Debugging mode"); | |
404 | cmd_AddParmAtOffset(ts, OPT_nowrite, "-nowrite", CMD_FLAG, CMD_OPTIONAL, | |
405 | "Run readonly/test mode"); | |
406 | cmd_AddParmAtOffset(ts, OPT_inodes, "-inodes", CMD_FLAG, CMD_OPTIONAL, | |
407 | "Just list affected afs inodes - debugging flag"); | |
408 | cmd_AddParmAtOffset(ts, OPT_oktozap, "-oktozap", CMD_FLAG, CMD_OPTIONAL, | |
409 | "Give permission to destroy bogus inodes/volumes - debugging flag"); | |
410 | cmd_AddParmAtOffset(ts, OPT_rootinodes, "-rootinodes", CMD_FLAG, | |
411 | CMD_OPTIONAL, "Show inodes owned by root - debugging flag"); | |
412 | cmd_AddParmAtOffset(ts, OPT_salvagedirs, "-salvagedirs", CMD_FLAG, | |
413 | CMD_OPTIONAL, "Force rebuild/salvage of all directories"); | |
414 | cmd_AddParmAtOffset(ts, OPT_blockreads, "-blockreads", CMD_FLAG, | |
415 | CMD_OPTIONAL, "Read smaller blocks to handle IO/bad blocks"); | |
416 | cmd_AddParmAtOffset(ts, OPT_parallel, "-parallel", CMD_SINGLE, CMD_OPTIONAL, | |
417 | "# of max parallel partition salvaging"); | |
418 | cmd_AddParmAtOffset(ts, OPT_tmpdir, "-tmpdir", CMD_SINGLE, CMD_OPTIONAL, | |
419 | "Name of dir to place tmp files "); | |
420 | cmd_AddParmAtOffset(ts, OPT_orphans, "-orphans", CMD_SINGLE, CMD_OPTIONAL, | |
421 | "ignore | remove | attach"); | |
422 | ||
423 | #ifdef HAVE_SYSLOG | |
424 | cmd_AddParmAtOffset(ts, OPT_syslog, "-syslog", CMD_FLAG, CMD_OPTIONAL, | |
425 | "Write salvage log to syslogs"); | |
426 | cmd_AddParmAtOffset(ts, OPT_syslogfacility, "-syslogfacility", CMD_SINGLE, | |
427 | CMD_OPTIONAL, "Syslog facility number to use"); | |
428 | #endif | |
429 | ||
430 | cmd_AddParmAtOffset(ts, OPT_client, "-client", CMD_FLAG, CMD_OPTIONAL, | |
431 | "Use SALVSYNC to ask salvageserver to salvage a volume"); | |
432 | ||
433 | cmd_AddParmAtOffset(ts, OPT_logfile, "-logfile", CMD_SINGLE, CMD_OPTIONAL, | |
434 | "Location of log file "); | |
435 | ||
436 | cmd_AddParmAtOffset(ts, OPT_transarc_logs, "-transarc-logs", CMD_FLAG, | |
437 | CMD_OPTIONAL, "enable Transarc style logging"); | |
438 | ||
439 | err = cmd_Dispatch(argc, argv); | |
440 | Exit(err); | |
441 | return 0; /* not reached */ | |
442 | } | |
443 | ||
444 | static void | |
445 | SalvageClient(VolumeId vid, char * pname) | |
446 | { | |
447 | int done = 0; | |
448 | afs_int32 code; | |
449 | SYNC_response res; | |
450 | SALVSYNC_response_hdr sres; | |
451 | VolumePackageOptions opts; | |
452 | ||
453 | /* Send Log() messages to stderr in client mode. */ | |
454 | ClientMode = 1; | |
455 | ||
456 | VOptDefaults(volumeUtility, &opts); | |
457 | if (VInitVolumePackage2(volumeUtility, &opts)) { | |
458 | /* VInitVolumePackage2 can fail on e.g. partition attachment errors, | |
459 | * but we don't really care, since all we're doing is trying to use | |
460 | * SALVSYNC */ | |
461 | fprintf(stderr, "errors encountered initializing volume package, but " | |
462 | "trying to continue anyway\n"); | |
463 | } | |
464 | SALVSYNC_clientInit(); | |
465 | ||
466 | code = SALVSYNC_SalvageVolume(vid, pname, SALVSYNC_SALVAGE, SALVSYNC_OPERATOR, 0, NULL); | |
467 | if (code != SYNC_OK) { | |
468 | goto sync_error; | |
469 | } | |
470 | ||
471 | res.payload.buf = (void *) &sres; | |
472 | res.payload.len = sizeof(sres); | |
473 | ||
474 | while(!done) { | |
475 | sleep(2); | |
476 | code = SALVSYNC_SalvageVolume(vid, pname, SALVSYNC_QUERY, SALVSYNC_WHATEVER, 0, &res); | |
477 | if (code != SYNC_OK) { | |
478 | goto sync_error; | |
479 | } | |
480 | switch (sres.state) { | |
481 | case SALVSYNC_STATE_ERROR: | |
482 | printf("salvageserver reports salvage ended in an error; check log files for more details\n"); | |
483 | case SALVSYNC_STATE_DONE: | |
484 | case SALVSYNC_STATE_UNKNOWN: | |
485 | done = 1; | |
486 | } | |
487 | } | |
488 | SALVSYNC_clientFinis(); | |
489 | return; | |
490 | ||
491 | sync_error: | |
492 | if (code == SYNC_DENIED) { | |
493 | printf("salvageserver refused to salvage volume %u on partition %s\n", | |
494 | vid, pname); | |
495 | } else if (code == SYNC_BAD_COMMAND) { | |
496 | printf("SALVSYNC protocol mismatch; please make sure fileserver, volserver, salvageserver and salvager are same version\n"); | |
497 | } else if (code == SYNC_COM_ERROR) { | |
498 | printf("SALVSYNC communications error\n"); | |
499 | } | |
500 | SALVSYNC_clientFinis(); | |
501 | exit(-1); | |
502 | } | |
503 | ||
504 | static int * child_slot; /* Parallel entries allocated in SalvageServer: pid of the child in each slot, 0 when the slot is free; accessed under VOL_LOCK */ | |
505 | ||
506 | static void | |
507 | SalvageServer(int argc, char **argv, struct logOptions *logopts) | |
508 | { | |
509 | int pid, ret; | |
510 | struct SalvageQueueNode * node; | |
511 | pthread_t tid; | |
512 | pthread_attr_t attrs; | |
513 | int slot; | |
514 | VolumePackageOptions opts; | |
515 | ||
516 | /* All entries to the log will be appended. Useful if there are | |
517 | * multiple salvagers appending to the log. | |
518 | */ | |
519 | OpenLog(logopts); | |
520 | SetupLogSignals(); | |
521 | ||
522 | Log("%s\n", cml_version_number); | |
523 | LogCommandLine(argc, argv, "Online Salvage Server", | |
524 | SalvageVersion, "Starting OpenAFS", Log); | |
525 | /* Get and hold a lock for the duration of the salvage to make sure | |
526 | * that no other salvage runs at the same time. The routine | |
527 | * VInitVolumePackage2 (called below) makes sure that a file server or | |
528 | * other volume utilities don't interfere with the salvage. | |
529 | */ | |
530 | ||
531 | /* even demand attach online salvager | |
532 | * still needs this because we don't want | |
533 | * a stand-alone salvager to conflict with | |
534 | * the salvager daemon */ | |
535 | ObtainSharedSalvageLock(); | |
536 | ||
537 | child_slot = calloc(Parallel, sizeof(int)); | |
538 | opr_Assert(child_slot != NULL); | |
539 | ||
540 | /* initialize things */ | |
541 | VOptDefaults(salvageServer, &opts); | |
542 | if (VInitVolumePackage2(salvageServer, &opts)) { | |
543 | Log("Shutting down: errors encountered initializing volume package\n"); | |
544 | Exit(1); | |
545 | } | |
546 | DInit(10); | |
547 | queue_Init(&pending_q); | |
548 | queue_Init(&log_cleanup_queue); | |
549 | opr_mutex_init(&worker_lock); | |
550 | opr_cv_init(&worker_cv); | |
551 | opr_cv_init(&log_cleanup_queue.queue_change_cv); | |
552 | opr_Verify(pthread_attr_init(&attrs) == 0); | |
553 | ||
554 | /* start up the reaper and log cleaner threads */ | |
555 | opr_Verify(pthread_attr_setdetachstate(&attrs, | |
556 | PTHREAD_CREATE_DETACHED) == 0); | |
557 | opr_Verify(pthread_create(&tid, &attrs, | |
558 | &SalvageChildReaperThread, NULL) == 0); | |
559 | opr_Verify(pthread_create(&tid, &attrs, | |
560 | &SalvageLogCleanupThread, NULL) == 0); | |
561 | opr_Verify(pthread_create(&tid, &attrs, | |
562 | &SalvageLogScanningThread, NULL) == 0); | |
563 | ||
564 | /* loop forever serving requests */ | |
565 | while (1) { | |
566 | node = SALVSYNC_getWork(); | |
567 | opr_Assert(node != NULL); | |
568 | ||
569 | Log("dispatching child to salvage volume %u...\n", | |
570 | node->command.sop.parent); | |
571 | ||
572 | VOL_LOCK; | |
573 | /* find a slot */ | |
574 | for (slot = 0; slot < Parallel; slot++) { | |
575 | if (!child_slot[slot]) | |
576 | break; | |
577 | } | |
578 | opr_Assert (slot < Parallel); | |
579 | ||
580 | do_fork: | |
581 | pid = Fork(); | |
582 | if (pid == 0) { | |
583 | VOL_UNLOCK; | |
584 | ret = DoSalvageVolume(node, slot); | |
585 | Exit(ret); | |
586 | } else if (pid < 0) { | |
587 | Log("failed to fork child worker process\n"); | |
588 | sleep(1); | |
589 | goto do_fork; | |
590 | } else { | |
591 | child_slot[slot] = pid; | |
592 | node->pid = pid; | |
593 | VOL_UNLOCK; | |
594 | ||
595 | opr_mutex_enter(&worker_lock); | |
596 | current_workers++; | |
597 | ||
598 | /* let the reaper thread know another worker was spawned */ | |
599 | opr_cv_broadcast(&worker_cv); | |
600 | ||
601 | /* if we're overquota, wait for the reaper */ | |
602 | while (current_workers >= Parallel) { | |
603 | opr_cv_wait(&worker_cv, &worker_lock); | |
604 | } | |
605 | opr_mutex_exit(&worker_lock); | |
606 | } | |
607 | } | |
608 | } | |
609 | ||
610 | static int | |
611 | DoSalvageVolume(struct SalvageQueueNode * node, int slot) | |
612 | { | |
613 | char *filename = NULL; | |
614 | struct logOptions logopts; | |
615 | struct DiskPartition64 * partP; | |
616 | ||
617 | /* do not allow further forking inside salvager */ | |
618 | canfork = 0; | |
619 | ||
620 | /* | |
621 | * Do not attempt to close parent's log file handle as | |
622 | * another thread may have held the lock when fork was | |
623 | * called! | |
624 | */ | |
625 | memset(&logopts, 0, sizeof(logopts)); | |
626 | logopts.lopt_dest = logDest_file; | |
627 | logopts.lopt_rotateStyle = logRotate_none; | |
628 | if (asprintf(&filename, "%s.%d", | |
629 | AFSDIR_SERVER_SLVGLOG_FILEPATH, getpid()) < 0) { | |
630 | fprintf(stderr, "out of memory\n"); | |
631 | return ENOMEM; | |
632 | } | |
633 | logopts.lopt_filename = filename; | |
634 | OpenLog(&logopts); | |
635 | free(filename); | |
636 | ||
637 | if (node->command.sop.parent <= 0) { | |
638 | Log("salvageServer: invalid volume id specified; salvage aborted\n"); | |
639 | return 1; | |
640 | } | |
641 | ||
642 | partP = VGetPartition(node->command.sop.partName, 0); | |
643 | if (!partP) { | |
644 | Log("salvageServer: Unknown or unmounted partition %s; salvage aborted\n", | |
645 | node->command.sop.partName); | |
646 | return 1; | |
647 | } | |
648 | ||
649 | /* obtain a shared salvage lock in the child worker, so if the | |
650 | * salvageserver restarts (and we continue), we will still hold a lock and | |
651 | * prevent standalone salvagers from interfering */ | |
652 | ObtainSharedSalvageLock(); | |
653 | ||
654 | /* Salvage individual volume; don't notify fs */ | |
655 | SalvageFileSys1(partP, node->command.sop.parent); | |
656 | ||
657 | CloseLog(); | |
658 | return 0; | |
659 | } | |
660 | ||
661 | ||
662 | static void * | |
663 | SalvageChildReaperThread(void * args) | |
664 | { | |
665 | int slot, pid, status; | |
666 | struct log_cleanup_node * cleanup; | |
667 | ||
668 | opr_mutex_enter(&worker_lock); | |
669 | ||
670 | /* loop reaping our children */ | |
671 | while (1) { | |
672 | /* wait() won't block unless we have children, so | |
673 | * block on the cond var if we're childless */ | |
674 | while (current_workers == 0) { | |
675 | opr_cv_wait(&worker_cv, &worker_lock); | |
676 | } | |
677 | ||
678 | opr_mutex_exit(&worker_lock); | |
679 | ||
680 | cleanup = malloc(sizeof(struct log_cleanup_node)); | |
681 | ||
682 | while (Reap_Child("salvageserver", &pid, &status) < 0) { | |
683 | /* try to prevent livelock if something goes wrong */ | |
684 | sleep(1); | |
685 | } | |
686 | ||
687 | VOL_LOCK; | |
688 | for (slot = 0; slot < Parallel; slot++) { | |
689 | if (child_slot[slot] == pid) | |
690 | break; | |
691 | } | |
692 | opr_Assert(slot < Parallel); | |
693 | child_slot[slot] = 0; | |
694 | VOL_UNLOCK; | |
695 | ||
696 | SALVSYNC_doneWorkByPid(pid, status); | |
697 | ||
698 | opr_mutex_enter(&worker_lock); | |
699 | ||
700 | if (cleanup) { | |
701 | cleanup->pid = pid; | |
702 | queue_Append(&log_cleanup_queue, cleanup); | |
703 | opr_cv_signal(&log_cleanup_queue.queue_change_cv); | |
704 | } | |
705 | ||
706 | /* ok, we've reaped a child */ | |
707 | current_workers--; | |
708 | opr_cv_broadcast(&worker_cv); | |
709 | } | |
710 | ||
711 | AFS_UNREACHED(return(NULL)); | |
712 | } | |
713 | ||
714 | static int | |
715 | Reap_Child(char *prog, int * pid, int * status) | |
716 | { | |
717 | int ret; | |
718 | ret = wait(status); | |
719 | ||
720 | if (ret >= 0) { | |
721 | *pid = ret; | |
722 | if (WCOREDUMP(*status)) | |
723 | Log("\"%s\" core dumped!\n", prog); | |
724 | if ((WIFSIGNALED(*status) != 0) || | |
725 | ((WEXITSTATUS(*status) != 0) && | |
726 | (WEXITSTATUS(*status) != SALSRV_EXIT_VOLGROUP_LINK))) | |
727 | Log("\"%s\" (pid=%d) terminated abnormally!\n", prog, ret); | |
728 | } else { | |
729 | Log("wait returned -1\n"); | |
730 | } | |
731 | return ret; | |
732 | } | |
733 | ||
734 | /* | |
735 | * thread to combine salvager child logs | |
736 | * back into the main salvageserver log | |
737 | */ | |
738 | static void * | |
739 | SalvageLogCleanupThread(void * arg) | |
740 | { | |
741 | struct log_cleanup_node * cleanup; | |
742 | ||
743 | opr_mutex_enter(&worker_lock); | |
744 | ||
745 | while (1) { | |
746 | while (queue_IsEmpty(&log_cleanup_queue)) { | |
747 | opr_cv_wait(&log_cleanup_queue.queue_change_cv, &worker_lock); | |
748 | } | |
749 | ||
750 | while (queue_IsNotEmpty(&log_cleanup_queue)) { | |
751 | cleanup = queue_First(&log_cleanup_queue, log_cleanup_node); | |
752 | queue_Remove(cleanup); | |
753 | opr_mutex_exit(&worker_lock); | |
754 | SalvageLogCleanup(cleanup->pid); | |
755 | free(cleanup); | |
756 | opr_mutex_enter(&worker_lock); | |
757 | } | |
758 | } | |
759 | ||
760 | AFS_UNREACHED(opr_mutex_exit(&worker_lock)); | |
761 | AFS_UNREACHED(return(NULL)); | |
762 | } | |
763 | ||
764 | #define LOG_XFER_BUF_SIZE 65536 | |
765 | static void | |
766 | SalvageLogCleanup(int pid) | |
767 | { | |
768 | int pidlog, len; | |
769 | char *fn = NULL; | |
770 | char *buf = NULL; | |
771 | ||
772 | if (asprintf(&fn, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH, pid) < 0) { | |
773 | Log("Unable to write child log: out of memory\n"); | |
774 | goto done; | |
775 | } | |
776 | ||
777 | buf = calloc(1, LOG_XFER_BUF_SIZE); | |
778 | if (buf == NULL) { | |
779 | Log("Unable to write child log: out of memory\n"); | |
780 | goto done; | |
781 | } | |
782 | ||
783 | pidlog = open(fn, O_RDONLY); | |
784 | unlink(fn); | |
785 | if (pidlog < 0) | |
786 | goto done; | |
787 | ||
788 | len = read(pidlog, buf, LOG_XFER_BUF_SIZE); | |
789 | while (len) { | |
790 | WriteLogBuffer(buf, len); | |
791 | len = read(pidlog, buf, LOG_XFER_BUF_SIZE); | |
792 | } | |
793 | ||
794 | close(pidlog); | |
795 | ||
796 | done: | |
797 | free(fn); | |
798 | free(buf); | |
799 | } | |
800 | ||
801 | /* wake up every five minutes to see if a non-child salvage has finished */ | |
802 | #define SALVAGE_SCAN_POLL_INTERVAL 300 | |
803 | ||
804 | /** | |
805 | * Thread to look for SalvageLog.$pid files that are not from our child | |
806 | * worker salvagers, and notify SalvageLogCleanupThread to clean them | |
807 | * up. This can happen if we restart during salvages, or the | |
808 | * salvageserver crashes or something. | |
809 | * | |
810 | * @param arg unused | |
811 | * | |
812 | * @return always NULL | |
813 | */ | |
814 | static void * | |
815 | SalvageLogScanningThread(void * arg) | |
816 | { | |
817 | struct rx_queue log_watch_queue; | |
818 | char *prefix; | |
819 | int prefix_len; | |
820 | ||
821 | queue_Init(&log_watch_queue); | |
822 | ||
823 | prefix_len = asprintf(&prefix, "%s.", AFSDIR_SLVGLOG_FILE); | |
824 | if (prefix_len >= 0) { | |
825 | DIR *dp; | |
826 | struct dirent *dirp; | |
827 | ||
828 | dp = opendir(AFSDIR_LOGS_DIR); | |
829 | opr_Assert(dp); | |
830 | ||
831 | while ((dirp = readdir(dp)) != NULL) { | |
832 | pid_t pid; | |
833 | struct log_cleanup_node *cleanup; | |
834 | int i; | |
835 | ||
836 | if (strncmp(dirp->d_name, prefix, prefix_len) != 0) { | |
837 | /* not a salvage logfile; skip */ | |
838 | continue; | |
839 | } | |
840 | ||
841 | errno = 0; | |
842 | pid = strtol(dirp->d_name + prefix_len, NULL, 10); | |
843 | ||
844 | if (errno != 0) { | |
845 | /* file is SalvageLog.<something> but <something> isn't | |
846 | * a pid, so skip */ | |
847 | continue; | |
848 | } | |
849 | ||
850 | VOL_LOCK; | |
851 | for (i = 0; i < Parallel; ++i) { | |
852 | if (pid == child_slot[i]) { | |
853 | break; | |
854 | } | |
855 | } | |
856 | VOL_UNLOCK; | |
857 | if (i < Parallel) { | |
858 | /* this pid is one of our children, so the reaper thread | |
859 | * will take care of it; skip */ | |
860 | continue; | |
861 | } | |
862 | ||
863 | cleanup = malloc(sizeof(struct log_cleanup_node)); | |
864 | cleanup->pid = pid; | |
865 | ||
866 | queue_Append(&log_watch_queue, cleanup); | |
867 | } | |
868 | free(prefix); | |
869 | closedir(dp); | |
870 | } | |
871 | ||
872 | ScanLogs(&log_watch_queue); | |
873 | ||
874 | while (queue_IsNotEmpty(&log_watch_queue)) { | |
875 | sleep(SALVAGE_SCAN_POLL_INTERVAL); | |
876 | ScanLogs(&log_watch_queue); | |
877 | } | |
878 | ||
879 | return NULL; | |
880 | } | |
881 | ||
882 | /** | |
883 | * look through log_watch_queue, and if any processes are not still | |
884 | * running, hand them off to the SalvageLogCleanupThread | |
885 | * | |
886 | * @param log_watch_queue a queue of PIDs that we should clean up if | |
887 | * that PID has died | |
888 | */ | |
889 | static void | |
890 | ScanLogs(struct rx_queue *log_watch_queue) | |
891 | { | |
892 | struct log_cleanup_node *cleanup, *next; | |
893 | ||
894 | opr_mutex_enter(&worker_lock); | |
895 | ||
896 | for (queue_Scan(log_watch_queue, cleanup, next, log_cleanup_node)) { | |
897 | /* if a process is still running, assume it's the salvage process | |
898 | * still going, and keep waiting for it */ | |
899 | if (kill(cleanup->pid, 0) < 0 && errno == ESRCH) { | |
900 | queue_Remove(cleanup); | |
901 | queue_Append(&log_cleanup_queue, cleanup); | |
902 | opr_cv_signal(&log_cleanup_queue.queue_change_cv); | |
903 | } | |
904 | } | |
905 | ||
906 | opr_mutex_exit(&worker_lock); | |
907 | } |