Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / bozo / fsbnodeops.c
CommitLineData
805e021f
CE
1/*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10#include <afsconfig.h>
11#include <afs/param.h>
12
13#include <afs/procmgmt.h>
14#include <roken.h>
15#include <afs/opr.h>
16
17#include <lwp.h>
18#include <rx/rx.h>
19#include <afs/afsutil.h>
20#include <opr/queue.h>
21
22#include "bnode.h"
23#include "bnode_internal.h"
24#include "bosprototypes.h"
25
/* Defined outside this file.  When non-NULL, fs_procstarted() creates a pid
 * file for each started process and fs_procexit() deletes it again. */
extern char *DoPidFiles;
/* Set to 1 by NudgeProcs() when it finds the salvager running alongside the
 * file server; nothing in the visible part of this file reads it back. */
static int emergency = 0;

/* if this file exists, then we have to salvage the file system */
/* (the bnode instance name is appended to this prefix to form the file name) */
#define SALFILE "SALVAGE."

#define POLLTIME 20 /* default bnode timeout period handed to bnode_SetTimeout() */
#define SDTIME 60 /* time in seconds given to a process to evaporate */
34
35/* basic rules:
36 Normal operation involves having the file server and the vol server both running.
37
38 If the vol server terminates, it can simply be restarted.
39
 If the file server terminates, the disk must be salvaged before the file server
41 can be restarted. In order to restart either the file server or the salvager,
42 the vol server must be shut down.
43
44 If the file server terminates *normally* (exits after receiving a SIGQUIT)
45 then we don't have to salvage it.
46
47 The needsSalvage flag is set when the file server is started. It is cleared
48 if the file server exits when fileSDW is true but fileKillSent is false,
49 indicating that it exited after receiving a quit, but before we sent it a kill.
50
51 The needsSalvage flag is cleared when the salvager exits.
52*/
53
/*
 * Per-instance state for the "fs" and "dafs" bnode types.  The generic bnode
 * header comes first so that a struct fsbnode pointer can be converted to a
 * struct bnode pointer (see fsbnode2bnode()).  For each managed process there
 * is a command string, a process handle, and Running / SDW (shutdown wait) /
 * KillSent flags.
 */
struct fsbnode {
    struct bnode b;
    afs_int32 timeSDStarted; /* time shutdown operation started */
    char *filecmd; /* command to start primary file server */
    char *volcmd; /* command to start secondary vol server */
    char *salsrvcmd; /* command to start salvageserver (demand attach fs) */
    char *salcmd; /* command to start salvager */
    char *scancmd; /* command to start scanner (MR-AFS) */
    struct bnode_proc *fileProc; /* process for file server */
    struct bnode_proc *volProc; /* process for vol server */
    struct bnode_proc *salsrvProc; /* process for salvageserver (demand attach fs) */
    struct bnode_proc *salProc; /* process for salvager */
    struct bnode_proc *scanProc; /* process for scanner (MR-AFS) */
    afs_int32 lastFileStart; /* last start for file */
    afs_int32 lastVolStart; /* last start for vol */
    afs_int32 lastSalsrvStart; /* last start for salvageserver (demand attach fs) */
    afs_int32 lastScanStart; /* last start for scanner (MR-AFS) */
    char fileRunning; /* file process is running */
    char volRunning; /* volser is running */
    char salsrvRunning; /* salvageserver is running (demand attach fs) */
    char salRunning; /* salvager is running */
    char scanRunning; /* scanner is running (MR_AFS) */
    char fileSDW; /* file shutdown wait */
    char volSDW; /* vol shutdown wait */
    char salsrvSDW; /* salvageserver shutdown wait (demand attach fs) */
    char salSDW; /* waiting for the salvager to shutdown */
    char scanSDW; /* scanner shutdown wait (MR_AFS) */
    char fileKillSent; /* kill signal has been sent */
    char volKillSent; /* kill signal has been sent to the vol server */
    char salsrvKillSent; /* kill signal has been sent (demand attach fs) */
    char salKillSent; /* kill signal has been sent to the salvager */
    char scanKillSent; /* kill signal has been sent (MR_AFS) */
    char needsSalvage; /* salvage before running */
    char needsClock; /* do we need clock ticks */
};
89
/* Constructors for the two bnode types implemented in this file.  Both take
 * six string arguments; fs_create() ignores its last one ("dummy"). */
struct bnode * fs_create(char *ainstance, char *afilecmd, char *avolcmd,
                         char *asalcmd, char *ascancmd, char *dummy);
struct bnode * dafs_create(char *ainstance, char *afilecmd, char *avolcmd,
                           char * asalsrvcmd, char *asalcmd, char *ascancmd);

/* Operations referenced from the bnode_ops tables below. */
static int fs_hascore(struct bnode *abnode);
static int fs_restartp(struct bnode *abnode);
static int fs_delete(struct bnode *abnode);
static int fs_timeout(struct bnode *abnode);
static int fs_getstat(struct bnode *abnode, afs_int32 * astatus);
static int fs_setstat(struct bnode *abnode, afs_int32 astatus);
static int fs_procstarted(struct bnode *abnode, struct bnode_proc *aproc);
static int fs_procexit(struct bnode *abnode, struct bnode_proc *aproc);
static int fs_getstring(struct bnode *abnode, char *abuffer, afs_int32 alen);
static int fs_getparm(struct bnode *abnode, afs_int32 aindex,
                      char *abuffer, afs_int32 alen);
static int dafs_getparm(struct bnode *abnode, afs_int32 aindex,
                        char *abuffer, afs_int32 alen);

/* Internal helpers: salvage flag file handling, timer management and the
 * start/stop state machine. */
static int SetSalFlag(struct fsbnode *abnode, int aflag);
static int RestoreSalFlag(struct fsbnode *abnode);
static void SetNeedsClock(struct fsbnode *);
static int NudgeProcs(struct fsbnode *);

/* Extracts the executable path from a full command line. */
static char *PathToExecutable(char *cmd);
115
/*
 * Operations table for the traditional (non demand attach) file server bnode.
 * This is a positional initializer; the slot order has to match the
 * declaration of struct bnode_ops, which is not visible in this file
 * (presumably in bnode.h -- confirm before reordering).
 */
struct bnode_ops fsbnode_ops = {
    fs_create,
    fs_timeout,
    fs_getstat,
    fs_setstat,
    fs_delete,
    fs_procexit,
    fs_getstring,
    fs_getparm,
    fs_restartp,
    fs_hascore,
    fs_procstarted,
};
129
/* demand attach fs bnode ops */
/*
 * Identical to fsbnode_ops except for the constructor (dafs_create) and the
 * parameter getter (dafs_getparm), which account for the extra salvageserver
 * command.  Positional initializer: slot order must match struct bnode_ops,
 * declared outside this file.
 */
struct bnode_ops dafsbnode_ops = {
    dafs_create,
    fs_timeout,
    fs_getstat,
    fs_setstat,
    fs_delete,
    fs_procexit,
    fs_getstring,
    dafs_getparm,
    fs_restartp,
    fs_hascore,
    fs_procstarted,
};
144
145/* Quick inline function to safely convert a fsbnode to a bnode without
146 * dropping type information
147 */
148
149static_inline struct bnode *
150fsbnode2bnode(struct fsbnode *abnode) {
151 return (struct bnode *) abnode;
152}
153
/*
 * Report whether any process belonging to this bnode has left a core file.
 *
 * This cannot live in bnode.c keyed off a running process' coreName, because
 * the process that dumped core may no longer exist when the query is made.
 * Instead every core name this bnode type can use is probed in turn: file
 * server, volserver, salvageserver, salvager and scanner (MR-AFS).
 *
 * Returns 1 if at least one core file exists, 0 otherwise.
 */
static int
fs_hascore(struct bnode *abnode)
{
    static char *const coreTags[] = {
        "file", "vol", "salsrv", "salv", "scan"
    };
    char corePath[256];
    size_t i;

    for (i = 0; i < sizeof(coreTags) / sizeof(coreTags[0]); i++) {
        bnode_CoreName(abnode, coreTags[i], corePath);
        if (access(corePath, 0) == 0)
            return 1;
    }

    /* no one left a core file */
    return 0;
}
193
194static int
195fs_restartp(struct bnode *bn)
196{
197 struct fsbnode *abnode = (struct fsbnode *)bn;
198 struct bnode_token *tt;
199 afs_int32 code;
200 struct stat tstat;
201
202 code = bnode_ParseLine(abnode->filecmd, &tt);
203 if (code)
204 return 0;
205 if (!tt)
206 return 0;
207 code = stat(tt->key, &tstat);
208 if (code) {
209 bnode_FreeTokens(tt);
210 return 0;
211 }
212 if (tstat.st_ctime > abnode->lastFileStart)
213 code = 1;
214 else
215 code = 0;
216 bnode_FreeTokens(tt);
217 if (code)
218 return code;
219
220 /* now do same for volcmd */
221 code = bnode_ParseLine(abnode->volcmd, &tt);
222 if (code)
223 return 0;
224 if (!tt)
225 return 0;
226 code = stat(tt->key, &tstat);
227 if (code) {
228 bnode_FreeTokens(tt);
229 return 0;
230 }
231 if (tstat.st_ctime > abnode->lastVolStart)
232 code = 1;
233 else
234 code = 0;
235 bnode_FreeTokens(tt);
236 if (code)
237 return code;
238
239 if (abnode->salsrvcmd) { /* only in demand attach fs */
240 /* now do same for salsrvcmd (demand attach fs) */
241 code = bnode_ParseLine(abnode->salsrvcmd, &tt);
242 if (code)
243 return 0;
244 if (!tt)
245 return 0;
246 code = stat(tt->key, &tstat);
247 if (code) {
248 bnode_FreeTokens(tt);
249 return 0;
250 }
251 if (tstat.st_ctime > abnode->lastSalsrvStart)
252 code = 1;
253 else
254 code = 0;
255 bnode_FreeTokens(tt);
256 }
257
258 if (abnode->scancmd) { /* Only in MR-AFS */
259 /* now do same for scancmd (MR-AFS) */
260 code = bnode_ParseLine(abnode->scancmd, &tt);
261 if (code)
262 return 0;
263 if (!tt)
264 return 0;
265 code = stat(tt->key, &tstat);
266 if (code) {
267 bnode_FreeTokens(tt);
268 return 0;
269 }
270 if (tstat.st_ctime > abnode->lastScanStart)
271 code = 1;
272 else
273 code = 0;
274 bnode_FreeTokens(tt);
275 }
276
277 return code;
278}
279
280/* set needsSalvage flag, creating file SALVAGE.<instancename> if
281 we need to salvage the file system (so we can tell over panic reboots */
282static int
283SetSalFlag(struct fsbnode *abnode, int aflag)
284{
285 char *filepath;
286 int fd;
287
288 /* don't use the salvage flag for demand attach fs */
289 if (abnode->salsrvcmd == NULL) {
290 abnode->needsSalvage = aflag;
291 if (asprintf(&filepath, "%s/%s%s", AFSDIR_SERVER_LOCAL_DIRPATH,
292 SALFILE, abnode->b.name) < 0)
293 return ENOMEM;
294 if (aflag) {
295 fd = open(filepath, O_CREAT | O_TRUNC | O_RDWR, 0666);
296 close(fd);
297 } else {
298 unlink(filepath);
299 }
300 free(filepath);
301 }
302 return 0;
303}
304
305/* set the needsSalvage flag according to the existence of the salvage file */
306static int
307RestoreSalFlag(struct fsbnode *abnode)
308{
309 char *filepath;
310
311 /* never set needs salvage flag for demand attach fs */
312 if (abnode->salsrvcmd != NULL) {
313 abnode->needsSalvage = 0;
314 } else {
315 if (asprintf(&filepath, "%s/%s%s", AFSDIR_SERVER_LOCAL_DIRPATH,
316 SALFILE, abnode->b.name) < 0)
317 return ENOMEM;
318 if (access(filepath, 0) == 0) {
319 /* file exists, so need to salvage */
320 abnode->needsSalvage = 1;
321 } else {
322 abnode->needsSalvage = 0;
323 }
324 free(filepath);
325 }
326 return 0;
327}
328
329static int
330fs_delete(struct bnode *bn)
331{
332 struct fsbnode *abnode = (struct fsbnode *)bn;
333
334 free(abnode->filecmd);
335 free(abnode->volcmd);
336 free(abnode->salcmd);
337 if (abnode->salsrvcmd)
338 free(abnode->salsrvcmd);
339 if (abnode->scancmd)
340 free(abnode->scancmd);
341 free(abnode);
342 return 0;
343}
344
345/*! PathToExecutable() - for both Unix and Windows, accept a full bnode
346 * command, including any arguments, and return only the path to the
347 * binary executable, with arguments stripped.
348 *
349 * \notes The caller will stat() the returned path.
350 *
351 * \param cmd - full bnode command string with arguments
352 *
353 * \return - string path to the binary executable, to be freed by the caller
354 */
355#ifdef AFS_NT40_ENV
356/* The Windows implementation must also ensure that an extension is
357 * specified in the path.
358 */
359
360static char *
361PathToExecutable(char *cmd)
362{
363 char cmdext[_MAX_EXT];
364 char *cmdexe, *cmdcopy, *cmdname;
365 size_t cmdend;
366
367 cmdcopy = strdup(cmd);
368 if (cmdcopy == NULL) {
369 return NULL;
370 }
371 /* strip off any arguments */
372 cmdname = strsep(&cmdcopy, " \t"); /* roken, I'm hopin' */
373 if (*cmdname == '\0') {
374 free(cmdname);
375 return NULL;
376 }
377 /* Is there an extension specified? */
378 _splitpath(cmdname, NULL, NULL, NULL, cmdext);
379 if (*cmdext == '\0') {
380 /* No, supply one. */
381 if (asprintf(&cmdexe, "%s.exe", cmdname) < 0) {
382 free(cmdname);
383 return NULL;
384 }
385 free(cmdname);
386 return cmdexe;
387 }
388 return cmdname;
389}
390#else /* AFS_NT40_ENV */
/*
 * Unix variant (no extension handling): return a freshly allocated copy of
 * the first space-delimited word of cmd.  Returns NULL on allocation failure
 * or when that word is empty, e.g. for an empty command or one that starts
 * with a space.  The caller frees the result.
 */
static char *
PathToExecutable(char *cmd)
{
    char *rest = strdup(cmd);
    char *exe;

    if (rest == NULL)
        return NULL;

    /* strsep() terminates the first word in place and returns the start of
     * the duplicated buffer, which is therefore the pointer to free */
    exe = strsep(&rest, " ");
    if (exe[0] != '\0')
        return exe;

    free(exe);
    return NULL;
}
407#endif /* AFS_NT40_ENV */
408
409
410struct bnode *
411fs_create(char *ainstance, char *afilecmd, char *avolcmd, char *asalcmd,
412 char *ascancmd, char *dummy)
413{
414 struct stat tstat;
415 struct fsbnode *te;
416 char *cmdname = NULL;
417 char *fileCmdpath, *volCmdpath, *salCmdpath, *scanCmdpath;
418 int bailout = 0;
419
420 fileCmdpath = volCmdpath = salCmdpath = scanCmdpath = NULL;
421 te = NULL;
422
423 /* construct local paths from canonical (wire-format) paths */
424 if (ConstructLocalBinPath(afilecmd, &fileCmdpath)) {
425 bozo_Log("BNODE: command path invalid '%s'\n", afilecmd);
426 bailout = 1;
427 goto done;
428 }
429 if (ConstructLocalBinPath(avolcmd, &volCmdpath)) {
430 bozo_Log("BNODE: command path invalid '%s'\n", avolcmd);
431 bailout = 1;
432 goto done;
433 }
434 if (ConstructLocalBinPath(asalcmd, &salCmdpath)) {
435 bozo_Log("BNODE: command path invalid '%s'\n", asalcmd);
436 bailout = 1;
437 goto done;
438 }
439
440 if (ascancmd && strlen(ascancmd)) {
441 if (ConstructLocalBinPath(ascancmd, &scanCmdpath)) {
442 bozo_Log("BNODE: command path invalid '%s'\n", ascancmd);
443 bailout = 1;
444 goto done;
445 }
446 }
447
448 if (!bailout) {
449 cmdname = PathToExecutable(fileCmdpath);
450 if (cmdname == NULL) {
451 bozo_Log("Out of memory constructing binary filename\n");
452 bailout = 1;
453 goto done;
454 }
455 if (stat(cmdname, &tstat)) {
456 bozo_Log("BNODE: file server binary '%s' not found\n", cmdname);
457 bailout = 1;
458 goto done;
459 }
460 free(cmdname);
461
462 cmdname = PathToExecutable(volCmdpath);
463 if (cmdname == NULL) {
464 bozo_Log("Out of memory constructing binary filename\n");
465 bailout = 1;
466 goto done;
467 }
468 if (stat(cmdname, &tstat)) {
469 bozo_Log("BNODE: volume server binary '%s' not found\n", cmdname);
470 bailout = 1;
471 goto done;
472 }
473 free(cmdname);
474
475 cmdname = PathToExecutable(salCmdpath);
476 if (cmdname == NULL) {
477 bozo_Log("Out of memory constructing binary filename\n");
478 bailout = 1;
479 goto done;
480 }
481 if (stat(cmdname, &tstat)) {
482 bozo_Log("BNODE: salvager binary '%s' not found\n", cmdname);
483 bailout = 1;
484 goto done;
485 }
486
487 if (ascancmd && strlen(ascancmd)) {
488 free(cmdname);
489 cmdname = PathToExecutable(scanCmdpath);
490 if (cmdname == NULL) {
491 bozo_Log("Out of memory constructing binary filename\n");
492 bailout = 1;
493 goto done;
494 }
495 if (stat(cmdname, &tstat)) {
496 bozo_Log("BNODE: scanner binary '%s' not found\n", cmdname);
497 bailout = 1;
498 goto done;
499 }
500 }
501 }
502
503 te = calloc(1, sizeof(struct fsbnode));
504 if (te == NULL) {
505 bailout = 1;
506 goto done;
507 }
508 te->filecmd = fileCmdpath;
509 te->volcmd = volCmdpath;
510 te->salsrvcmd = NULL;
511 te->salcmd = salCmdpath;
512 if (ascancmd && strlen(ascancmd))
513 te->scancmd = scanCmdpath;
514 else
515 te->scancmd = NULL;
516 if (bnode_InitBnode(fsbnode2bnode(te), &fsbnode_ops, ainstance) != 0) {
517 bailout = 1;
518 goto done;
519 }
520 bnode_SetTimeout(fsbnode2bnode(te), POLLTIME);
521 /* ask for timeout activations every 20 seconds */
522 RestoreSalFlag(te); /* restore needsSalvage flag based on file's existence */
523 SetNeedsClock(te); /* compute needsClock field */
524
525 done:
526 free(cmdname);
527 if (bailout) {
528 if (te)
529 free(te);
530 if (fileCmdpath)
531 free(fileCmdpath);
532 if (volCmdpath)
533 free(volCmdpath);
534 if (salCmdpath)
535 free(salCmdpath);
536 if (scanCmdpath)
537 free(scanCmdpath);
538 return NULL;
539 }
540
541 return fsbnode2bnode(te);
542}
543
544/* create a demand attach fs bnode */
545struct bnode *
546dafs_create(char *ainstance, char *afilecmd, char *avolcmd,
547 char * asalsrvcmd, char *asalcmd, char *ascancmd)
548{
549 struct stat tstat;
550 struct fsbnode *te;
551 char *cmdname = NULL;
552 char *fileCmdpath, *volCmdpath, *salsrvCmdpath, *salCmdpath, *scanCmdpath;
553 int bailout = 0;
554
555 fileCmdpath = volCmdpath = salsrvCmdpath = salCmdpath = scanCmdpath = NULL;
556 te = NULL;
557
558 /* construct local paths from canonical (wire-format) paths */
559 if (ConstructLocalBinPath(afilecmd, &fileCmdpath)) {
560 bozo_Log("BNODE: command path invalid '%s'\n", afilecmd);
561 bailout = 1;
562 goto done;
563 }
564 if (ConstructLocalBinPath(avolcmd, &volCmdpath)) {
565 bozo_Log("BNODE: command path invalid '%s'\n", avolcmd);
566 bailout = 1;
567 goto done;
568 }
569 if (ConstructLocalBinPath(asalsrvcmd, &salsrvCmdpath)) {
570 bozo_Log("BNODE: command path invalid '%s'\n", asalsrvcmd);
571 bailout = 1;
572 goto done;
573 }
574 if (ConstructLocalBinPath(asalcmd, &salCmdpath)) {
575 bozo_Log("BNODE: command path invalid '%s'\n", asalcmd);
576 bailout = 1;
577 goto done;
578 }
579
580 if (ascancmd && strlen(ascancmd)) {
581 if (ConstructLocalBinPath(ascancmd, &scanCmdpath)) {
582 bozo_Log("BNODE: command path invalid '%s'\n", ascancmd);
583 bailout = 1;
584 goto done;
585 }
586 }
587
588 if (!bailout) {
589 cmdname = PathToExecutable(fileCmdpath);
590 if (cmdname == NULL) {
591 bozo_Log("Out of memory constructing binary filename\n");
592 bailout = 1;
593 goto done;
594 }
595 if (stat(cmdname, &tstat)) {
596 bozo_Log("BNODE: file server binary '%s' not found\n", cmdname);
597 bailout = 1;
598 goto done;
599 }
600 free(cmdname);
601
602 cmdname = PathToExecutable(volCmdpath);
603 if (cmdname == NULL) {
604 bozo_Log("Out of memory constructing binary filename\n");
605 bailout = 1;
606 goto done;
607 }
608 if (stat(cmdname, &tstat)) {
609 bozo_Log("BNODE: volume server binary '%s' not found\n", cmdname);
610 bailout = 1;
611 goto done;
612 }
613 free(cmdname);
614
615 cmdname = PathToExecutable(salsrvCmdpath);
616 if (cmdname == NULL) {
617 bozo_Log("Out of memory constructing binary filename\n");
618 bailout = 1;
619 goto done;
620 }
621 if (stat(cmdname, &tstat)) {
622 bozo_Log("BNODE: salvageserver binary '%s' not found\n", cmdname);
623 bailout = 1;
624 goto done;
625 }
626 free(cmdname);
627
628 cmdname = PathToExecutable(salCmdpath);
629 if (cmdname == NULL) {
630 bozo_Log("Out of memory constructing binary filename\n");
631 bailout = 1;
632 goto done;
633 }
634 if (stat(cmdname, &tstat)) {
635 bozo_Log("BNODE: salvager binary '%s' not found\n", cmdname);
636 bailout = 1;
637 goto done;
638 }
639
640 if (ascancmd && strlen(ascancmd)) {
641 free(cmdname);
642 cmdname = PathToExecutable(scanCmdpath);
643 if (cmdname == NULL) {
644 bozo_Log("Out of memory constructing binary filename\n");
645 bailout = 1;
646 goto done;
647 }
648 if (stat(cmdname, &tstat)) {
649 bozo_Log("BNODE: scanner binary '%s' not found\n", cmdname);
650 bailout = 1;
651 goto done;
652 }
653 }
654 }
655
656 te = calloc(1, sizeof(struct fsbnode));
657 if (te == NULL) {
658 bailout = 1;
659 goto done;
660 }
661 te->filecmd = fileCmdpath;
662 te->volcmd = volCmdpath;
663 te->salsrvcmd = salsrvCmdpath;
664 te->salcmd = salCmdpath;
665 if (ascancmd && strlen(ascancmd))
666 te->scancmd = scanCmdpath;
667 else
668 te->scancmd = NULL;
669 if (bnode_InitBnode(fsbnode2bnode(te), &dafsbnode_ops, ainstance) != 0) {
670 bailout = 1;
671 goto done;
672 }
673 bnode_SetTimeout(fsbnode2bnode(te), POLLTIME);
674 /* ask for timeout activations every 20 seconds */
675 RestoreSalFlag(te); /* restore needsSalvage flag based on file's existence */
676 SetNeedsClock(te); /* compute needsClock field */
677
678 done:
679 free(cmdname);
680 if (bailout) {
681 if (te)
682 free(te);
683 if (fileCmdpath)
684 free(fileCmdpath);
685 if (volCmdpath)
686 free(volCmdpath);
687 if (salsrvCmdpath)
688 free(salsrvCmdpath);
689 if (salCmdpath)
690 free(salCmdpath);
691 if (scanCmdpath)
692 free(scanCmdpath);
693 return NULL;
694 }
695
696 return fsbnode2bnode(te);
697}
698
699/* called to SIGKILL a process if it doesn't terminate normally */
700static int
701fs_timeout(struct bnode *bn)
702{
703 struct fsbnode *abnode = (struct fsbnode *)bn;
704
705 afs_int32 now;
706
707 now = FT_ApproxTime();
708 /* shutting down */
709 if (abnode->volSDW) {
710 if (!abnode->volKillSent && now - abnode->timeSDStarted > SDTIME) {
711 bnode_StopProc(abnode->volProc, SIGKILL);
712 abnode->volKillSent = 1;
713 bozo_Log
714 ("bos shutdown: volserver failed to shutdown within %d seconds\n",
715 SDTIME);
716 }
717 }
718 if (abnode->salSDW) {
719 if (!abnode->salKillSent && now - abnode->timeSDStarted > SDTIME) {
720 bnode_StopProc(abnode->salProc, SIGKILL);
721 abnode->salKillSent = 1;
722 bozo_Log
723 ("bos shutdown: salvager failed to shutdown within %d seconds\n",
724 SDTIME);
725 }
726 }
727 if (abnode->fileSDW) {
728 if (!abnode->fileKillSent && now - abnode->timeSDStarted > FSSDTIME) {
729 bnode_StopProc(abnode->fileProc, SIGKILL);
730 abnode->fileKillSent = 1;
731 bozo_Log
732 ("bos shutdown: fileserver failed to shutdown within %d seconds\n",
733 FSSDTIME);
734 }
735 }
736 if (abnode->salsrvSDW) {
737 if (!abnode->salsrvKillSent && now - abnode->timeSDStarted > SDTIME) {
738 bnode_StopProc(abnode->salsrvProc, SIGKILL);
739 abnode->salsrvKillSent = 1;
740 bozo_Log
741 ("bos shutdown: salvageserver failed to shutdown within %d seconds\n",
742 SDTIME);
743 }
744 }
745 if (abnode->scanSDW) {
746 if (!abnode->scanKillSent && now - abnode->timeSDStarted > SDTIME) {
747 bnode_StopProc(abnode->scanProc, SIGKILL);
748 abnode->scanKillSent = 1;
749 bozo_Log
750 ("bos shutdown: scanner failed to shutdown within %d seconds\n",
751 SDTIME);
752 }
753 }
754
755 if ((abnode->b.flags & BNODE_ERRORSTOP) && !abnode->salRunning
756 && !abnode->volRunning && !abnode->fileRunning && !abnode->scanRunning
757 && !abnode->salsrvRunning) {
758 bnode_SetStat(bn, BSTAT_NORMAL);
759 }
760 else {
761 bnode_ResetErrorCount(bn);
762 }
763
764 SetNeedsClock(abnode);
765 return 0;
766}
767
768static int
769fs_getstat(struct bnode *bn, afs_int32 * astatus)
770{
771 struct fsbnode *abnode = (struct fsbnode *) bn;
772
773 afs_int32 temp;
774 if (abnode->volSDW || abnode->fileSDW || abnode->salSDW
775 || abnode->scanSDW || abnode->salsrvSDW)
776 temp = BSTAT_SHUTTINGDOWN;
777 else if (abnode->salRunning)
778 temp = BSTAT_NORMAL;
779 else if (abnode->volRunning && abnode->fileRunning
780 && (!abnode->scancmd || abnode->scanRunning)
781 && (!abnode->salsrvcmd || abnode->salsrvRunning))
782 temp = BSTAT_NORMAL;
783 else if (!abnode->salRunning && !abnode->volRunning
784 && !abnode->fileRunning && !abnode->scanRunning
785 && !abnode->salsrvRunning)
786 temp = BSTAT_SHUTDOWN;
787 else
788 temp = BSTAT_STARTINGUP;
789 *astatus = temp;
790 return 0;
791}
792
793static int
794fs_setstat(struct bnode *abnode, afs_int32 astatus)
795{
796 return NudgeProcs((struct fsbnode *) abnode);
797}
798
799static int
800fs_procstarted(struct bnode *bn, struct bnode_proc *aproc)
801{
802 int code = 0;
803
804 if (DoPidFiles) {
805 code = bozo_CreatePidFile(bn->name, aproc->coreName, aproc->pid);
806 }
807 return code;
808}
809
810static int
811fs_procexit(struct bnode *bn, struct bnode_proc *aproc)
812{
813 struct fsbnode *abnode = (struct fsbnode *)bn;
814
815 /* process has exited */
816
817 if (DoPidFiles) {
818 bozo_DeletePidFile(bn->name, aproc->coreName);
819 }
820
821 if (aproc == abnode->volProc) {
822 abnode->volProc = 0;
823 abnode->volRunning = 0;
824 abnode->volSDW = 0;
825 abnode->volKillSent = 0;
826 } else if (aproc == abnode->fileProc) {
827 /* if we were expecting a shutdown and we didn't send a kill signal
828 * and exited (didn't have a signal termination), then we assume that
829 * the file server exited after putting the appropriate volumes safely
830 * offline, and don't salvage next time.
831 */
832 if (abnode->fileSDW && !abnode->fileKillSent
833 && aproc->lastSignal == 0)
834 SetSalFlag(abnode, 0); /* shut down normally */
835 abnode->fileProc = 0;
836 abnode->fileRunning = 0;
837 abnode->fileSDW = 0;
838 abnode->fileKillSent = 0;
839 } else if (aproc == abnode->salProc) {
840 /* if we didn't shutdown the salvager, then assume it exited ok, and thus
841 * that we don't have to salvage again */
842 if (!abnode->salSDW)
843 SetSalFlag(abnode, 0); /* salvage just completed */
844 abnode->salProc = 0;
845 abnode->salRunning = 0;
846 abnode->salSDW = 0;
847 abnode->salKillSent = 0;
848 } else if (aproc == abnode->scanProc) {
849 abnode->scanProc = 0;
850 abnode->scanRunning = 0;
851 abnode->scanSDW = 0;
852 abnode->scanKillSent = 0;
853 } else if (aproc == abnode->salsrvProc) {
854 abnode->salsrvProc = 0;
855 abnode->salsrvRunning = 0;
856 abnode->salsrvSDW = 0;
857 abnode->salsrvKillSent = 0;
858 }
859
860 /* now restart anyone who needs to restart */
861 return NudgeProcs(abnode);
862}
863
864/* make sure we're periodically checking the state if we need to */
865static void
866SetNeedsClock(struct fsbnode *ab)
867{
868 afs_int32 timeout = POLLTIME;
869
870 if ((ab->fileSDW && !ab->fileKillSent) || (ab->volSDW && !ab->volKillSent)
871 || (ab->scanSDW && !ab->scanKillSent) || (ab->salSDW && !ab->salKillSent)
872 || (ab->salsrvSDW && !ab->salsrvKillSent)) {
873 /* SIGQUIT sent, will send SIGKILL if process does not exit */
874 ab->needsClock = 1;
875 } else if (ab->b.goal == 1 && ab->fileRunning && ab->volRunning
876 && (!ab->scancmd || ab->scanRunning)
877 && (!ab->salsrvcmd || ab->salsrvRunning)) {
878 if (ab->b.errorStopCount) {
879 /* reset error count after running for a bit */
880 ab->needsClock = 1;
881 } else {
882 ab->needsClock = 0; /* running normally */
883 }
884 } else if ((ab->b.goal == 0) && !ab->fileRunning && !ab->volRunning
885 && !ab->salRunning && !ab->scanRunning && !ab->salsrvRunning) {
886 if (ab->b.flags & BNODE_ERRORSTOP && ab->b.errorStopDelay) {
887 bozo_Log("%s will retry start in %d seconds\n", ab->b.name,
888 ab->b.errorStopDelay);
889 ab->needsClock = 1; /* halted for errors, retry later */
890 timeout = ab->b.errorStopDelay;
891 } else {
892 ab->needsClock = 0; /* halted normally */
893 }
894 } else
895 ab->needsClock = 1; /* other */
896
897 if (ab->needsClock && (!bnode_PendingTimeout(fsbnode2bnode(ab))
898 || ab->b.period != timeout))
899 bnode_SetTimeout(fsbnode2bnode(ab), timeout);
900 if (!ab->needsClock)
901 bnode_SetTimeout(fsbnode2bnode(ab), 0);
902}
903
/*
 * Drive the bnode's processes toward its goal.
 *
 * Goal 1 (run):
 *   - file server already running: the salvager must not be running too
 *     (if it is, the goal is forced to shutdown, the salvager is killed and
 *     -1 is returned); otherwise any missing volserver, salvageserver and
 *     scanner is started.
 *   - file server not running and no salvage needed (or demand attach):
 *     start the file server, setting the salvage flag on success, plus any
 *     other missing process.
 *   - file server not running and a salvage is needed: ask the remaining
 *     processes to stop, and once none of them is running start the
 *     salvager.
 * Any other goal (shutdown): send a stop signal to each running process
 *   that has not been asked to stop yet and record when the shutdown began.
 *
 * Failures of bnode_NewProc() are not reported; the corresponding process
 * is simply left marked as not running.
 *
 * Returns 0, or -1 in the emergency-shutdown case.
 */
static int
NudgeProcs(struct fsbnode *abnode)
{
    struct bnode_proc *tp;      /* not register */
    afs_int32 code;
    afs_int32 now;

    now = FT_ApproxTime();
    if (abnode->b.goal == 1) {
        /* we're trying to run the system. If the file server is running, then we
         * are trying to start up the system. If it is not running, then needsSalvage
         * tells us if we need to run the salvager or not */
        if (abnode->fileRunning) {
            if (abnode->salRunning) {
                /* these two must never coexist: give up on running */
                bozo_Log("Salvager running along with file server!\n");
                bozo_Log("Emergency shutdown\n");
                emergency = 1;
                bnode_SetGoal(fsbnode2bnode(abnode), BSTAT_SHUTDOWN);
                bnode_StopProc(abnode->salProc, SIGKILL);
                SetNeedsClock(abnode);
                return -1;
            }
            if (!abnode->volRunning) {
                abnode->lastVolStart = FT_ApproxTime();
                code = bnode_NewProc(fsbnode2bnode(abnode), abnode->volcmd, "vol", &tp);
                if (code == 0) {
                    abnode->volProc = tp;
                    abnode->volRunning = 1;
                }
            }
            if (abnode->salsrvcmd) {
                if (!abnode->salsrvRunning) {
                    abnode->lastSalsrvStart = FT_ApproxTime();
                    code =
                        bnode_NewProc(fsbnode2bnode(abnode), abnode->salsrvcmd, "salsrv",
                                      &tp);
                    if (code == 0) {
                        abnode->salsrvProc = tp;
                        abnode->salsrvRunning = 1;
                    }
                }
            }
            if (abnode->scancmd) {
                if (!abnode->scanRunning) {
                    abnode->lastScanStart = FT_ApproxTime();
                    code =
                        bnode_NewProc(fsbnode2bnode(abnode), abnode->scancmd, "scanner",
                                      &tp);
                    if (code == 0) {
                        abnode->scanProc = tp;
                        abnode->scanRunning = 1;
                    }
                }
            }
        } else {                /* file is not running */
            /* see how to start */
            /* for demand attach fs, needsSalvage flag is ignored */
            if (!abnode->needsSalvage || abnode->salsrvcmd) {
                /* no crash apparent, just start up normally */
                /* (this test is always true here: we are in the branch
                 * where the file server is not running) */
                if (!abnode->fileRunning) {
                    abnode->lastFileStart = FT_ApproxTime();
                    code =
                        bnode_NewProc(fsbnode2bnode(abnode), abnode->filecmd, "file", &tp);
                    if (code == 0) {
                        abnode->fileProc = tp;
                        abnode->fileRunning = 1;
                        /* from now on an unclean exit requires a salvage */
                        SetSalFlag(abnode, 1);
                    }
                }
                if (!abnode->volRunning) {
                    abnode->lastVolStart = FT_ApproxTime();
                    code = bnode_NewProc(fsbnode2bnode(abnode), abnode->volcmd, "vol", &tp);
                    if (code == 0) {
                        abnode->volProc = tp;
                        abnode->volRunning = 1;
                    }
                }
                if (abnode->salsrvcmd && !abnode->salsrvRunning) {
                    abnode->lastSalsrvStart = FT_ApproxTime();
                    code =
                        bnode_NewProc(fsbnode2bnode(abnode), abnode->salsrvcmd, "salsrv",
                                      &tp);
                    if (code == 0) {
                        abnode->salsrvProc = tp;
                        abnode->salsrvRunning = 1;
                    }
                }
                if (abnode->scancmd && !abnode->scanRunning) {
                    abnode->lastScanStart = FT_ApproxTime();
                    code =
                        bnode_NewProc(fsbnode2bnode(abnode), abnode->scancmd, "scanner",
                                      &tp);
                    if (code == 0) {
                        abnode->scanProc = tp;
                        abnode->scanRunning = 1;
                    }
                }
            } else {            /* needs to be salvaged */
                /* make sure file server and volser are gone */
                /* (unlike the shutdown path below, the stop signal is
                 * re-sent on every call while the process is still running;
                 * only timeSDStarted is protected by the SDW check) */
                if (abnode->volRunning) {
                    bnode_StopProc(abnode->volProc, SIGTERM);
                    if (!abnode->volSDW)
                        abnode->timeSDStarted = now;
                    abnode->volSDW = 1;
                }
                /* never true here (file server is not running in this
                 * branch); kept as in the original */
                if (abnode->fileRunning) {
                    bnode_StopProc(abnode->fileProc, SIGQUIT);
                    if (!abnode->fileSDW)
                        abnode->timeSDStarted = now;
                    abnode->fileSDW = 1;
                }
                if (abnode->scanRunning) {
                    bnode_StopProc(abnode->scanProc, SIGTERM);
                    if (!abnode->scanSDW)
                        abnode->timeSDStarted = now;
                    abnode->scanSDW = 1;
                }
                /* wait for their exits; note this early return skips the
                 * SetNeedsClock() call at the bottom */
                if (abnode->volRunning || abnode->fileRunning
                    || abnode->scanRunning)
                    return 0;
                /* otherwise, it is safe to start salvager */
                if (!abnode->salRunning) {
                    code = bnode_NewProc(fsbnode2bnode(abnode), abnode->salcmd, "salv", &tp);
                    if (code == 0) {
                        abnode->salProc = tp;
                        abnode->salRunning = 1;
                    }
                }
            }
        }
    } else {                    /* goal is 0, we're shutting down */
        /* trying to shutdown */
        /* each process is signalled once; the SDW flag remembers that */
        if (abnode->salRunning && !abnode->salSDW) {
            bnode_StopProc(abnode->salProc, SIGTERM);
            abnode->salSDW = 1;
            abnode->timeSDStarted = now;
        }
        if (abnode->fileRunning && !abnode->fileSDW) {
            bnode_StopProc(abnode->fileProc, SIGQUIT);
            abnode->fileSDW = 1;
            abnode->timeSDStarted = now;
        }
        if (abnode->volRunning && !abnode->volSDW) {
            bnode_StopProc(abnode->volProc, SIGTERM);
            abnode->volSDW = 1;
            abnode->timeSDStarted = now;
        }
        if (abnode->salsrvRunning && !abnode->salsrvSDW) {
            bnode_StopProc(abnode->salsrvProc, SIGTERM);
            abnode->salsrvSDW = 1;
            abnode->timeSDStarted = now;
        }
        if (abnode->scanRunning && !abnode->scanSDW) {
            bnode_StopProc(abnode->scanProc, SIGTERM);
            abnode->scanSDW = 1;
            abnode->timeSDStarted = now;
        }
    }
    SetNeedsClock(abnode);
    return 0;
}
1065
1066static int
1067fs_getstring(struct bnode *bn, char *abuffer, afs_int32 alen)
1068{
1069 struct fsbnode *abnode = (struct fsbnode *)bn;
1070
1071 if (alen < 40)
1072 return -1;
1073 if (abnode->b.goal == 1) {
1074 if (abnode->fileRunning) {
1075 if (abnode->fileSDW)
1076 strcpy(abuffer, "file server shutting down");
1077 else if (abnode->scancmd) {
1078 if (!abnode->volRunning && !abnode->scanRunning)
1079 strcpy(abuffer,
1080 "file server up; volser and scanner down");
1081 else if (abnode->volRunning && !abnode->scanRunning)
1082 strcpy(abuffer,
1083 "file server up; volser up; scanner down");
1084 else if (!abnode->volRunning && abnode->scanRunning)
1085 strcpy(abuffer,
1086 "file server up; volser down; scanner up");
1087
1088 else
1089 strcpy(abuffer, "file server running");
1090 } else if (!abnode->volRunning)
1091 strcpy(abuffer, "file server up; volser down");
1092 else
1093 strcpy(abuffer, "file server running");
1094 } else if (abnode->salRunning) {
1095 strcpy(abuffer, "salvaging file system");
1096 } else
1097 strcpy(abuffer, "starting file server");
1098 } else {
1099 /* shutting down */
1100 if (abnode->fileRunning || abnode->volRunning || abnode->scanRunning) {
1101 strcpy(abuffer, "file server shutting down");
1102 } else if (abnode->salRunning)
1103 strcpy(abuffer, "salvager shutting down");
1104 else
1105 strcpy(abuffer, "file server shut down");
1106 }
1107 return 0;
1108}
1109
1110static int
1111fs_getparm(struct bnode *bn, afs_int32 aindex, char *abuffer,
1112 afs_int32 alen)
1113{
1114 struct fsbnode *abnode = (struct fsbnode *)bn;
1115
1116 if (aindex == 0)
1117 strcpy(abuffer, abnode->filecmd);
1118 else if (aindex == 1)
1119 strcpy(abuffer, abnode->volcmd);
1120 else if (aindex == 2)
1121 strcpy(abuffer, abnode->salcmd);
1122 else if (aindex == 3 && abnode->scancmd)
1123 strcpy(abuffer, abnode->scancmd);
1124 else
1125 return BZDOM;
1126 return 0;
1127}
1128
1129static int
1130dafs_getparm(struct bnode *bn, afs_int32 aindex, char *abuffer,
1131 afs_int32 alen)
1132{
1133 struct fsbnode *abnode = (struct fsbnode *)bn;
1134
1135 if (aindex == 0)
1136 strcpy(abuffer, abnode->filecmd);
1137 else if (aindex == 1)
1138 strcpy(abuffer, abnode->volcmd);
1139 else if (aindex == 2)
1140 strcpy(abuffer, abnode->salsrvcmd);
1141 else if (aindex == 3)
1142 strcpy(abuffer, abnode->salcmd);
1143 else if (aindex == 4 && abnode->scancmd)
1144 strcpy(abuffer, abnode->scancmd);
1145 else
1146 return BZDOM;
1147 return 0;
1148}