Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / bozo / bnode.c
CommitLineData
805e021f
CE
1/*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10#include <afsconfig.h>
11#include <afs/param.h>
12
13#include <afs/procmgmt.h>
14#include <roken.h>
15
16#include <stddef.h>
17
18#include <lwp.h>
19#include <rx/rx.h>
20#include <afs/audit.h>
21#include <afs/afsutil.h>
22#include <afs/fileutil.h>
23#include <opr/queue.h>
24
25#include "bnode.h"
26#include "bnode_internal.h"
27#include "bosprototypes.h"
28
/* Supply WCOREDUMP only if no earlier header defined it: tests bit 0200 of
 * a wait status. */
#ifndef WCOREDUMP
#define WCOREDUMP(x) ((x) & 0200)
#endif

#define BNODE_LWP_STACKSIZE (16 * 1024)	/* stack size handed to LWP_CreateProcess for bproc */
#define BNODE_ERROR_COUNT_MAX 16 /* maximum number of retries */
#define BNODE_ERROR_DELAY_MAX 60 /* maximum retry delay (seconds) */

static PROCESS bproc_pid; /* LWP handle of bproc; target of IOMGR_Cancel wakeups */
static struct opr_queue allBnodes; /**< List of all bnodes */
static struct opr_queue allProcs; /**< List of all processes for which we're waiting */
static struct opr_queue allTypes; /**< List of all registered type handlers */

/* Counters kept by bproc; weirdPids counts reaped pids not found in allProcs */
static struct bnode_stats {
    int weirdPids;
} bnode_stats;

/* Defined elsewhere; when DoCore is non-NULL it is used as the directory in
 * which core files are looked for and saved. */
extern const char *DoCore;
extern const char *DoPidFiles;
#ifndef AFS_NT40_ENV
extern char **environ; /* env structure */
#endif

/* Forward declaration: bproc calls this before its definition appears */
int hdl_notifier(struct bnode_proc *tp);
53
54/* Remember the name of the process, if any, that failed last */
55static void
56RememberProcName(struct bnode_proc *ap)
57{
58 struct bnode *tbnodep;
59
60 tbnodep = ap->bnode;
61 if (tbnodep->lastErrorName) {
62 free(tbnodep->lastErrorName);
63 tbnodep->lastErrorName = NULL;
64 }
65 if (ap->coreName)
66 tbnodep->lastErrorName = strdup(ap->coreName);
67}
68
69/* utility for use by BOP_HASCORE functions to determine where a core file might
70 * be stored.
71 */
72int
73bnode_CoreName(struct bnode *abnode, char *acoreName, char *abuffer)
74{
75 if (DoCore) {
76 strcpy(abuffer, DoCore);
77 strcat(abuffer, "/");
78 strcat(abuffer, AFSDIR_CORE_FILE);
79 } else
80 strcpy(abuffer, AFSDIR_SERVER_CORELOG_FILEPATH);
81 if (acoreName) {
82 strcat(abuffer, acoreName);
83 strcat(abuffer, ".");
84 }
85 strcat(abuffer, abnode->name);
86 return 0;
87}
88
89/* save core file, if any */
90static void
91SaveCore(struct bnode *abnode, struct bnode_proc
92 *aproc)
93{
94 char tbuffer[256];
95 struct stat tstat;
96 afs_int32 code = 0;
97 char *corefile = NULL;
98#ifdef BOZO_SAVE_CORES
99 struct timeval Start;
100 struct tm *TimeFields;
101 char FileName[256];
102#endif
103
104 /* Linux always appends the PID to core dumps from threaded processes, so
105 * we have to scan the directory to find core files under another name. */
106 if (DoCore) {
107 strcpy(tbuffer, DoCore);
108 strcat(tbuffer, "/");
109 strcat(tbuffer, AFSDIR_CORE_FILE);
110 } else
111 code = stat(AFSDIR_SERVER_CORELOG_FILEPATH, &tstat);
112 if (code) {
113 DIR *logdir;
114 struct dirent *file;
115 unsigned long pid;
116 const char *coredir = AFSDIR_LOGS_DIR;
117
118 if (DoCore)
119 coredir = DoCore;
120
121 logdir = opendir(coredir);
122 if (logdir == NULL)
123 return;
124 while ((file = readdir(logdir)) != NULL) {
125 if (strncmp(file->d_name, "core.", 5) != 0)
126 continue;
127 pid = atol(file->d_name + 5);
128 if (pid == aproc->pid) {
129 int r;
130
131 r = asprintf(&corefile, "%s/%s", coredir, file->d_name);
132 if (r < 0 || corefile == NULL) {
133 closedir(logdir);
134 return;
135 }
136 code = 0;
137 break;
138 }
139 }
140 closedir(logdir);
141 } else {
142 corefile = strdup(tbuffer);
143 }
144 if (code)
145 return;
146
147 bnode_CoreName(abnode, aproc->coreName, tbuffer);
148#ifdef BOZO_SAVE_CORES
149 FT_GetTimeOfDay(&Start, 0);
150 TimeFields = localtime(&Start.tv_sec);
151 sprintf(FileName, "%s.%d%02d%02d%02d%02d%02d", tbuffer,
152 TimeFields->tm_year + 1900, TimeFields->tm_mon + 1, TimeFields->tm_mday,
153 TimeFields->tm_hour, TimeFields->tm_min, TimeFields->tm_sec);
154 strcpy(tbuffer, FileName);
155#endif
156 rk_rename(corefile, tbuffer);
157 free(corefile);
158}
159
160int
161bnode_GetString(struct bnode *abnode, char *abuffer,
162 afs_int32 alen)
163{
164 return BOP_GETSTRING(abnode, abuffer, alen);
165}
166
167int
168bnode_GetParm(struct bnode *abnode, afs_int32 aindex,
169 char *abuffer, afs_int32 alen)
170{
171 return BOP_GETPARM(abnode, aindex, abuffer, alen);
172}
173
174int
175bnode_GetStat(struct bnode *abnode, afs_int32 * astatus)
176{
177 return BOP_GETSTAT(abnode, astatus);
178}
179
/* Invoke the bnode's BOP_RESTARTP operation and return its result unchanged. */
int
bnode_RestartP(struct bnode *abnode)
{
    int answer;

    answer = BOP_RESTARTP(abnode);
    return answer;
}
185
186static int
187bnode_Check(struct bnode *abnode)
188{
189 if (abnode->flags & BNODE_WAIT) {
190 abnode->flags &= ~BNODE_WAIT;
191 LWP_NoYieldSignal(abnode);
192 }
193 return 0;
194}
195
/* Report whether an instance has a core file, as decided by its BOP_HASCORE op. */
int
bnode_HasCore(struct bnode *abnode)
{
    int hasCore;

    hasCore = BOP_HASCORE(abnode);
    return hasCore;
}
202
203/* wait for all bnodes to stabilize */
204int
205bnode_WaitAll(void)
206{
207 struct opr_queue *cursor;
208 afs_int32 code;
209 afs_int32 stat;
210
211 retry:
212 for (opr_queue_Scan(&allBnodes, cursor)) {
213 struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);
214
215 bnode_Hold(tb);
216 code = BOP_GETSTAT(tb, &stat);
217 if (code) {
218 bnode_Release(tb);
219 return code;
220 }
221 if (stat != tb->goal) {
222 tb->flags |= BNODE_WAIT;
223 LWP_WaitProcess(tb);
224 bnode_Release(tb);
225 goto retry;
226 }
227 bnode_Release(tb);
228 }
229 return 0;
230}
231
232/* wait until bnode status is correct */
233int
234bnode_WaitStatus(struct bnode *abnode, int astatus)
235{
236 afs_int32 code;
237 afs_int32 stat;
238
239 bnode_Hold(abnode);
240 while (1) {
241 /* get the status */
242 code = BOP_GETSTAT(abnode, &stat);
243 if (code)
244 return code;
245
246 /* otherwise, check if we're done */
247 if (stat == astatus) {
248 bnode_Release(abnode);
249 return 0; /* done */
250 }
251 if (astatus != abnode->goal) {
252 bnode_Release(abnode);
253 return -1; /* no longer our goal, don't keep waiting */
254 }
255 /* otherwise, block */
256 abnode->flags |= BNODE_WAIT;
257 LWP_WaitProcess(abnode);
258 }
259}
260
261int
262bnode_ResetErrorCount(struct bnode *abnode)
263{
264 abnode->errorStopCount = 0;
265 abnode->errorStopDelay = 0;
266 return 0;
267}
268
269int
270bnode_SetStat(struct bnode *abnode, int agoal)
271{
272 abnode->goal = agoal;
273 bnode_Check(abnode);
274 BOP_SETSTAT(abnode, agoal);
275 abnode->flags &= ~BNODE_ERRORSTOP;
276 return 0;
277}
278
279int
280bnode_SetGoal(struct bnode *abnode, int agoal)
281{
282 abnode->goal = agoal;
283 bnode_Check(abnode);
284 return 0;
285}
286
287int
288bnode_SetFileGoal(struct bnode *abnode, int agoal)
289{
290 if (abnode->fileGoal == agoal)
291 return 0; /* already done */
292 abnode->fileGoal = agoal;
293 WriteBozoFile(0);
294 return 0;
295}
296
297/* apply a function to all bnodes in the system */
298int
299bnode_ApplyInstance(int (*aproc) (struct bnode *tb, void *), void *arock)
300{
301 struct opr_queue *cursor, *store;
302 afs_int32 code;
303
304 for (opr_queue_ScanSafe(&allBnodes, cursor, store)) {
305 struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);
306 code = (*aproc) (tb, arock);
307 if (code)
308 return code;
309 }
310 return 0;
311}
312
313struct bnode *
314bnode_FindInstance(char *aname)
315{
316 struct opr_queue *cursor;
317
318 for (opr_queue_Scan(&allBnodes, cursor)) {
319 struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);
320
321 if (!strcmp(tb->name, aname))
322 return tb;
323 }
324 return NULL;
325}
326
327static struct bnode_type *
328FindType(char *aname)
329{
330 struct opr_queue *cursor;
331
332 for (opr_queue_Scan(&allTypes, cursor)) {
333 struct bnode_type *tt = opr_queue_Entry(cursor, struct bnode_type, q);
334
335 if (!strcmp(tt->name, aname))
336 return tt;
337 }
338 return NULL;
339}
340
341int
342bnode_Register(char *atype, struct bnode_ops *aprocs, int anparms)
343{
344 struct opr_queue *cursor;
345 struct bnode_type *tt = NULL;
346
347 for (opr_queue_Scan(&allTypes, cursor), tt = NULL) {
348 tt = opr_queue_Entry(cursor, struct bnode_type, q);
349 if (!strcmp(tt->name, atype))
350 break;
351 }
352 if (!tt) {
353 tt = calloc(1, sizeof(struct bnode_type));
354 opr_queue_Init(&tt->q);
355 opr_queue_Prepend(&allTypes, &tt->q);
356 tt->name = atype;
357 }
358 tt->ops = aprocs;
359 return 0;
360}
361
362afs_int32
363bnode_Create(char *atype, char *ainstance, struct bnode ** abp, char *ap1,
364 char *ap2, char *ap3, char *ap4, char *ap5, char *notifier,
365 int fileGoal, int rewritefile)
366{
367 struct bnode_type *type;
368 struct bnode *tb;
369 char *notifierpath = NULL;
370 struct stat tstat;
371
372 if (bnode_FindInstance(ainstance))
373 return BZEXISTS;
374 type = FindType(atype);
375 if (!type)
376 return BZBADTYPE;
377
378 if (notifier && strcmp(notifier, NONOTIFIER)) {
379 /* construct local path from canonical (wire-format) path */
380 if (ConstructLocalBinPath(notifier, &notifierpath)) {
381 bozo_Log("BNODE-Create: Notifier program path invalid '%s'\n",
382 notifier);
383 return BZNOCREATE;
384 }
385
386 if (stat(notifierpath, &tstat)) {
387 bozo_Log("BNODE-Create: Notifier program '%s' not found\n",
388 notifierpath);
389 free(notifierpath);
390 return BZNOCREATE;
391 }
392 }
393 tb = (*type->ops->create) (ainstance, ap1, ap2, ap3, ap4, ap5);
394 if (!tb) {
395 free(notifierpath);
396 return BZNOCREATE;
397 }
398 tb->notifier = notifierpath;
399 *abp = tb;
400 tb->type = type;
401
402 /* The fs_create above calls bnode_InitBnode() which always sets the
403 ** fileGoal to BSTAT_NORMAL .... overwrite it with whatever is passed into
404 ** this function as a parameter... */
405 tb->fileGoal = fileGoal;
406
407 bnode_SetStat(tb, tb->goal); /* nudge it once */
408
409 if (rewritefile != 0)
410 WriteBozoFile(0);
411
412 return 0;
413}
414
415int
416bnode_DeleteName(char *ainstance)
417{
418 struct bnode *tb;
419
420 tb = bnode_FindInstance(ainstance);
421 if (!tb)
422 return BZNOENT;
423
424 return bnode_Delete(tb);
425}
426
427int
428bnode_Hold(struct bnode *abnode)
429{
430 abnode->refCount++;
431 return 0;
432}
433
434int
435bnode_Release(struct bnode *abnode)
436{
437 abnode->refCount--;
438 if (abnode->refCount == 0 && abnode->flags & BNODE_DELETE) {
439 abnode->flags &= ~BNODE_DELETE; /* we're going for it */
440 bnode_Delete(abnode);
441 }
442 return 0;
443}
444
/*
 * Delete a bnode, or arrange for it to be deleted later.
 *
 * If the bnode is still referenced, it is only flagged BNODE_DELETE and 0 is
 * returned; bnode_Release() retries the delete when the count drops to zero.
 * Otherwise the bnode must report BSTAT_SHUTDOWN: a BOP_GETSTAT failure is
 * returned as-is, and any other status yields BZBUSY.  An idle bnode is
 * unlinked from the global list, its name is freed, BOP_DELETE is invoked,
 * and the bozo file is rewritten.  Returns the code from BOP_DELETE.
 */
int
bnode_Delete(struct bnode *abnode)
{
    afs_int32 code;
    afs_int32 temp;

    if (abnode->refCount != 0) {
        /* still in use: defer until the last reference is released */
        abnode->flags |= BNODE_DELETE;
        return 0;
    }

    /* make sure the bnode is idle before zapping */
    bnode_Hold(abnode);
    code = BOP_GETSTAT(abnode, &temp);
    bnode_Release(abnode);
    if (code)
        return code;
    if (temp != BSTAT_SHUTDOWN)
        return BZBUSY;

    /* all clear to zap */
    opr_queue_Remove(&abnode->q);
    free(abnode->name);		/* do this first, since bnode fields may be bad after BOP_DELETE */
    code = BOP_DELETE(abnode);	/* don't play games like holding over this one */
    WriteBozoFile(0);
    return code;
}
472
473/* function to tell if there's a timeout coming up */
474int
475bnode_PendingTimeout(struct bnode *abnode)
476{
477 return (abnode->flags & BNODE_NEEDTIMEOUT);
478}
479
480/* function called to set / clear periodic bnode wakeup times */
481int
482bnode_SetTimeout(struct bnode *abnode, afs_int32 atimeout)
483{
484 if (atimeout != 0) {
485 abnode->nextTimeout = FT_ApproxTime() + atimeout;
486 abnode->flags |= BNODE_NEEDTIMEOUT;
487 abnode->period = atimeout;
488 IOMGR_Cancel(bproc_pid);
489 } else {
490 abnode->flags &= ~BNODE_NEEDTIMEOUT;
491 }
492 return 0;
493}
494
495/* used by new bnode creation code to format bnode header */
496int
497bnode_InitBnode(struct bnode *abnode, struct bnode_ops *abnodeops,
498 char *aname)
499{
500 /* format the bnode properly */
501 memset(abnode, 0, sizeof(struct bnode));
502 opr_queue_Init(&abnode->q);
503 abnode->ops = abnodeops;
504 abnode->name = strdup(aname);
505 if (!abnode->name)
506 return ENOMEM;
507 abnode->flags = BNODE_ACTIVE;
508 abnode->fileGoal = BSTAT_NORMAL;
509 abnode->goal = BSTAT_SHUTDOWN;
510
511 /* put the bnode at the end of the list so we write bnode file in same order */
512 opr_queue_Append(&allBnodes, &abnode->q);
513
514 return 0;
515}
516
/*
 * Body of the bnode manager LWP; loops forever.
 *
 * Each iteration:
 *   1. finds the earliest pending bnode timeout and sleeps in IOMGR_Select
 *      until then (or for a long default when no timeout is pending);
 *   2. delivers BOP_TIMEOUT to every bnode whose deadline has passed and
 *      re-arms those that still want timeouts;
 *   3. if the sleep returned a negative code, reaps exited children with
 *      waitpid(WNOHANG), records how each one ended on its bnode, saves any
 *      core, runs the notifier, applies the restart-rate limit, and hands
 *      the exit to the bnode through BOP_PROCEXIT.
 *
 * NOTE(review): the negative return from IOMGR_Select is presumably caused
 * by IOMGR_Cancel(bproc_pid), which bnode_SoftInt and bnode_SetTimeout call
 * -- confirm against the IOMGR implementation.
 *
 * The parameter is unused and the function never returns normally.
 */
static void *
bproc(void *unused)
{
    afs_int32 code;
    struct bnode *tb;
    afs_int32 temp;
    struct opr_queue *cursor, *store;
    struct bnode_proc *tp;
    int options;		/* must not be register */
    struct timeval tv;
    int setAny;
    int status;

    while (1) {
        /* first figure out how long to sleep for */
        temp = 0x7fffffff;	/* afs_int32 time; maxint doesn't work in select */
        setAny = 0;
        for (opr_queue_Scan(&allBnodes, cursor)) {
            tb = opr_queue_Entry(cursor, struct bnode, q);
            if (tb->flags & BNODE_NEEDTIMEOUT) {
                if (tb->nextTimeout < temp) {
                    setAny = 1;
                    temp = tb->nextTimeout;
                }
            }
        }
        /* now temp has the time at which we should wakeup next */

        /* sleep */
        if (setAny)
            temp -= FT_ApproxTime();	/* how many seconds until next event */
        else
            temp = 999999;
        if (temp > 0) {
            tv.tv_sec = temp;
            tv.tv_usec = 0;
            code = IOMGR_Select(0, 0, 0, 0, &tv);
        } else
            code = 0;		/* fake timeout code */

        /* figure out why we woke up; child exit or timeouts */
        FT_GetTimeOfDay(&tv, 0);	/* must do the real gettimeofday once in a while */
        temp = tv.tv_sec;	/* from here on, temp is the current time in seconds */

        /* check all bnodes to see which ones need timeout events */
        for (opr_queue_ScanSafe(&allBnodes, cursor, store)) {
            tb = opr_queue_Entry(cursor, struct bnode, q);
            if ((tb->flags & BNODE_NEEDTIMEOUT) && temp > tb->nextTimeout) {
                bnode_Hold(tb);
                BOP_TIMEOUT(tb);
                bnode_Check(tb);
                if (tb->flags & BNODE_NEEDTIMEOUT) {	/* check again, BOP_TIMEOUT could change */
                    tb->nextTimeout = FT_ApproxTime() + tb->period;
                }
                bnode_Release(tb);	/* delete may occur here */
            }
        }

        if (code < 0) {
            /* signalled, probably by incoming signal */
            while (1) {
                /* reap one exited child per pass without blocking */
                options = WNOHANG;
                code = waitpid((pid_t) - 1, &status, options);
                if (code == 0 || code == -1)
                    break;	/* all done */
                /* otherwise code has a process id, which we now search for;
                 * tp is reset to NULL on every step so that it is NULL
                 * when the scan ends without a match */
                for (tp = NULL, opr_queue_Scan(&allProcs, cursor), tp = NULL) {
                    tp = opr_queue_Entry(cursor, struct bnode_proc, q);

                    if (tp->pid == code)
                        break;
                }
                if (tp) {
                    /* found the pid */
                    tb = tp->bnode;
                    bnode_Hold(tb);

                    /* count restarts in last 30 seconds */
                    if (temp > tb->rsTime + 30) {
                        /* it's been 30 seconds we've been counting */
                        tb->rsTime = temp;
                        tb->rsCount = 0;
                    }


                    if (WIFSIGNALED(status) == 0) {
                        /* exited, not signalled */
                        tp->lastExit = WEXITSTATUS(status);
                        tp->lastSignal = 0;
                        if (tp->lastExit) {
                            /* non-zero exit: record it as the bnode's last error */
                            tb->errorCode = tp->lastExit;
                            tb->lastErrorExit = FT_ApproxTime();
                            RememberProcName(tp);
                            tb->errorSignal = 0;
                        }
                        if (tp->coreName)
                            bozo_Log("%s:%s exited with code %d\n", tb->name,
                                     tp->coreName, tp->lastExit);
                        else
                            bozo_Log("%s exited with code %d\n", tb->name,
                                     tp->lastExit);
                    } else {
                        /* Signal occurred, perhaps spurious due to shutdown request.
                         * If due to a shutdown request, don't overwrite last error
                         * information.
                         */
                        tp->lastSignal = WTERMSIG(status);
                        tp->lastExit = 0;
                        if (tp->lastSignal != SIGQUIT
                            && tp->lastSignal != SIGTERM
                            && tp->lastSignal != SIGKILL) {
                            tb->errorSignal = tp->lastSignal;
                            tb->lastErrorExit = FT_ApproxTime();
                            RememberProcName(tp);
                        }
                        if (tp->coreName)
                            bozo_Log("%s:%s exited on signal %d%s\n",
                                     tb->name, tp->coreName, tp->lastSignal,
                                     WCOREDUMP(status) ? " (core dumped)" :
                                     "");
                        else
                            bozo_Log("%s exited on signal %d%s\n", tb->name,
                                     tp->lastSignal,
                                     WCOREDUMP(status) ? " (core dumped)" :
                                     "");
                        /* attempted for every signal exit, whether or not
                         * WCOREDUMP reported a dump */
                        SaveCore(tb, tp);
                    }
                    tb->lastAnyExit = FT_ApproxTime();

                    if (tb->notifier) {
                        bozo_Log("BNODE: Notifier %s will be called\n",
                                 tb->notifier);
                        hdl_notifier(tp);
                    }

                    /* rate limit: only applies while the goal is non-zero;
                     * rsCount is incremented on each such exit */
                    if (tb->goal && tb->rsCount++ > 10) {
                        /* 10 in 30 seconds */
                        if (tb->errorStopCount >= BNODE_ERROR_COUNT_MAX) {
                            tb->errorStopDelay = 0;	/* max reached, give up. */
                        } else {
                            tb->errorStopCount++;
                            if (!tb->errorStopDelay) {
                                tb->errorStopDelay = 1;	/* wait a second, then retry */
                            } else {
                                tb->errorStopDelay *= 2;	/* ramp up the retry delays */
                            }
                            if (tb->errorStopDelay > BNODE_ERROR_DELAY_MAX) {
                                tb->errorStopDelay = BNODE_ERROR_DELAY_MAX;	/* cap the delay */
                            }
                        }
                        tb->flags |= BNODE_ERRORSTOP;
                        bnode_SetGoal(tb, BSTAT_SHUTDOWN);
                        bozo_Log
                            ("BNODE '%s' repeatedly failed to start, perhaps missing executable.\n",
                             tb->name);
                    }
                    BOP_PROCEXIT(tb, tp);
                    bnode_Check(tb);
                    bnode_Release(tb);	/* bnode delete can happen here */
                    /* the proc record is unlinked and freed here */
                    opr_queue_Remove(&tp->q);
                    free(tp);
                } else
                    bnode_stats.weirdPids++;	/* reaped a pid we were not tracking */
            }
        }
    }
    AFS_UNREACHED(return(NULL));
}
686
687static afs_int32
688SendNotifierData(int fd, struct bnode_proc *tp)
689{
690 struct bnode *tb = tp->bnode;
691 char buffer[1000], *bufp = buffer, *buf1;
692 int len;
693
694 /*
695 * First sent out the bnode_proc struct
696 */
697 (void)sprintf(bufp, "BEGIN bnode_proc\n");
698 bufp += strlen(bufp);
699 (void)sprintf(bufp, "comLine: %s\n", tp->comLine);
700 bufp += strlen(bufp);
701 if (!(buf1 = tp->coreName))
702 buf1 = "(null)";
703 (void)sprintf(bufp, "coreName: %s\n", buf1);
704 bufp += strlen(bufp);
705 (void)sprintf(bufp, "pid: %ld\n", afs_printable_int32_ld(tp->pid));
706 bufp += strlen(bufp);
707 (void)sprintf(bufp, "lastExit: %ld\n", afs_printable_int32_ld(tp->lastExit));
708 bufp += strlen(bufp);
709#ifdef notdef
710 (void)sprintf(bufp, "lastSignal: %ld\n", afs_printable_int32_ld(tp->lastSignal));
711 bufp += strlen(bufp);
712#endif
713 (void)sprintf(bufp, "flags: %ld\n", afs_printable_int32_ld(tp->flags));
714 bufp += strlen(bufp);
715 (void)sprintf(bufp, "END bnode_proc\n");
716 bufp += strlen(bufp);
717 len = (int)(bufp - buffer);
718 if (write(fd, buffer, len) < 0) {
719 return -1;
720 }
721
722 /*
723 * Now sent out the bnode struct
724 */
725 bufp = buffer;
726 (void)sprintf(bufp, "BEGIN bnode\n");
727 bufp += strlen(bufp);
728 (void)sprintf(bufp, "name: %s\n", tb->name);
729 bufp += strlen(bufp);
730 (void)sprintf(bufp, "rsTime: %ld\n", afs_printable_int32_ld(tb->rsTime));
731 bufp += strlen(bufp);
732 (void)sprintf(bufp, "rsCount: %ld\n", afs_printable_int32_ld(tb->rsCount));
733 bufp += strlen(bufp);
734 (void)sprintf(bufp, "procStartTime: %ld\n", afs_printable_int32_ld(tb->procStartTime));
735 bufp += strlen(bufp);
736 (void)sprintf(bufp, "procStarts: %ld\n", afs_printable_int32_ld(tb->procStarts));
737 bufp += strlen(bufp);
738 (void)sprintf(bufp, "lastAnyExit: %ld\n", afs_printable_int32_ld(tb->lastAnyExit));
739 bufp += strlen(bufp);
740 (void)sprintf(bufp, "lastErrorExit: %ld\n", afs_printable_int32_ld(tb->lastErrorExit));
741 bufp += strlen(bufp);
742 (void)sprintf(bufp, "errorCode: %ld\n", afs_printable_int32_ld(tb->errorCode));
743 bufp += strlen(bufp);
744 (void)sprintf(bufp, "errorSignal: %ld\n", afs_printable_int32_ld(tb->errorSignal));
745 bufp += strlen(bufp);
746/*
747 (void) sprintf(bufp, "lastErrorName: %s\n", tb->lastErrorName);
748 bufp += strlen(bufp);
749*/
750 (void)sprintf(bufp, "goal: %d\n", tb->goal);
751 bufp += strlen(bufp);
752 (void)sprintf(bufp, "END bnode\n");
753 bufp += strlen(bufp);
754 len = (int)(bufp - buffer);
755 if (write(fd, buffer, len) < 0) {
756 return -1;
757 }
758 return 0;
759}
760
761int
762hdl_notifier(struct bnode_proc *tp)
763{
764#ifndef AFS_NT40_ENV /* NT notifier callout not yet implemented */
765 int pid;
766 struct stat tstat;
767
768 if (stat(tp->bnode->notifier, &tstat)) {
769 bozo_Log("BNODE: Failed to find notifier '%s'; ignored\n",
770 tp->bnode->notifier);
771 return (1);
772 }
773 if ((pid = fork()) == 0) {
774 FILE *fout;
775 struct bnode *tb = tp->bnode;
776
777#if defined(AFS_HPUX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_SGI51_ENV)
778 setsid();
779#elif defined(AFS_DARWIN90_ENV)
780 setpgid(0, 0);
781#elif defined(AFS_LINUX20_ENV) || defined(AFS_AIX_ENV)
782 setpgrp();
783#else
784 setpgrp(0, 0);
785#endif
786 fout = popen(tb->notifier, "w");
787 if (fout == NULL) {
788 bozo_Log("BNODE: Failed to find notifier '%s'; ignored\n",
789 tb->notifier);
790 perror(tb->notifier);
791 exit(1);
792 }
793 SendNotifierData(fileno(fout), tp);
794 pclose(fout);
795 exit(0);
796 } else if (pid < 0) {
797 bozo_Log("Failed to fork creating process to handle notifier '%s'\n",
798 tp->bnode->notifier);
799 return -1;
800 }
801#endif /* AFS_NT40_ENV */
802 return (0);
803}
804
805/* Called by IOMGR at low priority on IOMGR's stack shortly after a SIGCHLD
806 * occurs. Wakes up bproc do redo things */
807void *
808bnode_SoftInt(void *param)
809{
810 /* int asignal = (int) param; */
811
812 IOMGR_Cancel(bproc_pid);
813 return NULL;
814}
815
816/* Called at signal interrupt level; queues function to be called
817 * when IOMGR runs again.
818 */
819void
820bnode_Int(int asignal)
821{
822 if (asignal == SIGQUIT || asignal == SIGTERM) {
823 IOMGR_SoftSig(bozo_ShutdownAndExit, (void *)(intptr_t)asignal);
824 } else {
825 IOMGR_SoftSig(bnode_SoftInt, (void *)(intptr_t)asignal);
826 }
827}
828
829
830/* intialize the whole system */
831int
832bnode_Init(void)
833{
834 PROCESS junk;
835 afs_int32 code;
836 struct sigaction newaction;
837 static int initDone = 0;
838
839 if (initDone)
840 return 0;
841 initDone = 1;
842 opr_queue_Init(&allTypes);
843 opr_queue_Init(&allProcs);
844 opr_queue_Init(&allBnodes);
845 memset(&bnode_stats, 0, sizeof(bnode_stats));
846 LWP_InitializeProcessSupport(1, &junk); /* just in case */
847 IOMGR_Initialize();
848 code = LWP_CreateProcess(bproc, BNODE_LWP_STACKSIZE,
849 /* priority */ 1, (void *) /* parm */ 0,
850 "bnode-manager", &bproc_pid);
851 if (code)
852 return code;
853 memset(&newaction, 0, sizeof(newaction));
854 newaction.sa_handler = bnode_Int;
855 code = sigaction(SIGCHLD, &newaction, NULL);
856 if (code)
857 return errno;
858 code = sigaction(SIGQUIT, &newaction, NULL);
859 if (code)
860 return errno;
861 code = sigaction(SIGTERM, &newaction, NULL);
862 if (code)
863 return errno;
864 return code;
865}
866
867/* free token list returned by parseLine */
868int
869bnode_FreeTokens(struct bnode_token *alist)
870{
871 struct bnode_token *nlist;
872 for (; alist; alist = nlist) {
873 nlist = alist->next;
874 free(alist->key);
875 free(alist);
876 }
877 return 0;
878}
879
/*
 * Token-separator test used by bnode_ParseLine: returns 1 for NUL, space,
 * tab and newline, 0 for every other value.
 */
static int
space(int x)
{
    switch (x) {
    case 0:
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
888
889int
890bnode_ParseLine(char *aline, struct bnode_token **alist)
891{
892 char tbuffer[256];
893 char *tptr = NULL;
894 int inToken;
895 struct bnode_token *first, *last;
896 struct bnode_token *ttok;
897 int tc;
898
899 inToken = 0; /* not copying token chars at start */
900 first = (struct bnode_token *)0;
901 last = (struct bnode_token *)0;
902 while (1) {
903 tc = *aline++;
904 if (tc == 0 || space(tc)) { /* terminating null gets us in here, too */
905 if (inToken) {
906 inToken = 0; /* end of this token */
907 *tptr++ = 0;
908 ttok = malloc(sizeof(struct bnode_token));
909 ttok->next = (struct bnode_token *)0;
910 ttok->key = strdup(tbuffer);
911 if (last) {
912 last->next = ttok;
913 last = ttok;
914 } else
915 last = ttok;
916 if (!first)
917 first = ttok;
918 }
919 } else {
920 /* an alpha character */
921 if (!inToken) {
922 tptr = tbuffer;
923 inToken = 1;
924 }
925 if (tptr - tbuffer >= sizeof(tbuffer))
926 return -1; /* token too long */
927 *tptr++ = tc;
928 }
929 if (tc == 0) {
930 /* last token flushed 'cause space(0) --> true */
931 if (last)
932 last->next = (struct bnode_token *)0;
933 *alist = first;
934 return 0;
935 }
936 }
937}
938
939#define MAXVARGS 128
940int
941bnode_NewProc(struct bnode *abnode, char *aexecString, char *coreName,
942 struct bnode_proc **aproc)
943{
944 struct bnode_token *tlist, *tt;
945 afs_int32 code;
946 struct bnode_proc *tp;
947 pid_t cpid;
948 char *argv[MAXVARGS];
949 int i;
950
951 code = bnode_ParseLine(aexecString, &tlist); /* try parsing first */
952 if (code)
953 return code;
954 tp = calloc(1, sizeof(struct bnode_proc));
955 opr_queue_Init(&tp->q);
956 tp->bnode = abnode;
957 tp->comLine = aexecString;
958 tp->coreName = coreName; /* may be null */
959 abnode->procStartTime = FT_ApproxTime();
960 abnode->procStarts++;
961
962 /* convert linked list of tokens into argv structure */
963 for (tt = tlist, i = 0; i < (MAXVARGS - 1) && tt; tt = tt->next, i++) {
964 argv[i] = tt->key;
965 }
966 argv[i] = NULL; /* null-terminated */
967
968 cpid = spawnprocve(argv[0], argv, environ, -1);
969 osi_audit(BOSSpawnProcEvent, 0, AUD_STR, aexecString, AUD_END);
970
971 if (cpid == (pid_t) - 1) {
972 bozo_Log("Failed to spawn process for bnode '%s'\n", abnode->name);
973 bnode_FreeTokens(tlist);
974 free(tp);
975 return errno;
976 }
977 bozo_Log("%s started pid %ld: %s\n", abnode->name, cpid, aexecString);
978
979 bnode_FreeTokens(tlist);
980 opr_queue_Prepend(&allProcs, &tp->q);
981 *aproc = tp;
982 tp->pid = cpid;
983 tp->flags = BPROC_STARTED;
984 tp->flags &= ~BPROC_EXITED;
985 BOP_PROCSTARTED(abnode, tp);
986 bnode_Check(abnode);
987 return 0;
988}
989
990int
991bnode_StopProc(struct bnode_proc *aproc, int asignal)
992{
993 int code;
994 if (!(aproc->flags & BPROC_STARTED) || (aproc->flags & BPROC_EXITED))
995 return BZNOTACTIVE;
996
997 osi_audit(BOSStopProcEvent, 0, AUD_STR, (aproc ? aproc->comLine : NULL),
998 AUD_END);
999
1000 code = kill(aproc->pid, asignal);
1001 bnode_Check(aproc->bnode);
1002 return code;
1003}