Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / ubik / beacon.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
#include <afsconfig.h>
#include <afs/param.h>

#include <roken.h>

#include <errno.h>

#include <afs/opr.h>
#ifdef AFS_PTHREAD_ENV
# include <opr/lock.h>
#else
# include <opr/lockstub.h>
#endif

#include <lock.h>
#include <rx/rx.h>
#include <rx/rxkad.h>
#include <rx/rx_multi.h>
#include <afs/cellconfig.h>
#include <afs/afsutil.h>

#define UBIK_INTERNALS
#include "ubik.h"
#include "ubik_int.h"
32
/* These global variables were used to set the function to use to initialise
 * the client security layer. They are retained for backwards compatibility with
 * legacy callers - the ubik_SetClientSecurityProcs() interface should be used
 * instead.
 */
38 int (*ubik_CRXSecurityProc) (void *rock, struct rx_securityClass **,
39 afs_int32 *);
40 void *ubik_CRXSecurityRock;
41
/*! \name statics used to determine if we're the sync site */
static int nServers;		/*!< total number of servers */
static char amIMagic = 0;	/*!< is this host the magic host */
char amIClone = 0;		/*!< is this a clone which doesn't vote */
/*!< set by ubeacon_InitServerListCommon() when this host is the only server;
 * ubeacon_Interact() then skips sending beacons altogether */
static char ubik_singleServer = 0;
/*\}*/
/* Client security hooks registered through ubik_SetClientSecurityProcs().
 * secLayerProc is preferred over the legacy ubik_CRXSecurityProc when building
 * the security class; tokenCheckProc is consulted by ubeacon_ReinitServer();
 * securityRock is the opaque argument handed to both. */
static int (*secLayerProc) (void *rock, struct rx_securityClass **,
			    afs_int32 *) = NULL;
static int (*tokenCheckProc) (void *rock) = NULL;
static void * securityRock = NULL;

/* NOTE(review): these two globals are not referenced anywhere else in this
 * file; the connections created here use addr_globals.ubikSecIndex and
 * addr_globals.ubikSecClass instead. Confirm whether other files still need
 * them. */
afs_int32 ubikSecIndex;
struct rx_securityClass *ubikSecClass;

/* Values protected by the address lock */
struct addr_data addr_globals;

/* Values protected by the beacon lock */
struct beacon_data beacon_globals;
61
62 static int ubeacon_InitServerListCommon(afs_uint32 ame,
63 struct afsconf_cell *info,
64 char clones[],
65 afs_uint32 aservers[]);
66 static int verifyInterfaceAddress(afs_uint32 *ame, struct afsconf_cell *info,
67 afs_uint32 aservers[]);
68
/*! \file
 * Module responsible for both deciding if we're currently the sync site,
 * and continuing to collect votes so as to remain sync site.
 *
 * The basic module contacts all of the servers it can, trying to get them to vote
 * for this server for sync site. The vote request message (called a beacon message)
 * also specifies until which time this site claims to be the sync site, if at all, thus enabling
 * receiving sites to know how long the sync site guarantee is made for.
 *
 * Each of these beacon messages is thus both a declaration of how long this site will
 * remain sync site, and an attempt to extend that time by collecting votes for a later
 * sync site extension.
 *
 * The voting module is responsible for choosing a reasonable time until which it promises
 * not to vote for someone else. This parameter (BIG seconds) is not actually passed in
 * the interface (perhaps it should be?) but is instead a compile time constant that both
 * sides know about.
 *
 * The beacon and vote modules work intimately together; the vote module decides how long
 * it should promise the beacon module its vote, and the beacon module takes all of these
 * votes and decides for how long it is the synchronization site.
 */
91
92 /*! \brief procedure called from debug rpc call to get this module's state for debugging */
93 void
94 ubeacon_Debug(struct ubik_debug *aparm)
95 {
96 /* fill in beacon's state fields in the ubik_debug structure */
97 aparm->syncSiteUntil = beacon_globals.syncSiteUntil;
98 aparm->nServers = nServers;
99 }
100
101 static int
102 amSyncSite(void)
103 {
104 afs_int32 now;
105 afs_int32 rcode;
106
107 /* special case for fast startup */
108 if (nServers == 1 && !amIClone)
109 return 1; /* one guy is always the sync site */
110
111 UBIK_BEACON_LOCK;
112 if (beacon_globals.ubik_amSyncSite == 0 || amIClone)
113 rcode = 0; /* if I don't think I'm the sync site, say so */
114 else {
115 now = FT_ApproxTime();
116 if (beacon_globals.syncSiteUntil <= now) { /* if my votes have expired, say so */
117 if (beacon_globals.ubik_amSyncSite)
118 ViceLog(5, ("Ubik: I am no longer the sync site\n"));
119 beacon_globals.ubik_amSyncSite = 0;
120 beacon_globals.ubik_syncSiteAdvertised = 0;
121 rcode = 0;
122 } else {
123 rcode = 1; /* otherwise still have the required votes */
124 }
125 }
126 UBIK_BEACON_UNLOCK;
127 ViceLog(5, ("beacon: amSyncSite is %d\n", rcode));
128 return rcode;
129 }
130
131 /*!
132 * \brief Procedure that determines whether this site has enough current votes to remain sync site.
133 *
134 * Called from higher-level modules (everything but the vote module).
135 *
136 * If we're the sync site, check that our guarantees, obtained by the ubeacon_Interact()
137 * light-weight process, haven't expired. We're sync site as long as a majority of the
138 * servers in existence have promised us unexpired guarantees. The variable #ubik_syncSiteUntil
139 * contains the time at which the latest of the majority of the sync site guarantees expires
140 * (if the variable #ubik_amSyncSite is true)
141 * This module also calls up to the recovery module if it thinks that the recovery module
142 * may have to pick up a new database (which offucr sif [sic] we lose the sync site votes).
143 *
144 * \return 1 if local site is the sync site
145 * \return 0 if sync site is elsewhere
146 */
147 int
148 ubeacon_AmSyncSite(void)
149 {
150 afs_int32 rcode;
151
152 rcode = amSyncSite();
153
154 if (!rcode)
155 urecovery_ResetState();
156
157 return rcode;
158 }
159
160 /*!
161 * \brief Determine whether at least quorum are aware we have a sync-site.
162 *
163 * Called from higher-level modules.
164 *
165 * There is a gap between the time when a new sync-site is elected and the time
166 * when the remotes are aware of that. Therefore, any write transaction between
167 * this gap will fail. This will force a new re-election which might be time
168 * consuming. This procedure determines whether the remotes (quorum) are aware
169 * we have a sync-site.
170 *
171 * \return 1 if remotes are aware we have a sync-site
172 * \return 0 if remotes are not aware we have a sync-site
173 */
174 int
175 ubeacon_SyncSiteAdvertised(void)
176 {
177 afs_int32 rcode;
178
179 UBIK_BEACON_LOCK;
180 rcode = beacon_globals.ubik_syncSiteAdvertised;
181 UBIK_BEACON_UNLOCK;
182
183 return rcode;
184 }
185
186 /*!
187 * \see ubeacon_InitServerListCommon()
188 */
189 int
190 ubeacon_InitServerListByInfo(afs_uint32 ame, struct afsconf_cell *info,
191 char clones[])
192 {
193 afs_int32 code;
194
195 code = ubeacon_InitServerListCommon(ame, info, clones, 0);
196 return code;
197 }
198
199 /*!
200 * \param ame "address of me"
201 * \param aservers list of other servers
202 *
203 * \see ubeacon_InitServerListCommon()
204 */
205 int
206 ubeacon_InitServerList(afs_uint32 ame, afs_uint32 aservers[])
207 {
208 afs_int32 code;
209
210 code =
211 ubeacon_InitServerListCommon(ame, (struct afsconf_cell *)0, 0,
212 aservers);
213 return code;
214 }
215
216 /* Must be called with address lock held */
217 void
218 ubeacon_InitSecurityClass(void)
219 {
220 int i;
221 /* get the security index to use, if we can */
222 if (secLayerProc) {
223 i = (*secLayerProc) (securityRock, &addr_globals.ubikSecClass, &addr_globals.ubikSecIndex);
224 } else if (ubik_CRXSecurityProc) {
225 i = (*ubik_CRXSecurityProc) (ubik_CRXSecurityRock, &addr_globals.ubikSecClass,
226 &addr_globals.ubikSecIndex);
227 } else
228 i = 1;
229 if (i) {
230 /* don't have sec module yet */
231 addr_globals.ubikSecIndex = 0;
232 addr_globals.ubikSecClass = rxnull_NewClientSecurityObject();
233 }
234 }
235
236 void
237 ubeacon_ReinitServer(struct ubik_server *ts)
238 {
239 if (tokenCheckProc && !(*tokenCheckProc) (securityRock)) {
240 struct rx_connection *disk_rxcid;
241 struct rx_connection *vote_rxcid;
242 struct rx_connection *tmp;
243 UBIK_ADDR_LOCK;
244 ubeacon_InitSecurityClass();
245 disk_rxcid =
246 rx_NewConnection(rx_HostOf(rx_PeerOf(ts->disk_rxcid)),
247 ubik_callPortal, DISK_SERVICE_ID,
248 addr_globals.ubikSecClass, addr_globals.ubikSecIndex);
249 if (disk_rxcid) {
250 tmp = ts->disk_rxcid;
251 ts->disk_rxcid = disk_rxcid;
252 rx_PutConnection(tmp);
253 }
254 vote_rxcid =
255 rx_NewConnection(rx_HostOf(rx_PeerOf(ts->vote_rxcid)),
256 ubik_callPortal, VOTE_SERVICE_ID,
257 addr_globals.ubikSecClass, addr_globals.ubikSecIndex);
258 if (vote_rxcid) {
259 tmp = ts->vote_rxcid;
260 ts->vote_rxcid = vote_rxcid;
261 rx_PutConnection(tmp);
262 }
263 UBIK_ADDR_UNLOCK;
264 }
265 }
266
267 /*!
268 * \brief setup server list
269 *
270 * \param ame "address of me"
271 * \param aservers list of other servers
272 *
273 * called only at initialization to set up the list of servers to
274 * contact for votes. Just creates the server structure.
275 *
276 * The "magic" host is the one with the lowest internet address. It is
277 * magic because its vote counts epsilon more than the others. This acts
278 * as a tie-breaker when we have an even number of hosts in the system.
279 * For example, if the "magic" host is up in a 2 site system, then it
280 * is sync site. Without the magic host hack, if anyone crashed in a 2
281 * site system, we'd be out of business.
282 *
283 * \note There are two connections in every server structure, one for
284 * vote calls (which must always go through quickly) and one for database
285 * operations, which are subject to waiting for locks. If we used only
286 * one, the votes would sometimes get held up behind database operations,
287 * and the sync site guarantees would timeout even though the host would be
288 * up for communication.
289 *
290 * \see ubeacon_InitServerList(), ubeacon_InitServerListByInfo()
291 */
292 int
293 ubeacon_InitServerListCommon(afs_uint32 ame, struct afsconf_cell *info,
294 char clones[], afs_uint32 aservers[])
295 {
296 struct ubik_server *ts;
297 afs_int32 me = -1;
298 afs_int32 servAddr;
299 afs_int32 i, code;
300 afs_int32 magicHost;
301 struct ubik_server *magicServer;
302
303 /* verify that the addresses passed in are correct */
304 if ((code = verifyInterfaceAddress(&ame, info, aservers)))
305 return code;
306
307 ubeacon_InitSecurityClass();
308
309 magicHost = ntohl(ame); /* do comparisons in host order */
310 magicServer = (struct ubik_server *)0;
311
312 if (info) {
313 for (i = 0; i < info->numServers; i++) {
314 if (ntohl((afs_uint32) info->hostAddr[i].sin_addr.s_addr) ==
315 ntohl((afs_uint32) ame)) {
316 me = i;
317 if (clones[i]) {
318 amIClone = 1;
319 magicHost = 0;
320 }
321 }
322 }
323 nServers = 0;
324 for (i = 0; i < info->numServers; i++) {
325 if (i == me)
326 continue;
327 ts = calloc(1, sizeof(struct ubik_server));
328 ts->next = ubik_servers;
329 ubik_servers = ts;
330 ts->addr[0] = info->hostAddr[i].sin_addr.s_addr;
331 if (clones[i]) {
332 ts->isClone = 1;
333 } else {
334 if (!magicHost
335 || ntohl((afs_uint32) ts->addr[0]) <
336 (afs_uint32) magicHost) {
337 magicHost = ntohl(ts->addr[0]);
338 magicServer = ts;
339 }
340 ++nServers;
341 }
342 /* for vote reqs */
343 ts->vote_rxcid =
344 rx_NewConnection(info->hostAddr[i].sin_addr.s_addr,
345 ubik_callPortal, VOTE_SERVICE_ID,
346 addr_globals.ubikSecClass, addr_globals.ubikSecIndex);
347 /* for disk reqs */
348 ts->disk_rxcid =
349 rx_NewConnection(info->hostAddr[i].sin_addr.s_addr,
350 ubik_callPortal, DISK_SERVICE_ID,
351 addr_globals.ubikSecClass, addr_globals.ubikSecIndex);
352 ts->up = 1;
353 }
354 } else {
355 i = 0;
356 while ((servAddr = *aservers++)) {
357 if (i >= MAXSERVERS)
358 return UNHOSTS; /* too many hosts */
359 ts = calloc(1, sizeof(struct ubik_server));
360 ts->next = ubik_servers;
361 ubik_servers = ts;
362 ts->addr[0] = servAddr; /* primary address in net byte order */
363 ts->vote_rxcid = rx_NewConnection(servAddr, ubik_callPortal, VOTE_SERVICE_ID,
364 addr_globals.ubikSecClass, addr_globals.ubikSecIndex); /* for vote reqs */
365 ts->disk_rxcid = rx_NewConnection(servAddr, ubik_callPortal, DISK_SERVICE_ID,
366 addr_globals.ubikSecClass, addr_globals.ubikSecIndex); /* for disk reqs */
367 ts->isClone = 0; /* don't know about clones */
368 ts->up = 1;
369 if (ntohl((afs_uint32) servAddr) < (afs_uint32) magicHost) {
370 magicHost = ntohl(servAddr);
371 magicServer = ts;
372 }
373 i++;
374 }
375 }
376 if (magicServer)
377 magicServer->magic = 1; /* remember for when counting votes */
378
379 if (!amIClone && !magicServer)
380 amIMagic = 1;
381 if (info) {
382 if (!amIClone)
383 ++nServers; /* count this server as well as the remotes */
384 } else
385 nServers = i + 1; /* count this server as well as the remotes */
386
387 ubik_quorum = (nServers >> 1) + 1; /* compute the majority figure */
388
389 /* Shoud we set some defaults for RX??
390 r_retryInterval = 2;
391 r_nRetries = (RPCTIMEOUT/r_retryInterval);
392 */
393 if (info) {
394 if (!ubik_servers) /* special case 1 server */
395 ubik_singleServer = 1;
396 if (nServers == 1 && !amIClone) {
397 beacon_globals.ubik_amSyncSite = 1; /* let's start as sync site */
398 beacon_globals.syncSiteUntil = 0x7fffffff; /* and be it quite a while */
399 beacon_globals.ubik_syncSiteAdvertised = 1;
400 DBHOLD(ubik_dbase);
401 UBIK_VERSION_LOCK;
402 version_globals.ubik_epochTime = FT_ApproxTime();
403 UBIK_VERSION_UNLOCK;
404 DBRELE(ubik_dbase);
405 }
406 } else {
407 if (nServers == 1) /* special case 1 server */
408 ubik_singleServer = 1;
409 }
410
411 if (ubik_singleServer) {
412 if (!beacon_globals.ubik_amSyncSite) {
413 ViceLog(5, ("Ubik: I am the sync site - 1 server\n"));
414 DBHOLD(ubik_dbase);
415 UBIK_VERSION_LOCK;
416 version_globals.ubik_epochTime = FT_ApproxTime();
417 UBIK_VERSION_UNLOCK;
418 DBRELE(ubik_dbase);
419 }
420 beacon_globals.ubik_amSyncSite = 1;
421 beacon_globals.syncSiteUntil = 0x7fffffff; /* quite a while */
422 beacon_globals.ubik_syncSiteAdvertised = 1;
423 }
424 return 0;
425 }
426
427 /*!
428 * \brief main lwp loop for code that sends out beacons.
429 *
430 * This code only runs while we're sync site or we want to be the sync site.
431 * It runs in its very own light-weight process.
432 */
433 void *
434 ubeacon_Interact(void *dummy)
435 {
436 afs_int32 code;
437 struct timeval tt;
438 struct rx_connection *connections[MAXSERVERS];
439 struct ubik_server *servers[MAXSERVERS];
440 afs_int32 i;
441 struct ubik_server *ts;
442 afs_int32 temp, yesVotes, lastWakeupTime, oldestYesVote, syncsite;
443 int becameSyncSite;
444 struct ubik_tid ttid;
445 struct ubik_version tversion;
446 afs_int32 startTime;
447
448 afs_pthread_setname_self("beacon");
449
450 /* loop forever getting votes */
451 lastWakeupTime = 0; /* keep track of time we last started a vote collection */
452 while (1) {
453
454 /* don't wakeup more than every POLLTIME seconds */
455 temp = (lastWakeupTime + POLLTIME) - FT_ApproxTime();
456 /* don't sleep if last collection phase took too long (probably timed someone out ) */
457 if (temp > 0) {
458 if (temp > POLLTIME)
459 temp = POLLTIME;
460 tt.tv_sec = temp;
461 tt.tv_usec = 0;
462 #ifdef AFS_PTHREAD_ENV
463 select(0, 0, 0, 0, &tt);
464 #else
465 IOMGR_Select(0, 0, 0, 0, &tt);
466 #endif
467 }
468
469 lastWakeupTime = FT_ApproxTime(); /* started a new collection phase */
470
471 if (ubik_singleServer)
472 continue; /* special-case 1 server for speedy startup */
473
474 if (!uvote_ShouldIRun())
475 continue; /* if voter has heard from a better candidate than us, don't bother running */
476
477 /* otherwise we should run for election, or we're the sync site (and have already won);
478 * send out the beacon packets */
479 /* build list of all up hosts (noticing dead hosts are running again
480 * is a task for the recovery module, not the beacon module), and
481 * prepare to send them an r multi-call containing the beacon message */
482 i = 0; /* collect connections */
483 UBIK_BEACON_LOCK;
484 UBIK_ADDR_LOCK;
485 for (ts = ubik_servers; ts; ts = ts->next) {
486 if (ts->up && ts->addr[0] != ubik_host[0]) {
487 servers[i] = ts;
488 connections[i++] = ts->vote_rxcid;
489 }
490 }
491 UBIK_ADDR_UNLOCK;
492 UBIK_BEACON_UNLOCK;
493 servers[i] = (struct ubik_server *)0; /* end of list */
494 /* note that we assume in the vote module that we'll always get at least BIGTIME
495 * seconds of vote from anyone who votes for us, which means we can conservatively
496 * assume we'll be fine until SMALLTIME seconds after we start collecting votes */
497 /* this next is essentially an expansion of rgen's ServBeacon routine */
498
499 UBIK_VERSION_LOCK;
500 ttid.epoch = version_globals.ubik_epochTime;
501 if (ubik_dbase->flags & DBWRITING) {
502 /*
503 * if a write is in progress, we have to send the writeTidCounter
504 * which holds the tid counter of the write transaction , and not
505 * send the tidCounter value which holds the tid counter of the
506 * last transaction.
507 */
508 ttid.counter = ubik_dbase->writeTidCounter;
509 } else
510 ttid.counter = ubik_dbase->tidCounter + 1;
511 tversion.epoch = ubik_dbase->version.epoch;
512 tversion.counter = ubik_dbase->version.counter;
513 UBIK_VERSION_UNLOCK;
514
515 /* now analyze return codes, counting up our votes */
516 yesVotes = 0; /* count how many to ensure we have quorum */
517 oldestYesVote = 0x7fffffff; /* time quorum expires */
518 syncsite = amSyncSite();
519 if (!syncsite) {
520 /* Ok to use the DB lock here since we aren't sync site */
521 DBHOLD(ubik_dbase);
522 urecovery_ResetState();
523 DBRELE(ubik_dbase);
524 }
525 startTime = FT_ApproxTime();
526 /*
527 * Don't waste time using mult Rx calls if there are no connections out there
528 */
529 if (i > 0) {
530 char hoststr[16];
531 multi_Rx(connections, i) {
532 multi_VOTE_Beacon(syncsite, startTime, &tversion,
533 &ttid);
534 temp = FT_ApproxTime(); /* now, more or less */
535 ts = servers[multi_i];
536 UBIK_BEACON_LOCK;
537 ts->lastBeaconSent = temp;
538 code = multi_error;
539
540 if (code > 0 && ((code < temp && code < temp - 3600) ||
541 (code > temp && code > temp + 3600))) {
542 /* if we reached here, supposedly the remote host voted
543 * for us based on a computation from over an hour ago in
544 * the past, or over an hour in the future. this is
545 * unlikely; what actually probably happened is that the
546 * call generated some error and was aborted. this can
547 * happen due to errors with the rx security class in play
548 * (rxkad, rxgk, etc). treat the host as if we got a
549 * timeout, since this is not a valid vote. */
550 ViceLog(0, ("assuming distant vote time %d from %s is an error; marking host down\n",
551 (int)code, afs_inet_ntoa_r(ts->addr[0], hoststr)));
552 code = -1;
553 }
554 if (code > 0 && rx_ConnError(connections[multi_i])) {
555 ViceLog(0, ("assuming vote from %s is invalid due to conn error %d; marking host down\n",
556 afs_inet_ntoa_r(ts->addr[0], hoststr),
557 (int)rx_ConnError(connections[multi_i])));
558 code = -1;
559 }
560
561 /* note that the vote time (the return code) represents the time
562 * the vote was computed, *not* the time the vote expires. We compute
563 * the latter down below if we got enough votes to go with */
564 if (code > 0) {
565 if ((code & ~0xff) == ERROR_TABLE_BASE_RXK) {
566 ViceLog(5, ("token error %d from host %s\n",
567 code, afs_inet_ntoa_r(ts->addr[0], hoststr)));
568 ts->up = 0;
569 ts->beaconSinceDown = 0;
570 urecovery_LostServer(ts);
571 } else {
572 ts->lastVoteTime = code;
573 if (code < oldestYesVote)
574 oldestYesVote = code;
575 ts->lastVote = 1;
576 if (!ts->isClone)
577 yesVotes += 2;
578 if (ts->magic)
579 yesVotes++; /* the extra epsilon */
580 ts->up = 1; /* server is up (not really necessary: recovery does this for real) */
581 ts->beaconSinceDown = 1;
582 ViceLog(5, ("yes vote from host %s\n",
583 afs_inet_ntoa_r(ts->addr[0], hoststr)));
584 }
585 } else if (code == 0) {
586 ts->lastVoteTime = temp;
587 ts->lastVote = 0;
588 ts->beaconSinceDown = 1;
589 ViceLog(5, ("no vote from %s\n",
590 afs_inet_ntoa_r(ts->addr[0], hoststr)));
591 } else if (code < 0) {
592 ts->up = 0;
593 ts->beaconSinceDown = 0;
594 urecovery_LostServer(ts);
595 ViceLog(5, ("time out from %s\n",
596 afs_inet_ntoa_r(ts->addr[0], hoststr)));
597 }
598 UBIK_BEACON_UNLOCK;
599 }
600 multi_End;
601 }
602 /* now call our own voter module to see if we'll vote for ourself. Note that
603 * the same restrictions apply for our voting for ourself as for our voting
604 * for anyone else. */
605 i = SVOTE_Beacon((struct rx_call *)0, ubeacon_AmSyncSite(), startTime,
606 &tversion, &ttid);
607 if (i) {
608 yesVotes += 2;
609 if (amIMagic)
610 yesVotes++; /* extra epsilon */
611 if (i < oldestYesVote)
612 oldestYesVote = i;
613 }
614
615 /* now decide if we have enough votes to become sync site.
616 * Note that we can still get enough votes even if we didn't for ourself. */
617 becameSyncSite = 0;
618 if (yesVotes > nServers) { /* yesVotes is bumped by 2 or 3 for each site */
619 UBIK_BEACON_LOCK;
620 if (!beacon_globals.ubik_amSyncSite) {
621 ViceLog(5, ("Ubik: I am the sync site\n"));
622 /* Defer actually changing any variables until we can take the
623 * DB lock (which is before the beacon lock in the lock order). */
624 becameSyncSite = 1;
625 } else {
626 beacon_globals.syncSiteUntil = oldestYesVote + SMALLTIME;
627 /* at this point, we have the guarantee that at least quorum
628 * received a beacon packet informing we have a sync-site. */
629 beacon_globals.ubik_syncSiteAdvertised = 1;
630 }
631 UBIK_BEACON_UNLOCK;
632 } else {
633 UBIK_BEACON_LOCK;
634 if (beacon_globals.ubik_amSyncSite)
635 ViceLog(5, ("Ubik: I am no longer the sync site\n"));
636 beacon_globals.ubik_amSyncSite = 0;
637 beacon_globals.ubik_syncSiteAdvertised = 0;
638 UBIK_BEACON_UNLOCK;
639 DBHOLD(ubik_dbase);
640 urecovery_ResetState(); /* tell recovery we're no longer the sync site */
641 DBRELE(ubik_dbase);
642 }
643 /* We cannot take the DB lock around the entire preceding conditional,
644 * because if we are currently the sync site and this election serves
645 * to confirm that status, the DB lock may already be held for a long-running
646 * write transaction. In such a case, attempting to acquire the DB lock
647 * would cause the beacon thread to block and disrupt election processing.
648 * However, if we are transitioning from not-sync-site to sync-site, there
649 * can be no outstanding transactions and acquiring the DB lock should be
650 * safe without extended blocking. */
651 if (becameSyncSite) {
652 DBHOLD(ubik_dbase);
653 UBIK_BEACON_LOCK;
654 UBIK_VERSION_LOCK;
655 version_globals.ubik_epochTime = FT_ApproxTime();
656 beacon_globals.ubik_amSyncSite = 1;
657 beacon_globals.syncSiteUntil = oldestYesVote + SMALLTIME;
658 UBIK_VERSION_UNLOCK;
659 UBIK_BEACON_UNLOCK;
660 DBRELE(ubik_dbase);
661 }
662
663 } /* while loop */
664 AFS_UNREACHED(return(NULL));
665 }
666
667 /*!
668 * \brief Verify that a given IP addresses does actually exist on this machine.
669 *
670 * \param ame the pointer to my IP address specified in the
671 * CellServDB file.
672 * \param aservers an array containing IP
673 * addresses of remote ubik servers. The array is
674 * terminated by a zero address.
675 *
676 * Algorithm : Verify that my IP addresses \p ame does actually exist
677 * on this machine. If any of my IP addresses are there
678 * in the remote server list \p aserver, remove them from
679 * this list. Update global variable \p ubik_host[] with
680 * my IP addresses.
681 *
682 * \return 0 on success, non-zero on failure
683 */
684 static int
685 verifyInterfaceAddress(afs_uint32 *ame, struct afsconf_cell *info,
686 afs_uint32 aservers[]) {
687 afs_uint32 myAddr[UBIK_MAX_INTERFACE_ADDR], *servList, tmpAddr;
688 afs_uint32 myAddr2[UBIK_MAX_INTERFACE_ADDR];
689 char hoststr[16];
690 int tcount, count, found, i, j, totalServers, start, end, usednetfiles =
691 0;
692
693 if (info)
694 totalServers = info->numServers;
695 else { /* count the number of servers */
696 for (totalServers = 0, servList = aservers; *servList; servList++)
697 totalServers++;
698 }
699
700 if (AFSDIR_SERVER_NETRESTRICT_FILEPATH || AFSDIR_SERVER_NETINFO_FILEPATH) {
701 /*
702 * Find addresses we are supposed to register as per the netrestrict file
703 * if it exists, else just register all the addresses we find on this
704 * host as returned by rx_getAllAddr (in NBO)
705 */
706 char reason[1024];
707 count = afsconf_ParseNetFiles(myAddr, NULL, NULL,
708 UBIK_MAX_INTERFACE_ADDR, reason,
709 AFSDIR_SERVER_NETINFO_FILEPATH,
710 AFSDIR_SERVER_NETRESTRICT_FILEPATH);
711 if (count < 0) {
712 ViceLog(0, ("ubik: Can't register any valid addresses:%s\n",
713 reason));
714 ViceLog(0, ("Aborting..\n"));
715 return UBADHOST;
716 }
717 usednetfiles++;
718 } else {
719 /* get all my interface addresses in net byte order */
720 count = rx_getAllAddr(myAddr, UBIK_MAX_INTERFACE_ADDR);
721 }
722
723 if (count <= 0) { /* no address found */
724 ViceLog(0, ("ubik: No network addresses found, aborting..\n"));
725 return UBADHOST;
726 }
727
728 /* verify that the My-address passed in by ubik is correct */
729 for (j = 0, found = 0; j < count; j++) {
730 if (*ame == myAddr[j]) { /* both in net byte order */
731 found = 1;
732 break;
733 }
734 }
735
736 if (!found) {
737 ViceLog(0, ("ubik: primary address %s does not exist\n",
738 afs_inet_ntoa_r(*ame, hoststr)));
739 /* if we had the result of rx_getAllAddr already, avoid subverting
740 * the "is gethostbyname(gethostname()) us" check. If we're
741 * using NetInfo/NetRestrict, we assume they have enough clue
742 * to avoid that big hole in their foot from the loaded gun. */
743 if (usednetfiles) {
744 /* take the address we did get, then see if ame was masked */
745 *ame = myAddr[0];
746 tcount = rx_getAllAddr(myAddr2, UBIK_MAX_INTERFACE_ADDR);
747 if (tcount <= 0) { /* no address found */
748 ViceLog(0, ("ubik: No network addresses found, aborting..\n"));
749 return UBADHOST;
750 }
751
752 /* verify that the My-address passed in by ubik is correct */
753 for (j = 0, found = 0; j < tcount; j++) {
754 if (*ame == myAddr2[j]) { /* both in net byte order */
755 found = 1;
756 break;
757 }
758 }
759 }
760 if (!found)
761 return UBADHOST;
762 }
763
764 /* if any of my addresses are there in serverList, then
765 ** use that as my primary addresses : the higher level
766 ** application screwed up in dealing with multihomed concepts
767 */
768 for (j = 0, found = 0; j < count; j++) {
769 for (i = 0; i < totalServers; i++) {
770 if (info)
771 tmpAddr = (afs_uint32) info->hostAddr[i].sin_addr.s_addr;
772 else
773 tmpAddr = aservers[i];
774 if (myAddr[j] == tmpAddr) {
775 *ame = tmpAddr;
776 if (!info)
777 aservers[i] = 0;
778 found = 1;
779 }
780 }
781 }
782 if (found)
783 ViceLog(0, ("Using %s as my primary address\n", afs_inet_ntoa_r(*ame, hoststr)));
784
785 if (!info) {
786 /* get rid of servers which were purged because all
787 ** those interface addresses are myself
788 */
789 for (start = 0, end = totalServers - 1; (start < end); start++, end--) {
790 /* find the first zero entry from the beginning */
791 for (; (start < end) && (aservers[start]); start++);
792
793 /* find the last non-zero entry from the end */
794 for (; (end >= 0) && (!aservers[end]); end--);
795
796 /* if there is nothing more to purge, exit from loop */
797 if (start >= end)
798 break;
799
800 /* move the entry */
801 aservers[start] = aservers[end];
802 aservers[end] = 0; /* this entry was moved */
803 }
804 }
805
806 /* update all my addresses in ubik_host in such a way
807 ** that ubik_host[0] has the primary address
808 */
809 ubik_host[0] = *ame;
810 for (j = 0, i = 1; j < count; j++)
811 if (*ame != myAddr[j])
812 ubik_host[i++] = myAddr[j];
813
814 return 0; /* return success */
815 }
816
817
818 /*!
819 * \brief Exchange IP address information with remote servers.
820 *
821 * \param ubik_host an array containing all my IP addresses.
822 *
823 * Algorithm : Do an RPC to all remote ubik servers informing them
824 * about my IP addresses. Get their IP addresses and
825 * update my linked list of ubik servers \p ubik_servers
826 *
827 * \return 0 on success, non-zero on failure
828 */
829 int
830 ubeacon_updateUbikNetworkAddress(afs_uint32 ubik_host[UBIK_MAX_INTERFACE_ADDR])
831 {
832 int j, count, code = 0;
833 UbikInterfaceAddr inAddr, outAddr;
834 struct rx_connection *conns[MAXSERVERS];
835 struct ubik_server *ts, *server[MAXSERVERS];
836 char buffer[32];
837 char hoststr[16];
838
839 UBIK_ADDR_LOCK;
840 for (count = 0, ts = ubik_servers; ts; count++, ts = ts->next) {
841 conns[count] = ts->disk_rxcid;
842 server[count] = ts;
843 }
844 UBIK_ADDR_UNLOCK;
845
846
847 /* inform all other servers only if there are more than one
848 * database servers in the cell */
849
850 if (count > 0) {
851
852 for (j = 0; j < UBIK_MAX_INTERFACE_ADDR; j++)
853 inAddr.hostAddr[j] = ntohl(ubik_host[j]);
854
855
856 /* do the multi-RX RPC to all other servers */
857 multi_Rx(conns, count) {
858 multi_DISK_UpdateInterfaceAddr(&inAddr, &outAddr);
859 ts = server[multi_i]; /* reply received from this server */
860 if (!multi_error) {
861 UBIK_ADDR_LOCK;
862 if (ts->addr[0] != htonl(outAddr.hostAddr[0])) {
863 code = UBADHOST;
864 strcpy(buffer, afs_inet_ntoa_r(ts->addr[0], hoststr));
865 ViceLog(0, ("ubik:Two primary addresses for same server \
866 %s %s\n", buffer,
867 afs_inet_ntoa_r(htonl(outAddr.hostAddr[0]), hoststr)));
868 } else {
869 for (j = 1; j < UBIK_MAX_INTERFACE_ADDR; j++)
870 ts->addr[j] = htonl(outAddr.hostAddr[j]);
871 }
872 UBIK_ADDR_UNLOCK;
873 } else if (multi_error == RXGEN_OPCODE) { /* pre 3.5 remote server */
874 UBIK_ADDR_LOCK;
875 ViceLog(0, ("ubik server %s does not support UpdateInterfaceAddr RPC\n",
876 afs_inet_ntoa_r(ts->addr[0], hoststr)));
877 UBIK_ADDR_UNLOCK;
878 } else if (multi_error == UBADHOST) {
879 code = UBADHOST; /* remote CellServDB inconsistency */
880 ViceLog(0, ("Inconsistent Cell Info on server:\n"));
881 UBIK_ADDR_LOCK;
882 for (j = 0; j < UBIK_MAX_INTERFACE_ADDR && ts->addr[j]; j++)
883 ViceLog(0, ("... %s\n", afs_inet_ntoa_r(ts->addr[j], hoststr)));
884 UBIK_ADDR_UNLOCK;
885 } else {
886 UBIK_BEACON_LOCK;
887 ts->up = 0; /* mark the remote server as down */
888 UBIK_BEACON_UNLOCK;
889 }
890 }
891 multi_End;
892 }
893 return code;
894 }
895
896 void
897 ubik_SetClientSecurityProcs(int (*secproc) (void *,
898 struct rx_securityClass **,
899 afs_int32 *),
900 int (*checkproc) (void *),
901 void *rock)
902 {
903 secLayerProc = secproc;
904 tokenCheckProc = checkproc;
905 securityRock = rock;
906 }