Commit | Line | Data |
---|---|---|
805e021f CE |
1 | /* |
2 | * Copyright 2000, International Business Machines Corporation and others. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * This software has been released under the terms of the IBM Public | |
6 | * License. For details, see the LICENSE file in the top-level source | |
7 | * directory or online at http://www.openafs.org/dl/license10.html | |
8 | */ | |
9 | ||
10 | #include <afsconfig.h> | |
11 | #include <afs/param.h> | |
12 | ||
13 | #include <roken.h> | |
14 | ||
15 | #include <afs/opr.h> | |
16 | #ifdef AFS_PTHREAD_ENV | |
17 | # include <opr/lock.h> | |
18 | #else | |
19 | # include <opr/lockstub.h> | |
20 | #endif | |
21 | #include <lock.h> | |
22 | #include <rx/rx.h> | |
23 | #include <afs/afsutil.h> | |
24 | ||
25 | #define UBIK_INTERNALS | |
26 | #include "ubik.h" | |
27 | #include "ubik_int.h" | |
28 | ||
29 | /*! \file | |
30 | * General Ubik Goal: | |
31 | * The goal is to provide reliable operation among N servers, such that any | |
32 | * server can crash with the remaining servers continuing operation within a | |
33 | * short period of time. While a \b short outage is acceptable, this time | |
34 | * should be order of 3 minutes or less. | |
35 | * | |
36 | * Theory of operation: | |
37 | * | |
38 | * Note: #SMALLTIME and #BIGTIME are essentially the same time value, separated | |
39 | * only by the clock skew, #MAXSKEW. In general, if you are making guarantees | |
40 | * for someone else, promise them no more than #SMALLTIME seconds of whatever | |
41 | * invariant you provide. If you are waiting to be sure some invariant is now | |
42 | * \b false, wait at least #BIGTIME seconds to be sure that #SMALLTIME seconds | |
43 | * has passed at the other site. | |
44 | * | |
45 | * Now, back to the design: | |
46 | * One site in the collection is a special site, designated the \b sync site. | |
47 | * The sync site sends periodic messages, which can be thought of as | |
48 | * keep-alive messages. When a non-sync site hears from the sync site, it | |
49 | * knows that it is getting updates for the next #SMALLTIME seconds from that | |
50 | * sync site. | |
51 | * | |
52 | * If a server does not hear from the sync site in #SMALLTIME seconds, it | |
53 | * determines that it no longer is getting updates, and thus refuses to give | |
54 | * out potentially out-of-date data. If a sync site can not muster a majority | |
55 | * of servers to agree that it is the sync site, then there is a possibility | |
56 | * that a network partition has occurred, allowing another server to claim to | |
57 | * be the sync site. Thus, any time that the sync site has not heard from a | |
58 | * majority of the servers in the last #SMALLTIME seconds, it voluntarily | |
59 | * relinquishes its role as sync site. | |
60 | * | |
61 | * While attempting to nominate a new sync site, certain rules apply. First, | |
62 | * a server can not reply "ok" (return 1 from ServBeacon) to two different | |
63 | * hosts in less than #BIGTIME seconds; this allows a server that has heard | |
64 | * affirmative replies from a majority of the servers to know that no other | |
65 | * server in the network has heard enough affirmative replies in the last | |
66 | * #BIGTIME seconds to become sync site, too. The variables #ubik_lastYesTime | |
67 | * and #lastYesHost are used by all servers to keep track of which host they | |
68 | * have last replied affirmatively to, when queried by a potential new sync | |
69 | * site. | |
70 | * | |
71 | * Once a sync site has become a sync site, it periodically sends beacon | |
72 | * messages with a parameter of 1, indicating that it already has determined | |
73 | * it is supposed to be the sync site. The servers treat such a message as a | |
74 | * guarantee that no other site will become sync site for the next #SMALLTIME | |
75 | * seconds. In the interim, these servers can answer a query concerning which | |
76 | * site is the sync site without any communication with any server. The | |
77 | * variables #lastBeaconArrival and #lastBeaconHost are used by all servers to | |
78 | * keep track of which sync site has last contacted them. | |
79 | * | |
80 | * One complication occurs while nominating a new sync site: each site may be | |
81 | * trying to nominate a different site (based on the value of #lastYesHost), | |
82 | * yet we must nominate the smallest host (under some order), to prevent this | |
83 | * process from looping. The process could loop by having each server give | |
84 | * one vote to another server, but with no server getting a majority of the | |
85 | * votes. To avoid this, we try to withhold our votes for the server with the | |
86 | * lowest internet address (an easy-to-generate order). To this effect, we | |
87 | * keep track (in #lowestTime and #lowestHost) of the lowest server trying to | |
88 | * become a sync site. We wait for this server unless there is already a sync | |
89 | * site (indicated by ServBeacon's parameter being 1). | |
90 | */ | |
91 | ||
92 | afs_int32 ubik_debugFlag = 0; /*!< print out debugging messages? */ | |
93 | ||
94 | struct vote_data vote_globals; | |
95 | ||
96 | ||
97 | /*! | |
98 | * \brief Decide if we should try to become sync site. | |
99 | * | |
100 | * The basic rule is that we | |
101 | * don't run if there is a valid sync site and it ain't us (we have to run if | |
102 | * it is us, in order to keep our votes). If there is no sync site, then we | |
103 | * want to run if we're the lowest numbered host running, otherwise we defer to | |
104 | * the lowest host. However, if the lowest host hasn't been heard from for a | |
105 | * while, then we start running again, in case he crashed. | |
106 | * | |
107 | * \return true if we should run, and false otherwise. | |
108 | */ | |
109 | int | |
110 | uvote_ShouldIRun(void) | |
111 | { | |
112 | afs_int32 now; | |
113 | int code = 1; /* default to yes */ | |
114 | ||
115 | if (amIClone) { | |
116 | return 0; /* if we cannot be the sync-site, do not ask for votes */ | |
117 | } | |
118 | ||
119 | UBIK_VOTE_LOCK; | |
120 | now = FT_ApproxTime(); | |
121 | if (BIGTIME + vote_globals.ubik_lastYesTime < now) | |
122 | goto done; | |
123 | if (vote_globals.lastYesState && vote_globals.lastYesHost != ubik_host[0]) { | |
124 | code = 0; /* other guy is sync site, leave him alone */ | |
125 | goto done; | |
126 | } | |
127 | if (ntohl((afs_uint32)vote_globals.lastYesHost) < ntohl((afs_uint32)ubik_host[0])) { | |
128 | code = 0; /* if someone is valid and better than us, don't run */ | |
129 | goto done; | |
130 | } | |
131 | ||
132 | done: | |
133 | UBIK_VOTE_UNLOCK; | |
134 | return code; | |
135 | } | |
136 | ||
137 | /*! | |
138 | * \brief Return the current synchronization site, if any. | |
139 | * | |
140 | * Simple approach: if the | |
141 | * last guy we voted yes for claims to be the sync site, then we we're happy to | |
142 | * use that guy for a sync site until the time his mandate expires. If the guy | |
143 | * does not claim to be sync site, then, of course, there's none. | |
144 | * | |
145 | * In addition, if we lost the sync, we set #urecovery_syncSite to an invalid | |
146 | * value, indicating that we no longer know which version of the dbase is the | |
147 | * one we should have. We'll get a new one when we next hear from the sync | |
148 | * site. | |
149 | * | |
150 | * \return 0 or currently valid sync site. It can return our own | |
151 | * address, if we're the sync site. | |
152 | */ | |
153 | afs_int32 | |
154 | uvote_GetSyncSite(void) | |
155 | { | |
156 | afs_int32 now; | |
157 | afs_int32 code; | |
158 | ||
159 | UBIK_VOTE_LOCK; | |
160 | if (!vote_globals.lastYesState) | |
161 | code = 0; | |
162 | else { | |
163 | now = FT_ApproxTime(); | |
164 | if (SMALLTIME + vote_globals.lastYesClaim < now) | |
165 | code = 0; /* last guy timed out */ | |
166 | else | |
167 | code = vote_globals.lastYesHost; | |
168 | } | |
169 | UBIK_VOTE_UNLOCK; | |
170 | return code; | |
171 | } | |
172 | ||
173 | /*! | |
174 | * \brief called by the sync site to handle vote beacons; if aconn is null, this is a | |
175 | * local call | |
176 | * | |
177 | * \returns 0 or time when the vote was sent. It returns 0 if we are | |
178 | * not voting for this sync site, or the time we actually voted yes, if | |
179 | * non-zero. | |
180 | */ | |
181 | afs_int32 | |
182 | SVOTE_Beacon(struct rx_call * rxcall, afs_int32 astate, | |
183 | afs_int32 astart, struct ubik_version * avers, | |
184 | struct ubik_tid * atid) | |
185 | { | |
186 | afs_int32 otherHost; | |
187 | afs_int32 now; | |
188 | afs_int32 vote; | |
189 | struct rx_connection *aconn; | |
190 | struct rx_peer *rxp; | |
191 | struct ubik_server *ts; | |
192 | int isClone = 0; | |
193 | char hoststr[16]; | |
194 | ||
195 | if (rxcall) { /* caller's host */ | |
196 | aconn = rx_ConnectionOf(rxcall); | |
197 | rxp = rx_PeerOf(aconn); | |
198 | otherHost = rx_HostOf(rxp); | |
199 | ||
200 | /* get the primary interface address for this host. */ | |
201 | /* This is the identifier that ubik uses. */ | |
202 | otherHost = ubikGetPrimaryInterfaceAddr(otherHost); | |
203 | if (!otherHost) { | |
204 | ViceLog(5, ("Received beacon from unknown host %s\n", | |
205 | afs_inet_ntoa_r(rx_HostOf(rxp), hoststr))); | |
206 | return 0; /* I don't know about you: vote no */ | |
207 | } | |
208 | for (ts = ubik_servers; ts; ts = ts->next) { | |
209 | if (ts->addr[0] == otherHost) | |
210 | break; | |
211 | } | |
212 | if (!ts) | |
213 | ViceLog(5, ("Unknown host %x has sent a beacon\n", otherHost)); | |
214 | if (ts && ts->isClone) | |
215 | isClone = 1; | |
216 | } else { | |
217 | otherHost = ubik_host[0]; /* this host */ | |
218 | isClone = amIClone; | |
219 | } | |
220 | ||
221 | ViceLog(5, ("Received beacon type %d from host %s\n", astate, | |
222 | afs_inet_ntoa_r(otherHost, hoststr))); | |
223 | ||
224 | /* compute the lowest server we've heard from. We'll try to only vote for | |
225 | * this dude if we don't already have a synchronization site. Also, don't | |
226 | * let a very old lowestHost confusing things forever. We pick a new | |
227 | * lowestHost after BIGTIME seconds to limit the damage if this host | |
228 | * actually crashes. Finally, we also count in this computation: don't | |
229 | * pick someone else if we're even better! | |
230 | * | |
231 | * Note that the test below must be <=, not <, so that we keep refreshing | |
232 | * lowestTime. Otherwise it will look like we haven't heard from | |
233 | * lowestHost in a while and another host could slip in. */ | |
234 | ||
235 | ||
236 | /* First compute the lowest host we've heard from, whether we want them | |
237 | * for a sync site or not. If we haven't heard from a site in BIGTIME | |
238 | * seconds, we ignore its presence in lowestHost: it may have crashed. | |
239 | * Note that we don't ever let anyone appear in our lowestHost if we're | |
240 | * lower than them, 'cause we know we're up. */ | |
241 | /* But do not consider clones for lowesHost since they never may become | |
242 | * sync site */ | |
243 | UBIK_VOTE_LOCK; | |
244 | now = FT_ApproxTime(); /* close to current time */ | |
245 | if (!isClone | |
246 | && (ntohl((afs_uint32)otherHost) <= ntohl((afs_uint32)vote_globals.lowestHost) | |
247 | || vote_globals.lowestTime + BIGTIME < now)) { | |
248 | vote_globals.lowestTime = now; | |
249 | vote_globals.lowestHost = otherHost; | |
250 | } | |
251 | /* why do we need this next check? Consider the case where each of two | |
252 | * servers decides the other is lowestHost. Each stops sending beacons | |
253 | * 'cause the other is there. Not obvious that this process terminates: | |
254 | * i.e. each guy could restart procedure and again think other side is | |
255 | * lowest. Need to prove: if one guy in the system is lowest and knows | |
256 | * he's lowest, these loops don't occur. because if someone knows he's | |
257 | * lowest, he will send out beacons telling others to vote for him. */ | |
258 | if (!amIClone | |
259 | && (ntohl((afs_uint32) ubik_host[0]) <= ntohl((afs_uint32)vote_globals.lowestHost) | |
260 | || vote_globals.lowestTime + BIGTIME < now)) { | |
261 | vote_globals.lowestTime = now; | |
262 | vote_globals.lowestHost = ubik_host[0]; | |
263 | } | |
264 | ||
265 | /* tell if we've heard from a sync site recently (even if we're not voting | |
266 | * for this dude yet). After a while, time the guy out. */ | |
267 | if (astate) { /* this guy is a sync site */ | |
268 | vote_globals.syncHost = otherHost; | |
269 | vote_globals.syncTime = now; | |
270 | } else if (vote_globals.syncTime + BIGTIME < now) { | |
271 | if (vote_globals.syncHost) { | |
272 | ViceLog(5, ("Ubik: Lost contact with sync-site %s (NOT in quorum)\n", | |
273 | afs_inet_ntoa_r(vote_globals.syncHost, hoststr))); | |
274 | } | |
275 | vote_globals.syncHost = 0; | |
276 | } | |
277 | ||
278 | /* decide how to vote */ | |
279 | vote = 0; /* start off voting no */ | |
280 | ||
281 | /* if we this guy isn't a sync site, we don't really have to vote for him. | |
282 | * We get to apply some heuristics to try to avoid weird oscillation sates | |
283 | * in the voting procedure. */ | |
284 | if (astate == 0) { | |
285 | /* in here only if this guy doesn't claim to be a sync site */ | |
286 | ||
287 | /* lowestHost is also trying for our votes, then just say no. */ | |
288 | if (ntohl(vote_globals.lowestHost) != ntohl(otherHost)) { | |
289 | goto done_zero; | |
290 | } | |
291 | ||
292 | /* someone else *is* a sync site, just say no */ | |
293 | if (vote_globals.syncHost && vote_globals.syncHost != otherHost) | |
294 | goto done_zero; | |
295 | } else if (vote_globals.lastYesHost == 0xffffffff && otherHost == ubik_host[0]) { | |
296 | /* fast startup if this is the only non-clone */ | |
297 | int i = 0; | |
298 | for (ts = ubik_servers; ts; ts = ts->next) { | |
299 | if (ts->addr[0] == otherHost) | |
300 | continue; | |
301 | if (!ts->isClone) | |
302 | i++; | |
303 | } | |
304 | if (!i) | |
305 | vote_globals.lastYesHost = otherHost; | |
306 | } | |
307 | ||
308 | ||
309 | if (isClone) | |
310 | goto done_zero; /* clone never can become sync site */ | |
311 | ||
312 | /* Don't promise sync site support to more than one host every BIGTIME | |
313 | * seconds. This is the heart of our invariants in this system. */ | |
314 | if (vote_globals.ubik_lastYesTime + BIGTIME < now || otherHost == vote_globals.lastYesHost) { | |
315 | if ((vote_globals.ubik_lastYesTime + BIGTIME < now) || (otherHost != vote_globals.lastYesHost) | |
316 | || (vote_globals.lastYesState != astate)) { | |
317 | /* A new vote or a change in the vote or changed quorum */ | |
318 | ViceLog(5, ("Ubik: vote 'yes' for %s %s\n", | |
319 | afs_inet_ntoa_r(otherHost, hoststr), | |
320 | (astate ? "(in quorum)" : "(NOT in quorum)"))); | |
321 | } | |
322 | ||
323 | vote = now; /* vote yes */ | |
324 | vote_globals.ubik_lastYesTime = now; /* remember when we voted yes */ | |
325 | vote_globals.lastYesClaim = astart; /* remember for computing when sync site expires */ | |
326 | vote_globals.lastYesHost = otherHost; /* and who for */ | |
327 | vote_globals.lastYesState = astate; /* remember if site is a sync site */ | |
328 | vote_globals.ubik_dbVersion = *avers; /* resync value */ | |
329 | vote_globals.ubik_dbTid = *atid; /* transaction id, if any, of active trans */ | |
330 | UBIK_VOTE_UNLOCK; | |
331 | DBHOLD(ubik_dbase); | |
332 | urecovery_CheckTid(atid, 0); /* check if current write trans needs aborted */ | |
333 | DBRELE(ubik_dbase); | |
334 | } else { | |
335 | UBIK_VOTE_UNLOCK; | |
336 | } | |
337 | return vote; | |
338 | done_zero: | |
339 | UBIK_VOTE_UNLOCK; | |
340 | return 0; | |
341 | } | |
342 | ||
343 | /*! | |
344 | * \brief Handle per-server debug command, where 0 is the first server. | |
345 | * | |
346 | * Basic network debugging hooks. | |
347 | */ | |
348 | afs_int32 | |
349 | SVOTE_SDebug(struct rx_call * rxcall, afs_int32 awhich, | |
350 | struct ubik_sdebug * aparm) | |
351 | { | |
352 | afs_int32 code, isClone; | |
353 | code = SVOTE_XSDebug(rxcall, awhich, aparm, &isClone); | |
354 | return code; | |
355 | } | |
356 | ||
357 | afs_int32 | |
358 | SVOTE_XSDebug(struct rx_call * rxcall, afs_int32 awhich, | |
359 | struct ubik_sdebug * aparm, afs_int32 * isclone) | |
360 | { | |
361 | struct ubik_server *ts; | |
362 | int i; | |
363 | for (ts = ubik_servers; ts; ts = ts->next) { | |
364 | if (awhich-- == 0) { | |
365 | /* we're done */ | |
366 | aparm->addr = ntohl(ts->addr[0]); /* primary interface */ | |
367 | for (i = 0; i < UBIK_MAX_INTERFACE_ADDR - 1; i++) | |
368 | aparm->altAddr[i] = ntohl(ts->addr[i + 1]); | |
369 | aparm->lastVoteTime = ts->lastVoteTime; | |
370 | aparm->lastBeaconSent = ts->lastBeaconSent; | |
371 | memcpy(&aparm->remoteVersion, &ts->version, | |
372 | sizeof(struct ubik_version)); | |
373 | aparm->lastVote = ts->lastVote; | |
374 | aparm->up = ts->up; | |
375 | aparm->beaconSinceDown = ts->beaconSinceDown; | |
376 | aparm->currentDB = ts->currentDB; | |
377 | *isclone = ts->isClone; | |
378 | return 0; | |
379 | } | |
380 | } | |
381 | return 2; | |
382 | } | |
383 | ||
384 | afs_int32 | |
385 | SVOTE_XDebug(struct rx_call * rxcall, struct ubik_debug * aparm, | |
386 | afs_int32 * isclone) | |
387 | { | |
388 | afs_int32 code; | |
389 | ||
390 | code = SVOTE_Debug(rxcall, aparm); | |
391 | *isclone = amIClone; | |
392 | return code; | |
393 | } | |
394 | ||
395 | /*! | |
396 | * \brief Handle basic network debug command. This is the global state dumper. | |
397 | */ | |
398 | afs_int32 | |
399 | SVOTE_Debug(struct rx_call * rxcall, struct ubik_debug * aparm) | |
400 | { | |
401 | int i; | |
402 | /* fill in the basic debug structure. Note the the RPC protocol transfers, | |
403 | * integers in host order. */ | |
404 | ||
405 | memset(aparm, 0, sizeof(*aparm)); | |
406 | aparm->now = FT_ApproxTime(); | |
407 | aparm->lastYesTime = vote_globals.ubik_lastYesTime; | |
408 | aparm->lastYesHost = ntohl(vote_globals.lastYesHost); | |
409 | aparm->lastYesState = vote_globals.lastYesState; | |
410 | aparm->lastYesClaim = vote_globals.lastYesClaim; | |
411 | aparm->lowestHost = ntohl(vote_globals.lowestHost); | |
412 | aparm->lowestTime = vote_globals.lowestTime; | |
413 | aparm->syncHost = ntohl(vote_globals.syncHost); | |
414 | aparm->syncTime = vote_globals.syncTime; | |
415 | memcpy(&aparm->syncVersion, &vote_globals.ubik_dbVersion, sizeof(struct ubik_version)); | |
416 | memcpy(&aparm->syncTid, &vote_globals.ubik_dbTid, sizeof(struct ubik_tid)); | |
417 | ||
418 | /* fill in all interface addresses of myself in hostbyte order */ | |
419 | for (i = 0; i < UBIK_MAX_INTERFACE_ADDR; i++) | |
420 | aparm->interfaceAddr[i] = ntohl(ubik_host[i]); | |
421 | ||
422 | aparm->amSyncSite = beacon_globals.ubik_amSyncSite; | |
423 | ubeacon_Debug(aparm); | |
424 | ||
425 | udisk_Debug(aparm); | |
426 | ||
427 | ulock_Debug(aparm); | |
428 | ||
429 | /* Get the recovery state. The label of the database may not have | |
430 | * been written yet but set the flag so udebug behavior remains. | |
431 | * Defect 9477. | |
432 | */ | |
433 | aparm->recoveryState = urecovery_state; | |
434 | if ((urecovery_state & UBIK_RECSYNCSITE) | |
435 | && (urecovery_state & UBIK_RECFOUNDDB) | |
436 | && (urecovery_state & UBIK_RECHAVEDB)) { | |
437 | aparm->recoveryState |= UBIK_RECLABELDB; | |
438 | } | |
439 | aparm->activeWrite = (ubik_dbase->flags & DBWRITING); | |
440 | aparm->tidCounter = ubik_dbase->tidCounter; | |
441 | ||
442 | if (ubik_currentTrans) { | |
443 | aparm->currentTrans = 1; | |
444 | aparm->writeTrans = 1; | |
445 | } else { | |
446 | aparm->currentTrans = 0; | |
447 | } | |
448 | ||
449 | aparm->epochTime = version_globals.ubik_epochTime; | |
450 | ||
451 | return 0; | |
452 | } | |
453 | ||
454 | afs_int32 | |
455 | SVOTE_SDebugOld(struct rx_call * rxcall, afs_int32 awhich, | |
456 | struct ubik_sdebug_old * aparm) | |
457 | { | |
458 | struct ubik_server *ts; | |
459 | ||
460 | for (ts = ubik_servers; ts; ts = ts->next) { | |
461 | if (awhich-- == 0) { | |
462 | /* we're done */ | |
463 | aparm->addr = ntohl(ts->addr[0]); /* primary interface */ | |
464 | aparm->lastVoteTime = ts->lastVoteTime; | |
465 | aparm->lastBeaconSent = ts->lastBeaconSent; | |
466 | memcpy(&aparm->remoteVersion, &ts->version, | |
467 | sizeof(struct ubik_version)); | |
468 | aparm->lastVote = ts->lastVote; | |
469 | aparm->up = ts->up; | |
470 | aparm->beaconSinceDown = ts->beaconSinceDown; | |
471 | aparm->currentDB = ts->currentDB; | |
472 | return 0; | |
473 | } | |
474 | } | |
475 | return 2; | |
476 | } | |
477 | ||
478 | ||
479 | /*! | |
480 | * \brief Handle basic network debug command. This is the global state dumper. | |
481 | */ | |
482 | afs_int32 | |
483 | SVOTE_DebugOld(struct rx_call * rxcall, | |
484 | struct ubik_debug_old * aparm) | |
485 | { | |
486 | ||
487 | /* fill in the basic debug structure. Note the the RPC protocol transfers, | |
488 | * integers in host order. */ | |
489 | ||
490 | aparm->now = FT_ApproxTime(); | |
491 | aparm->lastYesTime = vote_globals.ubik_lastYesTime; | |
492 | aparm->lastYesHost = ntohl(vote_globals.lastYesHost); | |
493 | aparm->lastYesState = vote_globals.lastYesState; | |
494 | aparm->lastYesClaim = vote_globals.lastYesClaim; | |
495 | aparm->lowestHost = ntohl(vote_globals.lowestHost); | |
496 | aparm->lowestTime = vote_globals.lowestTime; | |
497 | aparm->syncHost = ntohl(vote_globals.syncHost); | |
498 | aparm->syncTime = vote_globals.syncTime; | |
499 | memcpy(&aparm->syncVersion, &vote_globals.ubik_dbVersion, sizeof(struct ubik_version)); | |
500 | memcpy(&aparm->syncTid, &vote_globals.ubik_dbTid, sizeof(struct ubik_tid)); | |
501 | ||
502 | aparm->amSyncSite = beacon_globals.ubik_amSyncSite; | |
503 | ubeacon_Debug((ubik_debug *)aparm); | |
504 | ||
505 | udisk_Debug((ubik_debug *)aparm); | |
506 | ||
507 | ulock_Debug((ubik_debug *)aparm); | |
508 | ||
509 | /* Get the recovery state. The label of the database may not have | |
510 | * been written yet but set the flag so udebug behavior remains. | |
511 | * Defect 9477. | |
512 | */ | |
513 | aparm->recoveryState = urecovery_state; | |
514 | if ((urecovery_state & UBIK_RECSYNCSITE) | |
515 | && (urecovery_state & UBIK_RECFOUNDDB) | |
516 | && (urecovery_state & UBIK_RECHAVEDB)) { | |
517 | aparm->recoveryState |= UBIK_RECLABELDB; | |
518 | } | |
519 | aparm->activeWrite = (ubik_dbase->flags & DBWRITING); | |
520 | aparm->tidCounter = ubik_dbase->tidCounter; | |
521 | ||
522 | if (ubik_currentTrans) { | |
523 | aparm->currentTrans = 1; | |
524 | aparm->writeTrans = 1; | |
525 | } else { | |
526 | aparm->currentTrans = 0; | |
527 | } | |
528 | ||
529 | aparm->epochTime = version_globals.ubik_epochTime; | |
530 | ||
531 | return 0; | |
532 | } | |
533 | ||
534 | ||
535 | /*! | |
536 | * \brief Get the sync site; called by remote servers to find where they should go. | |
537 | */ | |
538 | afs_int32 | |
539 | SVOTE_GetSyncSite(struct rx_call * rxcall, | |
540 | afs_int32 * ahost) | |
541 | { | |
542 | afs_int32 temp; | |
543 | ||
544 | temp = uvote_GetSyncSite(); | |
545 | *ahost = ntohl(temp); | |
546 | return 0; | |
547 | } | |
548 | ||
549 | /*! | |
550 | * \brief Called once/run to init the vote module | |
551 | */ | |
552 | int | |
553 | uvote_Init(void) | |
554 | { | |
555 | UBIK_VOTE_LOCK; | |
556 | /* pretend we just voted for someone else, since we just restarted */ | |
557 | vote_globals.ubik_lastYesTime = FT_ApproxTime(); | |
558 | ||
559 | /* Initialize globals */ | |
560 | vote_globals.lastYesHost = 0xffffffff; | |
561 | vote_globals.lastYesClaim = 0; | |
562 | vote_globals.lastYesState = 0; | |
563 | vote_globals.lowestTime = 0; | |
564 | vote_globals.lowestHost = 0xffffffff; | |
565 | vote_globals.syncTime = 0; | |
566 | vote_globals.syncHost = 0; | |
567 | UBIK_VOTE_UNLOCK; | |
568 | ||
569 | return 0; | |
570 | } | |
571 | ||
572 | void | |
573 | uvote_set_dbVersion(struct ubik_version version) { | |
574 | UBIK_VOTE_LOCK; | |
575 | vote_globals.ubik_dbVersion = version; | |
576 | UBIK_VOTE_UNLOCK; | |
577 | } | |
578 | ||
579 | /* Compare given version to current DB version. Return true if equal. */ | |
580 | int | |
581 | uvote_eq_dbVersion(struct ubik_version version) { | |
582 | int ret = 0; | |
583 | ||
584 | UBIK_VOTE_LOCK; | |
585 | if (vote_globals.ubik_dbVersion.epoch == version.epoch && vote_globals.ubik_dbVersion.counter == version.counter) { | |
586 | ret = 1; | |
587 | } | |
588 | UBIK_VOTE_UNLOCK; | |
589 | return ret; | |
590 | } | |
591 | ||
592 | /*! | |
593 | * \brief Check if there is a sync site and whether we have a given db version | |
594 | * | |
595 | * \return 1 if there is a valid sync site, and the given db version matches the sync site's | |
596 | */ | |
597 | ||
598 | int | |
599 | uvote_HaveSyncAndVersion(struct ubik_version version) | |
600 | { | |
601 | afs_int32 now; | |
602 | int code; | |
603 | ||
604 | UBIK_VOTE_LOCK; | |
605 | now = FT_ApproxTime(); | |
606 | if (!vote_globals.lastYesState || (SMALLTIME + vote_globals.lastYesClaim < now) || | |
607 | vote_globals.ubik_dbVersion.epoch != version.epoch || | |
608 | vote_globals.ubik_dbVersion.counter != version.counter) | |
609 | code = 0; | |
610 | else | |
611 | code = 1; | |
612 | UBIK_VOTE_UNLOCK; | |
613 | return code; | |
614 | } |