Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / ubik / ubikclient.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 #include <afsconfig.h>
11 #include <afs/param.h>
12 #include <afs/stds.h>
13
14 #include <roken.h>
15 #include <afs/opr.h>
16 #ifdef AFS_PTHREAD_ENV
17 # include <opr/lock.h>
18 #endif
19
20 #ifdef IGNORE_SOME_GCC_WARNINGS
21 # pragma GCC diagnostic warning "-Wstrict-prototypes"
22 #endif
23
24 #include <afs/pthread_glock.h>
25 #include <rx/xdr.h>
26 #include <rx/rx.h>
27 #include <lock.h>
28 #include <afs/rxgen_consts.h>
29 #define UBIK_LEGACY_CALLITER
30 #include "ubik.h"
31
/*
 * Global generation counter. ubik_ClientInit() pre-increments it and stores
 * the new value in the client's initializationState field, while
 * ubik_ClientDestroy() resets that field to zero; a zero initializationState
 * is therefore what ubik_ClientInit() rejects with UREINITIALIZE.
 */
32 short ubik_initializationState; /*!< initial state is zero */
33
34
35 /*!
36 * \brief Parse list for clients.
37 */
38 int
39 ubik_ParseClientList(int argc, char **argv, afs_uint32 * aothers)
40 {
41 afs_int32 i;
42 char *tp;
43 struct hostent *th;
44 afs_uint32 temp;
45 afs_int32 counter;
46 int inServer;
47
48 inServer = 0; /* haven't seen -servers yet */
49 counter = 0;
50 for (i = 1; i < argc; i++) {
51 /* look for -servers argument */
52 tp = argv[i];
53
54 if (inServer) {
55 if (*tp == '-')
56 break; /* done */
57 /* otherwise this is a new host name */
58 LOCK_GLOBAL_MUTEX;
59 th = gethostbyname(tp);
60 if (!th) {
61 UNLOCK_GLOBAL_MUTEX;
62 return UBADHOST;
63 }
64 memmove((void *)&temp, (const void *)th->h_addr,
65 sizeof(afs_int32));
66 UNLOCK_GLOBAL_MUTEX;
67 if (counter++ >= MAXSERVERS)
68 return UNHOSTS;
69 *aothers++ = temp;
70 } else {
71 /* haven't seen a -server yet */
72 if (!strcmp(tp, "-servers")) {
73 inServer = 1;
74 }
75 }
76 }
77 if (!inServer) {
78 /* never saw a -server */
79 return UNOENT;
80 }
81 if (counter < MAXSERVERS)
82 *aothers++ = 0; /* null terminate if room */
83 return 0;
84 }
85
#ifdef AFS_PTHREAD_ENV
#include <pthread.h>

/* Thread-specific slot in which afs_random() keeps its generator state. */
static pthread_key_t random_number_key;
/* Once-control handed to pthread_once() together with afs_random_once(). */
static pthread_once_t random_once = PTHREAD_ONCE_INIT;
/* Set to 1 after afs_random_once() has created the key. */
static int called_afs_random_once;

/*!
 * \brief One-time setup for afs_random(): create the thread-specific key.
 *
 * The key is created without a destructor.  The creation result is checked
 * with opr_Verify().
 */
static void
afs_random_once(void)
{
    int rc = pthread_key_create(&random_number_key, NULL);

    opr_Verify(rc == 0);
    called_afs_random_once = 1;
}

#endif
101
102 /*!
103 * \brief use time and pid to try to get some initial randomness.
104 */
105 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
106
107 /*!
108 * \brief Random number generator and constants from KnuthV2 2d ed, p170
109 *
110 * Rules: \n
111 * X = (aX + c) % m \n
112 * m is a power of two \n
113 * a % 8 is 5 \n
114 * a is 0.73m should be 0.01m .. 0.99m \n
115 * c is more or less immaterial. 1 or a is suggested. \n
116 *
117 * NB: LOW ORDER BITS are not very random. To get small random numbers,
118 * treat result as <1, with implied binary point, and multiply by
119 * desired modulus.
120 *
121 * NB: Has to be unsigned, since shifts on signed quantities may preserve
122 * the sign bit.
123 *
124 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
125 * is used because it has some interesting characteristics (lacks any
126 * interesting bit-patterns).
127 */
128 unsigned int
129 afs_random(void)
130 {
131 #ifdef AFS_PTHREAD_ENV
132 afs_uint32 state;
133
134 if (!called_afs_random_once)
135 pthread_once(&random_once, afs_random_once);
136
137 state = (uintptr_t) pthread_getspecific(random_number_key);
138 #else
139 static afs_uint32 state = 0;
140 #endif
141
142 if (!state) {
143 int i;
144 state = time(0) + getpid();
145 for (i = 0; i < 15; i++) {
146 ranstage(state);
147 }
148 }
149
150 ranstage(state);
151 #ifdef AFS_PTHREAD_ENV
152 pthread_setspecific(random_number_key, (const void *)(uintptr_t)state);
153 #endif
154 return (state);
155
156 }
157
158 /*!
159 * \brief Returns int 0..14 using the high bits of a pseudo-random number instead of
160 * the low bits, as the low bits are "less random" than the high ones...
161 *
162 * \todo Slight roundoff error exists, an excercise for the reader.
163 *
164 * Need to multiply by something with lots of ones in it, so multiply by
165 * 8 or 16 is right out.
166 */
167 static unsigned int
168 afs_randomMod15(void)
169 {
170 afs_uint32 temp;
171
172 temp = afs_random() >> 4;
173 temp = (temp * 15) >> 28;
174
175 return temp;
176 }
177
178 #ifdef abs
179 #undef abs
180 #endif /* abs */
181 #define abs(a) ((a) < 0 ? -1*(a) : (a))
182 int
183 ubik_ClientInit(struct rx_connection **serverconns,
184 struct ubik_client **aclient)
185 {
186 int i, j;
187 int count;
188 int offset;
189 struct ubik_client *tc;
190
191 initialize_U_error_table();
192
193 if (*aclient) { /* the application is doing a re-initialization */
194 LOCK_UBIK_CLIENT((*aclient));
195 /* this is an important defensive check */
196 if (!((*aclient)->initializationState)) {
197 UNLOCK_UBIK_CLIENT((*aclient));
198 return UREINITIALIZE;
199 }
200
201 /* release all existing connections */
202 for (tc = *aclient, i = 0; i < MAXSERVERS; i++) {
203 struct rx_connection *rxConn = ubik_GetRPCConn(tc, i);
204 if (rxConn == 0)
205 break;
206 #ifdef AFS_PTHREAD_ENV
207 rx_ReleaseCachedConnection(rxConn);
208 #else
209 rx_DestroyConnection(rxConn);
210 #endif
211 }
212 UNLOCK_UBIK_CLIENT((*aclient));
213 #ifdef AFS_PTHREAD_ENV
214 if (pthread_mutex_destroy(&((*aclient)->cm)))
215 return UMUTEXDESTROY;
216 #endif
217 } else {
218 tc = malloc(sizeof(struct ubik_client));
219 }
220 if (tc == NULL)
221 return UNOMEM;
222 memset((void *)tc, 0, sizeof(*tc));
223 #ifdef AFS_PTHREAD_ENV
224 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t *)0)) {
225 free(tc);
226 return UMUTEXINIT;
227 }
228 #endif
229 tc->initializationState = ++ubik_initializationState;
230
231 /* first count the # of server conns so we can randomize properly */
232 count = 0;
233 for (i = 0; i < MAXSERVERS; i++) {
234 if (serverconns[i] == (struct rx_connection *)0)
235 break;
236 count++;
237 }
238
239 /* here count is the # of servers we're actually passed in. Compute
240 * offset, a number between 0..count-1, where we'll start copying from the
241 * client-provided array. */
242 for (i = 0; i < count; i++) {
243 offset = afs_randomMod15() % count;
244 for (j = abs(offset); j < 2 * count; j++) {
245 if (!tc->conns[abs(j % count)]) {
246 tc->conns[abs(j % count)] = serverconns[i];
247 break;
248 }
249 }
250 }
251
252 *aclient = tc;
253 return 0;
254 }
255
256 /*!
257 * \brief Destroy an ubik connection.
258 *
259 * It calls rx to destroy the component rx connections, then frees the ubik
260 * connection structure.
261 */
262 afs_int32
263 ubik_ClientDestroy(struct ubik_client * aclient)
264 {
265 int c;
266
267 if (aclient == 0)
268 return 0;
269 LOCK_UBIK_CLIENT(aclient);
270 for (c = 0; c < MAXSERVERS; c++) {
271 struct rx_connection *rxConn = ubik_GetRPCConn(aclient, c);
272 if (rxConn == 0)
273 break;
274 #ifdef AFS_PTHREAD_ENV
275 rx_ReleaseCachedConnection(rxConn);
276 #else
277 rx_DestroyConnection(rxConn);
278 #endif
279 }
280 aclient->initializationState = 0; /* client in not initialized */
281 UNLOCK_UBIK_CLIENT(aclient);
282 #ifdef AFS_PTHREAD_ENV
283 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
284 #endif
285 free(aclient);
286 return 0;
287 }
288
289 /*!
290 * \brief So that intermittent failures that cause connections to die
291 * don't kill whole ubik connection, refresh them when the connection is in
292 * error.
293 */
294 struct rx_connection *
295 ubik_RefreshConn(struct rx_connection *tc)
296 {
297 afs_uint32 host;
298 u_short port;
299 u_short service;
300 struct rx_securityClass *sc;
301 int si;
302 struct rx_connection *newTc;
303
304 host = rx_HostOf(rx_PeerOf(tc));
305 port = rx_PortOf(rx_PeerOf(tc));
306 service = rx_ServiceIdOf(tc);
307 sc = rx_SecurityObjectOf(tc);
308 si = rx_SecurityClassOf(tc);
309
310 /*
311 * destroy old one after creating new one so that refCount on security
312 * object cannot reach zero.
313 */
314 newTc = rx_NewConnection(host, port, service, sc, si);
315 rx_DestroyConnection(tc);
316 return newTc;
317 }
318
#ifdef AFS_PTHREAD_ENV

/* Mutex taken by LOCK_UCLNT_CACHE, and the once-control that initializes it. */
pthread_mutex_t ubik_client_mutex;
pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;

/*!
 * \brief Initialize ubik_client_mutex; run through pthread_once() by
 * LOCK_UCLNT_CACHE.
 */
void
ubik_client_init_mutex(void)
{
    MUTEX_INIT(&ubik_client_mutex, "client init", MUTEX_DEFAULT, 0);
}

/* Make sure the mutex exists (once), then enter it. */
#define LOCK_UCLNT_CACHE do { \
    opr_Verify(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0); \
    MUTEX_ENTER(&ubik_client_mutex); \
  } while (0)
#define UNLOCK_UCLNT_CACHE MUTEX_EXIT(&ubik_client_mutex)

#else

/* Without pthreads the cache lock macros expand to nothing. */
#define LOCK_UCLNT_CACHE
#define UNLOCK_UCLNT_CACHE

#endif

/*
 * Ring of up to SYNCCOUNT procedure pointers that ubik_Call() has seen
 * return UNOTSYNC; synccount is the running insertion counter.
 */
#define SYNCCOUNT 10
static int synccount = 0;
static int *calls_needsync[SYNCCOUNT];	/* proc calls that need the sync site */
345
346
347
348 /*!
349 * \brief Call this after getting back a #UNOTSYNC.
350 *
351 * \note Getting a #UNOTSYNC error code back does \b not guarantee
352 * that there is a sync site yet elected. However, if there is a sync
353 * site out there somewhere, and you're trying an operation that
354 * requires a sync site, ubik will return #UNOTSYNC, indicating the
355 * operation won't work until you find a sync site
356 */
357 static int
358 try_GetSyncSite(struct ubik_client *aclient, afs_int32 apos)
359 {
360 struct rx_peer *rxp;
361 afs_int32 code;
362 int i;
363 afs_int32 thisHost, newHost;
364 struct rx_connection *tc;
365 short origLevel;
366
367 origLevel = aclient->initializationState;
368
369 /* get this conn */
370 tc = aclient->conns[apos];
371 if (tc && rx_ConnError(tc)) {
372 aclient->conns[apos] = (tc = ubik_RefreshConn(tc));
373 }
374 if (!tc) {
375 return -1;
376 }
377
378 /* now see if we can find the sync site host */
379 code = VOTE_GetSyncSite(tc, &newHost);
380 if (aclient->initializationState != origLevel) {
381 return -1; /* somebody did a ubik_ClientInit */
382 }
383
384 if (!code && newHost) {
385 newHost = htonl(newHost); /* convert back to network order */
386
387 /*
388 * position count at the appropriate slot in the client
389 * structure and retry. If we can't find in slot, we'll just
390 * continue through the whole list
391 */
392 for (i = 0; i < MAXSERVERS; i++) {
393 rxp = rx_PeerOf(aclient->conns[i]);
394 thisHost = rx_HostOf(rxp);
395 if (!thisHost) {
396 return -1;
397 } else if (thisHost == newHost) {
398 return i; /* we were told to use this one */
399 }
400 }
401 }
402 return -1;
403 }
404
405 #define NEED_LOCK 1
406 #define NO_LOCK 0
407
408 /*!
409 * \brief Create an internal version of ubik_CallIter that takes an additional
410 * parameter - to indicate whether the ubik client handle has already
411 * been locked.
412 */
413 static afs_int32
414 CallIter(int (*aproc) (), struct ubik_client *aclient,
415 afs_int32 aflags, int *apos, long p1, long p2, long p3, long p4,
416 long p5, long p6, long p7, long p8, long p9, long p10, long p11,
417 long p12, long p13, long p14, long p15, long p16, int needlock)
418 {
419 afs_int32 code;
420 struct rx_connection *tc;
421 short origLevel;
422
423 if (needlock) {
424 LOCK_UBIK_CLIENT(aclient);
425 }
426 origLevel = aclient->initializationState;
427
428 code = UNOSERVERS;
429
430 while (*apos < MAXSERVERS) {
431 /* tc is the next conn to try */
432 tc = aclient->conns[*apos];
433 if (!tc)
434 goto errout;
435
436 if (rx_ConnError(tc)) {
437 tc = ubik_RefreshConn(tc);
438 aclient->conns[*apos] = tc;
439 }
440
441 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
442 (*apos)++; /* try another one if this server is down */
443 } else {
444 break; /* this is the desired path */
445 }
446 }
447 if (*apos >= MAXSERVERS)
448 goto errout;
449
450 code =
451 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
452 p14, p15, p16);
453 if (aclient->initializationState != origLevel)
454 /* somebody did a ubik_ClientInit */
455 goto errout;
456
457 /* what should I do in case of UNOQUORUM ? */
458 if (code < 0) {
459 aclient->states[*apos] |= CFLastFailed; /* network errors */
460 } else {
461 /* either misc ubik code, or misc application code or success. */
462 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
463 }
464
465 (*apos)++;
466 errout:
467 if (needlock) {
468 UNLOCK_UBIK_CLIENT(aclient);
469 }
470 return code;
471 }
472
473 /*!
474 * \brief This is part of an iterator. It doesn't handle finding sync sites.
475 */
476 afs_int32
477 ubik_CallIter(int (*aproc) (), struct ubik_client *aclient,
478 afs_int32 aflags, int *apos, long p1, long p2,
479 long p3, long p4, long p5, long p6, long p7,
480 long p8, long p9, long p10, long p11, long p12,
481 long p13, long p14, long p15, long p16)
482 {
483 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
484 p8, p9, p10, p11, p12, p13, p14, p15, p16, NEED_LOCK);
485 }
486
487 /*!
488 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
489 *
490 * \todo In the future, we should also put in a protocol to find the sync site.
491 */
492 afs_int32
493 ubik_Call_New(int (*aproc) (), struct ubik_client *aclient,
494 afs_int32 aflags, long p1, long p2, long p3, long p4, long p5,
495 long p6, long p7, long p8, long p9, long p10, long p11,
496 long p12, long p13, long p14, long p15, long p16)
497 {
498 afs_int32 code, rcode;
499 afs_int32 count;
500 afs_int32 temp;
501 int pass;
502 int stepBack;
503 short origLevel;
504
505 LOCK_UBIK_CLIENT(aclient);
506 restart:
507 rcode = UNOSERVERS;
508 origLevel = aclient->initializationState;
509
510 /* Do two passes. First pass only checks servers known running */
511 for (aflags |= UPUBIKONLY, pass = 0; pass < 2;
512 pass++, aflags &= ~UPUBIKONLY) {
513 stepBack = 0;
514 count = 0;
515 while (1) {
516 code =
517 CallIter(aproc, aclient, aflags, &count, p1, p2, p3, p4, p5,
518 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
519 NO_LOCK);
520 if (code && (aclient->initializationState != origLevel)) {
521 goto restart;
522 }
523 if (code == UNOSERVERS) {
524 break;
525 }
526 rcode = code; /* remember code from last good call */
527
528 if (code == UNOTSYNC) { /* means this requires a sync site */
529 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
530 temp = try_GetSyncSite(aclient, count);
531 if (aclient->initializationState != origLevel) {
532 goto restart; /* somebody did a ubik_ClientInit */
533 }
534 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
535 count = temp; /* generally try to make progress */
536 }
537 }
538 } else if ((code >= 0) && (code != UNOQUORUM)) {
539 UNLOCK_UBIK_CLIENT(aclient);
540 return code; /* success or global error condition */
541 }
542 }
543 }
544 UNLOCK_UBIK_CLIENT(aclient);
545 return rcode;
546 }
547
548 /*!
549 * call this instead of stub and we'll guarantee to find a host that's up.
550 *
551 * \todo In the future, we should also put in a protocol to find the sync site.
552 */
553 afs_int32
554 ubik_Call(int (*aproc) (), struct ubik_client *aclient,
555 afs_int32 aflags, long p1, long p2, long p3, long p4,
556 long p5, long p6, long p7, long p8, long p9, long p10,
557 long p11, long p12, long p13, long p14, long p15, long p16)
558 {
559 afs_int32 rcode, code, newHost, thisHost, i, count;
560 int chaseCount, pass, needsync, inlist, j;
561 struct rx_connection *tc;
562 struct rx_peer *rxp;
563 short origLevel;
564
565 if (aflags & UBIK_CALL_NEW)
566 return ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4,
567 p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,
568 p16);
569
570 if (!aclient)
571 return UNOENT;
572 LOCK_UBIK_CLIENT(aclient);
573
574 restart:
575 origLevel = aclient->initializationState;
576 rcode = UNOSERVERS;
577 chaseCount = inlist = needsync = 0;
578
579 LOCK_UCLNT_CACHE;
580 for (j = 0; ((j < SYNCCOUNT) && calls_needsync[j]); j++) {
581 if (calls_needsync[j] == (int *)aproc) {
582 inlist = needsync = 1;
583 break;
584 }
585 }
586 UNLOCK_UCLNT_CACHE;
587 /*
588 * First pass, we try all servers that are up.
589 * Second pass, we try all servers.
590 */
591 for (pass = 0; pass < 2; pass++) { /*p */
592 /* For each entry in our servers list */
593 for (count = 0;; count++) { /*s */
594
595 if (needsync) {
596 /* Need a sync site. Lets try to quickly find it */
597 if (aclient->syncSite) {
598 newHost = aclient->syncSite; /* already in network order */
599 aclient->syncSite = 0; /* Will reset if it works */
600 } else if (aclient->conns[3]) {
601 /* If there are fewer than four db servers in a cell,
602 * there's no point in making the GetSyncSite call.
603 * At best, it's a wash. At worst, it results in more
604 * RPCs than you would otherwise make.
605 */
606 tc = aclient->conns[count];
607 if (tc && rx_ConnError(tc)) {
608 aclient->conns[count] = tc = ubik_RefreshConn(tc);
609 }
610 if (!tc)
611 break;
612 code = VOTE_GetSyncSite(tc, &newHost);
613 if (aclient->initializationState != origLevel)
614 goto restart; /* somebody did a ubik_ClientInit */
615 if (code)
616 newHost = 0;
617 newHost = htonl(newHost); /* convert to network order */
618 } else {
619 newHost = 0;
620 }
621 if (newHost) {
622 /* position count at the appropriate slot in the client
623 * structure and retry. If we can't find in slot, we'll
624 * just continue through the whole list
625 */
626 for (i = 0; i < MAXSERVERS && aclient->conns[i]; i++) {
627 rxp = rx_PeerOf(aclient->conns[i]);
628 thisHost = rx_HostOf(rxp);
629 if (!thisHost)
630 break;
631 if (thisHost == newHost) {
632 if (chaseCount++ > 2)
633 break; /* avoid loop asking */
634 count = i; /* this index is the sync site */
635 break;
636 }
637 }
638 }
639 }
640 /*needsync */
641 tc = aclient->conns[count];
642 if (tc && rx_ConnError(tc)) {
643 aclient->conns[count] = tc = ubik_RefreshConn(tc);
644 }
645 if (!tc)
646 break;
647
648 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
649 continue; /* this guy's down */
650 }
651
652 rcode =
653 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11,
654 p12, p13, p14, p15, p16);
655 if (aclient->initializationState != origLevel) {
656 /* somebody did a ubik_ClientInit */
657 if (rcode)
658 goto restart; /* call failed */
659 else
660 goto done; /* call suceeded */
661 }
662 if (rcode < 0) { /* network errors */
663 aclient->states[count] |= CFLastFailed; /* Mark serer down */
664 } else if (rcode == UNOTSYNC) {
665 needsync = 1;
666 } else if (rcode != UNOQUORUM) {
667 /* either misc ubik code, or misc appl code, or success. */
668 aclient->states[count] &= ~CFLastFailed; /* mark server up */
669 goto done; /* all done */
670 }
671 } /*s */
672 } /*p */
673
674 done:
675 if (needsync) {
676 if (!inlist) { /* Remember proc call that needs sync site */
677 LOCK_UCLNT_CACHE;
678 calls_needsync[synccount % SYNCCOUNT] = (int *)aproc;
679 synccount++;
680 UNLOCK_UCLNT_CACHE;
681 inlist = 1;
682 }
683 if (!rcode) { /* Remember the sync site - cmd successful */
684 rxp = rx_PeerOf(aclient->conns[count]);
685 aclient->syncSite = rx_HostOf(rxp);
686 }
687 }
688 UNLOCK_UBIK_CLIENT(aclient);
689 return rcode;
690 }