| 1 | /* |
| 2 | * Copyright 2000, International Business Machines Corporation and others. |
| 3 | * All Rights Reserved. |
| 4 | * |
| 5 | * This software has been released under the terms of the IBM Public |
| 6 | * License. For details, see the LICENSE file in the top-level source |
| 7 | * directory or online at http://www.openafs.org/dl/license10.html |
| 8 | */ |
| 9 | |
| 10 | /* |
| 11 | * This package is used to actively manage the expiration of callbacks, |
| 12 | * so that the rest of the cache manager doesn't need to compute |
| 13 | * whether a callback has expired or not, but can tell with one simple |
| 14 | * check, that is, whether the CStatd bit is on or off. |
| 15 | * |
| 16 | * The base of the hash table moves periodically (every 128 seconds) |
| 17 | * QueueCallback rarely touches the first 3 slots in the hash table |
| 18 | * (only when called from CheckCallbacks) since MinTimeOut in |
| 19 | * viced/callback.c is currently 7 minutes. |
| 20 | * Therefore, CheckCallbacks should be able to run concurrently with |
| 21 | * QueueCallback, given the proper locking, of course. |
| 22 | * |
| 23 | * Note: |
| 24 | * 1. CheckCallbacks and BumpBase never run simultaneously. This is because |
| 25 | * they are only called from afs_Daemon. Therefore, base and basetime will |
| 26 | * always be consistent during CheckCallbacks. |
| 27 | * 2. cbHashT [base] rarely (if ever) gets stuff queued in it. The only way |
| 28 | * that could happen is CheckCallbacks might fencepost and move something in |
| 29 | * place, or BumpBase might push some stuff up. |
| 30 | * 3. Hash chains aren't particularly sorted. |
| 31 | * 4. The file server keeps its callback state around for 3 minutes |
| 32 | * longer than it promises the cache manager in order to account for |
| 33 | * clock skew, network delay, and other bogeymen. |
| 34 | * |
| 35 | * For now I just use one large lock, which is fine on a uniprocessor, |
| 36 | * since it's not held during any RPCs or low-priority I/O operations. |
| 37 | * To make this code MP-fast, you need no more locks than processors, |
| 38 | * but probably more than one. In measurements on MP-safe implementations, |
| 39 | * I have never seen any contention over the xcbhash lock. |
| 40 | * |
| 41 | * Incompatible operations: |
| 42 | * Enqueue and "dequeue of first vcache" in same slot |
| 43 | * dequeue and "dequeue of preceding vcache" in same slot |
| 44 | * dequeue and "dequeue of successive vcache" in same slot |
| 45 | * BumpBase pushing a list and enqueue in the new base slot |
| 46 | * Two enqueues in same slot |
| 47 | * more... |
| 48 | * |
| 49 | * Certain invariants exist: |
| 50 | * 1 Callback expiration times granted by a file server will never |
| 51 | * decrease for a particular vnode UNLESS a CallBack RPC is invoked |
| 52 | * by the server in the interim. |
| 53 | * 2 A vcache will always expire no sooner than the slot in which it is |
| 54 | * currently enqueued. Callback times granted by the server may |
| 55 | * increase, in which case the vcache will be updated in-place. As a |
| 56 | * result, it may expire later than the slot in which it is enqueued. |
 * Not to worry, the CheckCallbacks code will move it if necessary.
| 58 | * This approach means that busy vnodes won't be continually moved |
| 59 | * around within the expiry queue: they are only moved when they |
| 60 | * finally advance to the lead bucket. |
| 61 | * 3 Anything which has a callback on it must be in the expiry |
| 62 | * queue. In AFS 3.3, that means everything but symlinks (which |
| 63 | * are immutable), including contents of Read-Only volumes |
| 64 | * (which have callbacks by virtue of the whole-volume callback) |
| 65 | * |
| 66 | * QueueCallback only checks that its vcache is in the list |
| 67 | * somewhere, counting on invariant #1 to guarantee that the vcache |
| 68 | * won't be in a slot later than QueueCallback would otherwise place |
| 69 | * it. Therefore, whenever we turn off the CStatd bit on the vcache, we |
| 70 | * *must* remove the vcache from the expiry queue. Otherwise, we |
| 71 | * might have missed a CallBack RPC, and a subsequent callback might be |
| 72 | * granted with a shorter expiration time. |
| 73 | */ |
| 74 | #include <afsconfig.h> |
| 75 | #include "afs/param.h" |
| 76 | |
| 77 | |
| 78 | #include "afs/sysincludes.h" /*Standard vendor system headers */ |
| 79 | #include "afsincludes.h" /*AFS-based standard headers */ |
| 80 | #include "afs/afs_cbqueue.h" |
| 81 | #include "afs/afs.h" |
| 82 | #include "afs/lock.h" |
| 83 | #include "afs/afs_stats.h" |
| 84 | |
static unsigned int base = 0;	/* index of the hash slot currently expiring */
static unsigned int basetime = 0;	/* wall-clock time corresponding to slot 'base' */
static struct vcache *debugvc;	/* used only for post-mortem debugging */
struct bucket {
    struct afs_q head;		/* doubly-linked chain of vcaches (via callsort) */
    /* struct afs_lock lock;  only if you want lots of locks... */
};
static struct bucket cbHashT[CBHTSIZE];	/* expiry queue: one bucket per time slot */
struct afs_lock afs_xcbhash;	/* protects base, basetime, and all of cbHashT */
| 94 | |
| 95 | /* afs_QueueCallback |
| 96 | * Takes a write-locked vcache pointer and a callback expiration time |
| 97 | * as returned by the file server (ie, in units of 128 seconds from "now"). |
| 98 | * |
| 99 | * Uses the time as an index into a hash table, and inserts the vcache |
| 100 | * structure into the overflow chain. |
| 101 | * |
| 102 | * If the vcache is already on some hash chain, leave it there. |
| 103 | * CheckCallbacks will get to it eventually. In the meantime, it |
| 104 | * might get flushed, or it might already be on the right hash chain, |
| 105 | * so why bother messing with it now? |
| 106 | * |
| 107 | * NOTE: The caller must hold a write lock on afs_xcbhash |
| 108 | */ |
| 109 | |
| 110 | void |
| 111 | afs_QueueCallback(struct vcache *avc, unsigned int atime, struct volume *avp) |
| 112 | { |
| 113 | if (avp && (avp->expireTime < avc->cbExpires)) |
| 114 | avp->expireTime = avc->cbExpires; |
| 115 | if (!(avc->callsort.next)) { |
| 116 | atime = (atime + base) % CBHTSIZE; |
| 117 | QAdd(&(cbHashT[atime].head), &(avc->callsort)); |
| 118 | } |
| 119 | |
| 120 | return; |
| 121 | } /* afs_QueueCallback */ |
| 122 | |
| 123 | /* afs_DequeueCallback |
| 124 | * Takes a write-locked vcache pointer and removes it from the callback |
| 125 | * hash table, without knowing beforehand which slot it was in. |
| 126 | * |
| 127 | * for now, just get a lock on everything when doing the dequeue, don't |
| 128 | * worry about getting a lock on the individual slot. |
| 129 | * |
| 130 | * the only other places that do anything like dequeues are CheckCallbacks |
| 131 | * and BumpBase. |
| 132 | * |
| 133 | * NOTE: The caller must hold a write lock on afs_xcbhash |
| 134 | */ |
| 135 | void |
| 136 | afs_DequeueCallback(struct vcache *avc) |
| 137 | { |
| 138 | |
| 139 | debugvc = avc; |
| 140 | if (avc->callsort.prev) { |
| 141 | QRemove(&(avc->callsort)); |
| 142 | } else; /* must have got dequeued in a race */ |
| 143 | |
| 144 | return; |
| 145 | } /* afs_DequeueCallback */ |
| 146 | |
/* afs_CheckCallbacks
 * called periodically to determine which callbacks are likely to
 * expire in the next n second interval.  Preemptively marks them as
 * expired.  Rehashes items which are now in the wrong hash bucket.
 * Preemptively renew recently-accessed items.  Only removes things
 * from the first and second bucket (as long as secs < 128), and
 * inserts things into other, later buckets.  either need to advance
 * to the second bucket if secs spans two intervals, or else be
 * certain to call afs_CheckCallbacks immediately after calling
 * BumpBase (allows a little more slop but it's ok because file server
 * keeps 3 minutes of slop time)
 *
 * There is a little race between CheckCallbacks and any code which
 * updates cbExpires, always just prior to calling QueueCallback.  We
 * don't lock the vcache struct here (can't, or we'd risk deadlock),
 * so GetVCache (for example) may update cbExpires before or after #1
 * below.  If before, CheckCallbacks moves this entry to its proper
 * slot.  If after, GetVCache blocks in the call to QueueCallbacks,
 * this code dequeues the vcache, and then QueueCallbacks re-enqueues it.
 *
 * XXX to avoid the race, make QueueCallback take the "real" time
 * and update cbExpires under the xcbhash lock.
 *
 * NB #1: There's a little optimization here: if I go to invalidate a
 * RO vcache or volume, first check to see if the server is down.  If
 * it _is_, don't invalidate it, cuz we might just as well keep using
 * it.  Possibly, we could do the same thing for items in RW volumes,
 * but that bears some drinking about.
 *
 * Don't really need to invalidate the hints, we could just wait to see if
 * the dv has changed after a subsequent FetchStatus, but this is safer.
 */

/* Sanity check on the callback queue traversal: an upper bound on how many
 * entries can legitimately exist.  Allow for slop in the computation. */
#if defined(AFS_LINUX22_ENV)
#define CBQ_LIMIT (afs_maxvcount + 10)
#else
#define CBQ_LIMIT (afs_cacheStats + afs_stats_cmperf.vcacheXAllocs + 10)
#endif

void
afs_CheckCallbacks(unsigned int secs)
{
    struct vcache *tvc;
    struct afs_q *tq;
    struct afs_q *uq;		/* saved predecessor: tq may be unlinked below */
    afs_uint32 now;
    struct volume *tvp;
    int safety;			/* trips CBQ_LIMIT if the chain is damaged/cyclic */

    ObtainWriteLock(&afs_xcbhash, 85);	/* pretty likely I'm going to remove something */
    now = osi_Time();
    /* Walk the lead bucket backwards, capturing each predecessor before the
     * current entry can be unlinked or re-slotted. */
    for (safety = 0, tq = cbHashT[base].head.prev;
	 (safety <= CBQ_LIMIT) && (tq != &(cbHashT[base].head));
	 tq = uq, safety++) {

	uq = QPrev(tq);
	tvc = CBQTOV(tq);
	if (tvc->cbExpires < now + secs) {	/* race #1 here */
	    /* Get the volume, and if its callback expiration time is more than secs
	     * seconds into the future, update this vcache entry and requeue it below
	     */
	    if ((tvc->f.states & CRO)
		&& (tvp = afs_FindVolume(&(tvc->f.fid), READ_LOCK))) {
		if (tvp->expireTime > now + secs) {
		    /* whole-volume callback still good: adopt its expiry */
		    tvc->cbExpires = tvp->expireTime;	/* XXX race here */
		} else {
		    int i;
		    /* NB #1: only invalidate if at least one server is up;
		     * if they're all down we might as well keep using it. */
		    for (i = 0; i < AFS_MAXHOSTS && tvp->serverHost[i]; i++) {
			if (!(tvp->serverHost[i]->flags & SRVR_ISDOWN)) {
			    /* What about locking xvcache or vrefcount++ or
			     * write locking tvc? */
			    afs_StaleVCacheFlags(tvc, AFS_STALEVC_CBLOCKED |
						 AFS_STALEVC_SKIP_DNLC_FOR_INIT_FLUSHED,
						 CMValid | CUnique);
			    tvc->dchint = NULL;	/*invalidate em */
			    afs_ResetVolumeInfo(tvp);
			    break;
			}
		    }
		}
		afs_PutVolume(tvp, READ_LOCK);
	    } else {
		/* Do I need to worry about things like execsorwriters?
		 * What about locking xvcache or vrefcount++ or write locking tvc?
		 */
		afs_StaleVCacheFlags(tvc, AFS_STALEVC_CBLOCKED |
				     AFS_STALEVC_SKIP_DNLC_FOR_INIT_FLUSHED,
				     CMValid | CUnique);
	    }
	}

	if ((tvc->cbExpires > basetime) && CBHash(tvc->cbExpires - basetime)) {
	    /* it's been renewed on us.  Have to be careful not to put it back
	     * into this slot, or we may never get out of here.
	     */
	    int slot;
	    slot = (CBHash(tvc->cbExpires - basetime) + base) % CBHTSIZE;
	    if (slot != base) {
		/* QPrev guards against an entry already unlinked above */
		if (QPrev(tq))
		    QRemove(&(tvc->callsort));
		QAdd(&(cbHashT[slot].head), &(tvc->callsort));
		/* XXX remember to update volume expiration time */
		/* -- not needed for correctness, though */
	    }
	}
    }

    if (safety > CBQ_LIMIT) {
	/* Traversed more entries than can possibly exist -- the chain must
	 * be corrupt.  Drop every callback on the floor and start over. */
	afs_stats_cmperf.cbloops++;
	if (afs_paniconwarn)
	    osi_Panic("CheckCallbacks");

	afs_warn
	    ("AFS Internal Error (minor): please contact AFS Product Support.\n");
	ReleaseWriteLock(&afs_xcbhash);
	afs_FlushCBs();
	return;
    } else
	ReleaseWriteLock(&afs_xcbhash);


    /* XXX future optimization:
       if this item has been recently accessed, queue up a stat for it.
       {
       struct dcache * adc;

       ObtainReadLock(&afs_xdcache);
       if ((adc = tvc->quick.dc) && (adc->stamp == tvc->quick.stamp)
       && (afs_indexTimes[adc->index] > afs_indexCounter - 20)) {
       queue up the stat request
       }
       ReleaseReadLock(&afs_xdcache);
       }
     */

    return;
}				/* afs_CheckCallback */
| 285 | |
| 286 | /* afs_FlushCBs |
| 287 | * to be used only in dire circumstances, this drops all callbacks on |
| 288 | * the floor, without giving them back to the server. It's ok, the server can |
| 289 | * deal with it, but it is a little bit rude. |
| 290 | */ |
| 291 | void |
| 292 | afs_FlushCBs(void) |
| 293 | { |
| 294 | int i; |
| 295 | struct vcache *tvc; |
| 296 | |
| 297 | ObtainWriteLock(&afs_xcbhash, 86); /* pretty likely I'm going to remove something */ |
| 298 | |
| 299 | for (i = 0; i < VCSIZE; i++) /* reset all the vnodes */ |
| 300 | for (tvc = afs_vhashT[i]; tvc; tvc = tvc->hnext) { |
| 301 | afs_StaleVCacheFlags(tvc, AFS_STALEVC_CBLOCKED | |
| 302 | AFS_STALEVC_CLEARCB | |
| 303 | AFS_STALEVC_SKIP_DNLC_FOR_INIT_FLUSHED, 0); |
| 304 | tvc->dchint = NULL; /* invalidate hints */ |
| 305 | } |
| 306 | |
| 307 | afs_InitCBQueue(0); |
| 308 | |
| 309 | ReleaseWriteLock(&afs_xcbhash); |
| 310 | } |
| 311 | |
| 312 | /* afs_FlushServerCBs |
| 313 | * to be used only in dire circumstances, this drops all callbacks on |
| 314 | * the floor for a specific server, without giving them back to the server. |
| 315 | * It's ok, the server can deal with it, but it is a little bit rude. |
| 316 | */ |
| 317 | void |
| 318 | afs_FlushServerCBs(struct server *srvp) |
| 319 | { |
| 320 | int i; |
| 321 | struct vcache *tvc; |
| 322 | |
| 323 | ObtainWriteLock(&afs_xcbhash, 86); /* pretty likely I'm going to remove something */ |
| 324 | |
| 325 | for (i = 0; i < VCSIZE; i++) { /* reset all the vnodes */ |
| 326 | for (tvc = afs_vhashT[i]; tvc; tvc = tvc->hnext) { |
| 327 | if (tvc->callback == srvp) { |
| 328 | afs_StaleVCacheFlags(tvc, AFS_STALEVC_CBLOCKED | |
| 329 | AFS_STALEVC_CLEARCB | |
| 330 | AFS_STALEVC_SKIP_DNLC_FOR_INIT_FLUSHED, 0); |
| 331 | tvc->dchint = NULL; /* invalidate hints */ |
| 332 | } |
| 333 | } |
| 334 | } |
| 335 | |
| 336 | ReleaseWriteLock(&afs_xcbhash); |
| 337 | } |
| 338 | |
| 339 | /* afs_InitCBQueue |
| 340 | * called to initialize static and global variables associated with |
| 341 | * the Callback expiration management mechanism. |
| 342 | */ |
| 343 | void |
| 344 | afs_InitCBQueue(int doLockInit) |
| 345 | { |
| 346 | int i; |
| 347 | |
| 348 | memset(cbHashT, 0, CBHTSIZE * sizeof(struct bucket)); |
| 349 | for (i = 0; i < CBHTSIZE; i++) { |
| 350 | QInit(&(cbHashT[i].head)); |
| 351 | /* Lock_Init(&(cbHashT[i].lock)); only if you want lots of locks, which |
| 352 | * don't seem too useful at present. */ |
| 353 | } |
| 354 | base = 0; |
| 355 | basetime = osi_Time(); |
| 356 | if (doLockInit) |
| 357 | Lock_Init(&afs_xcbhash); |
| 358 | } |
| 359 | |
| 360 | /* Because there are no real-time guarantees, and especially because a |
| 361 | * thread may wait on a lock indefinitely, this routine has to be |
| 362 | * careful that it doesn't get permanently out-of-date. Important |
| 363 | * assumption: this routine is only called from afs_Daemon, so there |
| 364 | * can't be more than one instance of this running at any one time. |
| 365 | * Presumes that basetime is never 0, and is always sane. |
| 366 | * |
| 367 | * Before calling this routine, be sure that the first slot is pretty |
| 368 | * empty. This -20 is because the granularity of the checks in |
| 369 | * afs_Daemon is pretty large, so I'd rather err on the side of safety |
| 370 | * sometimes. The fact that I only bump basetime by CBHTSLOTLEN-1 |
| 371 | * instead of the whole CBHTSLOTLEN is also for "safety". |
| 372 | * Conceptually, it makes this clock run just a little faster than the |
| 373 | * clock governing which slot a callback gets hashed into. Both of these |
| 374 | * things make CheckCallbacks work a little harder than it would have to |
| 375 | * if I wanted to cut things finer. |
| 376 | * Everything from the old first slot is carried over into the new first |
| 377 | * slot. Thus, if there were some things that ought to have been invalidated, |
| 378 | * but weren't (say, if the server was down), they will be examined at every |
| 379 | * opportunity thereafter. |
| 380 | */ |
| 381 | int |
| 382 | afs_BumpBase(void) |
| 383 | { |
| 384 | afs_uint32 now; |
| 385 | int didbump; |
| 386 | u_int oldbase; |
| 387 | |
| 388 | ObtainWriteLock(&afs_xcbhash, 87); |
| 389 | didbump = 0; |
| 390 | now = osi_Time(); |
| 391 | while (basetime + (CBHTSLOTLEN - 20) <= now) { |
| 392 | oldbase = base; |
| 393 | basetime += CBHTSLOTLEN - 1; |
| 394 | base = (base + 1) % CBHTSIZE; |
| 395 | didbump++; |
| 396 | if (!QEmpty(&(cbHashT[oldbase].head))) { |
| 397 | QCat(&(cbHashT[oldbase].head), &(cbHashT[base].head)); |
| 398 | } |
| 399 | } |
| 400 | ReleaseWriteLock(&afs_xcbhash); |
| 401 | |
| 402 | return didbump; |
| 403 | } |