Commit | Line | Data |
---|---|---|
805e021f CE |
1 | /* |
2 | * Copyright 2000, International Business Machines Corporation and others. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * This software has been released under the terms of the IBM Public | |
6 | * License. For details, see the LICENSE file in the top-level source | |
7 | * directory or online at http://www.openafs.org/dl/license10.html | |
8 | */ | |
9 | ||
10 | /* | |
11 | * This package is used to actively manage the expiration of callbacks, | |
12 | * so that the rest of the cache manager doesn't need to compute | |
13 | * whether a callback has expired or not, but can tell with one simple | |
14 | * check, that is, whether the CStatd bit is on or off. | |
15 | * | |
16 | * The base of the hash table moves periodically (every 128 seconds) | |
17 | * QueueCallback rarely touches the first 3 slots in the hash table | |
18 | * (only when called from CheckCallbacks) since MinTimeOut in | |
19 | * viced/callback.c is currently 7 minutes. | |
20 | * Therefore, CheckCallbacks should be able to run concurrently with | |
21 | * QueueCallback, given the proper locking, of course. | |
22 | * | |
23 | * Note: | |
24 | * 1. CheckCallbacks and BumpBase never run simultaneously. This is because | |
25 | * they are only called from afs_Daemon. Therefore, base and basetime will | |
26 | * always be consistent during CheckCallbacks. | |
27 | * 2. cbHashT [base] rarely (if ever) gets stuff queued in it. The only way | |
28 | * that could happen is CheckCallbacks might fencepost and move something in | |
29 | * place, or BumpBase might push some stuff up. | |
30 | * 3. Hash chains aren't particularly sorted. | |
31 | * 4. The file server keeps its callback state around for 3 minutes | |
32 | * longer than it promises the cache manager in order to account for | |
33 | * clock skew, network delay, and other bogeymen. | |
34 | * | |
35 | * For now I just use one large lock, which is fine on a uniprocessor, | |
36 | * since it's not held during any RPCs or low-priority I/O operations. | |
37 | * To make this code MP-fast, you need no more locks than processors, | |
38 | * but probably more than one. In measurements on MP-safe implementations, | |
39 | * I have never seen any contention over the xcbhash lock. | |
40 | * | |
41 | * Incompatible operations: | |
42 | * Enqueue and "dequeue of first vcache" in same slot | |
43 | * dequeue and "dequeue of preceding vcache" in same slot | |
44 | * dequeue and "dequeue of successive vcache" in same slot | |
45 | * BumpBase pushing a list and enqueue in the new base slot | |
46 | * Two enqueues in same slot | |
47 | * more... | |
48 | * | |
49 | * Certain invariants exist: | |
50 | * 1 Callback expiration times granted by a file server will never | |
51 | * decrease for a particular vnode UNLESS a CallBack RPC is invoked | |
52 | * by the server in the interim. | |
53 | * 2 A vcache will always expire no sooner than the slot in which it is | |
54 | * currently enqueued. Callback times granted by the server may | |
55 | * increase, in which case the vcache will be updated in-place. As a | |
56 | * result, it may expire later than the slot in which it is enqueued. | |
 * Not to worry, the CheckCallbacks code will move it if necessary.
58 | * This approach means that busy vnodes won't be continually moved | |
59 | * around within the expiry queue: they are only moved when they | |
60 | * finally advance to the lead bucket. | |
61 | * 3 Anything which has a callback on it must be in the expiry | |
62 | * queue. In AFS 3.3, that means everything but symlinks (which | |
63 | * are immutable), including contents of Read-Only volumes | |
64 | * (which have callbacks by virtue of the whole-volume callback) | |
65 | * | |
66 | * QueueCallback only checks that its vcache is in the list | |
67 | * somewhere, counting on invariant #1 to guarantee that the vcache | |
68 | * won't be in a slot later than QueueCallback would otherwise place | |
69 | * it. Therefore, whenever we turn off the CStatd bit on the vcache, we | |
70 | * *must* remove the vcache from the expiry queue. Otherwise, we | |
71 | * might have missed a CallBack RPC, and a subsequent callback might be | |
72 | * granted with a shorter expiration time. | |
73 | */ | |
74 | #include <afsconfig.h> | |
75 | #include "afs/param.h" | |
76 | ||
77 | ||
78 | #include "afs/sysincludes.h" /*Standard vendor system headers */ | |
79 | #include "afsincludes.h" /*AFS-based standard headers */ | |
80 | #include "afs/afs_cbqueue.h" | |
81 | #include "afs/afs.h" | |
82 | #include "afs/lock.h" | |
83 | #include "afs/afs_stats.h" | |
84 | ||
static unsigned int base = 0;		/* index of the lead (soonest-to-expire) slot in cbHashT */
static unsigned int basetime = 0;	/* wall-clock time corresponding to slot `base`; bumped by afs_BumpBase */
static struct vcache *debugvc;	/* used only for post-mortem debugging */
struct bucket {
    struct afs_q head;		/* doubly-linked chain of vcaches expiring in this slot's interval */
    /* struct afs_lock lock;  only if you want lots of locks... */
};
static struct bucket cbHashT[CBHTSIZE];	/* circular expiry hash table, indexed relative to `base` */
struct afs_lock afs_xcbhash;	/* single big lock guarding base, basetime, and all hash chains */
94 | ||
95 | /* afs_QueueCallback | |
96 | * Takes a write-locked vcache pointer and a callback expiration time | |
97 | * as returned by the file server (ie, in units of 128 seconds from "now"). | |
98 | * | |
99 | * Uses the time as an index into a hash table, and inserts the vcache | |
100 | * structure into the overflow chain. | |
101 | * | |
102 | * If the vcache is already on some hash chain, leave it there. | |
103 | * CheckCallbacks will get to it eventually. In the meantime, it | |
104 | * might get flushed, or it might already be on the right hash chain, | |
105 | * so why bother messing with it now? | |
106 | * | |
107 | * NOTE: The caller must hold a write lock on afs_xcbhash | |
108 | */ | |
109 | ||
110 | void | |
111 | afs_QueueCallback(struct vcache *avc, unsigned int atime, struct volume *avp) | |
112 | { | |
113 | if (avp && (avp->expireTime < avc->cbExpires)) | |
114 | avp->expireTime = avc->cbExpires; | |
115 | if (!(avc->callsort.next)) { | |
116 | atime = (atime + base) % CBHTSIZE; | |
117 | QAdd(&(cbHashT[atime].head), &(avc->callsort)); | |
118 | } | |
119 | ||
120 | return; | |
121 | } /* afs_QueueCallback */ | |
122 | ||
123 | /* afs_DequeueCallback | |
124 | * Takes a write-locked vcache pointer and removes it from the callback | |
125 | * hash table, without knowing beforehand which slot it was in. | |
126 | * | |
127 | * for now, just get a lock on everything when doing the dequeue, don't | |
128 | * worry about getting a lock on the individual slot. | |
129 | * | |
130 | * the only other places that do anything like dequeues are CheckCallbacks | |
131 | * and BumpBase. | |
132 | * | |
133 | * NOTE: The caller must hold a write lock on afs_xcbhash | |
134 | */ | |
135 | void | |
136 | afs_DequeueCallback(struct vcache *avc) | |
137 | { | |
138 | ||
139 | debugvc = avc; | |
140 | if (avc->callsort.prev) { | |
141 | QRemove(&(avc->callsort)); | |
142 | } else; /* must have got dequeued in a race */ | |
143 | ||
144 | return; | |
145 | } /* afs_DequeueCallback */ | |
146 | ||
147 | /* afs_CheckCallbacks | |
148 | * called periodically to determine which callbacks are likely to | |
149 | * expire in the next n second interval. Preemptively marks them as | |
150 | * expired. Rehashes items which are now in the wrong hash bucket. | |
151 | * Preemptively renew recently-accessed items. Only removes things | |
152 | * from the first and second bucket (as long as secs < 128), and | |
153 | * inserts things into other, later buckets. either need to advance | |
154 | * to the second bucket if secs spans two intervals, or else be | |
155 | * certain to call afs_CheckCallbacks immediately after calling | |
156 | * BumpBase (allows a little more slop but it's ok because file server | |
157 | * keeps 3 minutes of slop time) | |
158 | * | |
159 | * There is a little race between CheckCallbacks and any code which | |
160 | * updates cbExpires, always just prior to calling QueueCallback. We | |
161 | * don't lock the vcache struct here (can't, or we'd risk deadlock), | |
162 | * so GetVCache (for example) may update cbExpires before or after #1 | |
163 | * below. If before, CheckCallbacks moves this entry to its proper | |
164 | * slot. If after, GetVCache blocks in the call to QueueCallbacks, | |
165 | * this code dequeues the vcache, and then QueueCallbacks re-enqueues it. | |
166 | * | |
167 | * XXX to avoid the race, make QueueCallback take the "real" time | |
168 | * and update cbExpires under the xcbhash lock. | |
169 | * | |
170 | * NB #1: There's a little optimization here: if I go to invalidate a | |
171 | * RO vcache or volume, first check to see if the server is down. If | |
172 | * it _is_, don't invalidate it, cuz we might just as well keep using | |
173 | * it. Possibly, we could do the same thing for items in RW volumes, | |
174 | * but that bears some drinking about. | |
175 | * | |
176 | * Don't really need to invalidate the hints, we could just wait to see if | |
177 | * the dv has changed after a subsequent FetchStatus, but this is safer. | |
178 | */ | |
179 | ||
180 | /* Sanity check on the callback queue. Allow for slop in the computation. */ | |
181 | #if defined(AFS_LINUX22_ENV) | |
182 | #define CBQ_LIMIT (afs_maxvcount + 10) | |
183 | #else | |
184 | #define CBQ_LIMIT (afs_cacheStats + afs_stats_cmperf.vcacheXAllocs + 10) | |
185 | #endif | |
186 | ||
/* afs_CheckCallbacks
 * Walk the lead bucket of the expiry table and preemptively expire (or
 * re-hash) every vcache whose callback lapses within the next `secs`
 * seconds.  Called periodically from afs_Daemon; see the long comment
 * above for the race and invariant discussion.
 */
void
afs_CheckCallbacks(unsigned int secs)
{
    struct vcache *tvc;
    struct afs_q *tq;
    struct afs_q *uq;		/* predecessor of tq, saved before tq can be unlinked */
    afs_uint32 now;
    struct volume *tvp;
    int safety;			/* loop counter guarding against a corrupted (cyclic) chain */

    ObtainWriteLock(&afs_xcbhash, 85);	/* pretty likely I'm going to remove something */
    now = osi_Time();
    /* Walk the lead bucket backwards, saving each entry's predecessor
     * first because the current entry may be removed or re-queued below. */
    for (safety = 0, tq = cbHashT[base].head.prev;
	 (safety <= CBQ_LIMIT) && (tq != &(cbHashT[base].head));
	 tq = uq, safety++) {

	uq = QPrev(tq);
	tvc = CBQTOV(tq);
	if (tvc->cbExpires < now + secs) {	/* race #1 here */
	    /* Get the volume, and if its callback expiration time is more than secs
	     * seconds into the future, update this vcache entry and requeue it below
	     */
	    if ((tvc->f.states & CRO)
		&& (tvp = afs_FindVolume(&(tvc->f.fid), READ_LOCK))) {
		if (tvp->expireTime > now + secs) {
		    /* whole-volume callback still valid: inherit its expiry */
		    tvc->cbExpires = tvp->expireTime;	/* XXX race here */
		} else {
		    int i;
		    /* NB #1 above: only invalidate if at least one of the
		     * volume's servers is up; if they're all down we might
		     * as well keep using what we have. */
		    for (i = 0; i < AFS_MAXHOSTS && tvp->serverHost[i]; i++) {
			if (!(tvp->serverHost[i]->flags & SRVR_ISDOWN)) {
			    /* What about locking xvcache or vrefcount++ or
			     * write locking tvc? */
			    afs_StaleVCacheFlags(tvc, AFS_STALEVC_CBLOCKED |
						 AFS_STALEVC_SKIP_DNLC_FOR_INIT_FLUSHED,
						 CMValid | CUnique);
			    tvc->dchint = NULL;	/*invalidate em */
			    afs_ResetVolumeInfo(tvp);
			    break;
			}
		    }
		}
		afs_PutVolume(tvp, READ_LOCK);
	    } else {
		/* RW (or volume-less) case: just mark the vcache stale.
		 * Do I need to worry about things like execsorwriters?
		 * What about locking xvcache or vrefcount++ or write locking tvc?
		 */
		afs_StaleVCacheFlags(tvc, AFS_STALEVC_CBLOCKED |
				     AFS_STALEVC_SKIP_DNLC_FOR_INIT_FLUSHED,
				     CMValid | CUnique);
	    }
	}

	/* If the expiry was renewed since this entry was queued, move it
	 * to the slot matching its new time -- but never back into the
	 * base slot, or this loop could revisit it forever. */
	if ((tvc->cbExpires > basetime) && CBHash(tvc->cbExpires - basetime)) {
	    /* it's been renewed on us. Have to be careful not to put it back
	     * into this slot, or we may never get out of here.
	     */
	    int slot;
	    slot = (CBHash(tvc->cbExpires - basetime) + base) % CBHTSIZE;
	    if (slot != base) {
		if (QPrev(tq))
		    QRemove(&(tvc->callsort));
		QAdd(&(cbHashT[slot].head), &(tvc->callsort));
		/* XXX remember to update volume expiration time */
		/* -- not needed for correctness, though */
	    }
	}
    }

    if (safety > CBQ_LIMIT) {
	/* Chain was longer than any sane vcache count allows: assume
	 * corruption, record it, warn, and drop all callbacks. */
	afs_stats_cmperf.cbloops++;
	if (afs_paniconwarn)
	    osi_Panic("CheckCallbacks");

	afs_warn
	    ("AFS Internal Error (minor): please contact AFS Product Support.\n");
	ReleaseWriteLock(&afs_xcbhash);
	afs_FlushCBs();
	return;
    } else
	ReleaseWriteLock(&afs_xcbhash);


    /* XXX future optimization:
       if this item has been recently accessed, queue up a stat for it.
       {
       struct dcache * adc;

       ObtainReadLock(&afs_xdcache);
       if ((adc = tvc->quick.dc) && (adc->stamp == tvc->quick.stamp)
       && (afs_indexTimes[adc->index] > afs_indexCounter - 20)) {
       queue up the stat request
       }
       ReleaseReadLock(&afs_xdcache);
       }
     */

    return;
}				/* afs_CheckCallback */
285 | ||
286 | /* afs_FlushCBs | |
287 | * to be used only in dire circumstances, this drops all callbacks on | |
288 | * the floor, without giving them back to the server. It's ok, the server can | |
289 | * deal with it, but it is a little bit rude. | |
290 | */ | |
291 | void | |
292 | afs_FlushCBs(void) | |
293 | { | |
294 | int i; | |
295 | struct vcache *tvc; | |
296 | ||
297 | ObtainWriteLock(&afs_xcbhash, 86); /* pretty likely I'm going to remove something */ | |
298 | ||
299 | for (i = 0; i < VCSIZE; i++) /* reset all the vnodes */ | |
300 | for (tvc = afs_vhashT[i]; tvc; tvc = tvc->hnext) { | |
301 | afs_StaleVCacheFlags(tvc, AFS_STALEVC_CBLOCKED | | |
302 | AFS_STALEVC_CLEARCB | | |
303 | AFS_STALEVC_SKIP_DNLC_FOR_INIT_FLUSHED, 0); | |
304 | tvc->dchint = NULL; /* invalidate hints */ | |
305 | } | |
306 | ||
307 | afs_InitCBQueue(0); | |
308 | ||
309 | ReleaseWriteLock(&afs_xcbhash); | |
310 | } | |
311 | ||
312 | /* afs_FlushServerCBs | |
313 | * to be used only in dire circumstances, this drops all callbacks on | |
314 | * the floor for a specific server, without giving them back to the server. | |
315 | * It's ok, the server can deal with it, but it is a little bit rude. | |
316 | */ | |
317 | void | |
318 | afs_FlushServerCBs(struct server *srvp) | |
319 | { | |
320 | int i; | |
321 | struct vcache *tvc; | |
322 | ||
323 | ObtainWriteLock(&afs_xcbhash, 86); /* pretty likely I'm going to remove something */ | |
324 | ||
325 | for (i = 0; i < VCSIZE; i++) { /* reset all the vnodes */ | |
326 | for (tvc = afs_vhashT[i]; tvc; tvc = tvc->hnext) { | |
327 | if (tvc->callback == srvp) { | |
328 | afs_StaleVCacheFlags(tvc, AFS_STALEVC_CBLOCKED | | |
329 | AFS_STALEVC_CLEARCB | | |
330 | AFS_STALEVC_SKIP_DNLC_FOR_INIT_FLUSHED, 0); | |
331 | tvc->dchint = NULL; /* invalidate hints */ | |
332 | } | |
333 | } | |
334 | } | |
335 | ||
336 | ReleaseWriteLock(&afs_xcbhash); | |
337 | } | |
338 | ||
339 | /* afs_InitCBQueue | |
340 | * called to initialize static and global variables associated with | |
341 | * the Callback expiration management mechanism. | |
342 | */ | |
343 | void | |
344 | afs_InitCBQueue(int doLockInit) | |
345 | { | |
346 | int i; | |
347 | ||
348 | memset(cbHashT, 0, CBHTSIZE * sizeof(struct bucket)); | |
349 | for (i = 0; i < CBHTSIZE; i++) { | |
350 | QInit(&(cbHashT[i].head)); | |
351 | /* Lock_Init(&(cbHashT[i].lock)); only if you want lots of locks, which | |
352 | * don't seem too useful at present. */ | |
353 | } | |
354 | base = 0; | |
355 | basetime = osi_Time(); | |
356 | if (doLockInit) | |
357 | Lock_Init(&afs_xcbhash); | |
358 | } | |
359 | ||
360 | /* Because there are no real-time guarantees, and especially because a | |
361 | * thread may wait on a lock indefinitely, this routine has to be | |
362 | * careful that it doesn't get permanently out-of-date. Important | |
363 | * assumption: this routine is only called from afs_Daemon, so there | |
364 | * can't be more than one instance of this running at any one time. | |
365 | * Presumes that basetime is never 0, and is always sane. | |
366 | * | |
367 | * Before calling this routine, be sure that the first slot is pretty | |
368 | * empty. This -20 is because the granularity of the checks in | |
369 | * afs_Daemon is pretty large, so I'd rather err on the side of safety | |
370 | * sometimes. The fact that I only bump basetime by CBHTSLOTLEN-1 | |
371 | * instead of the whole CBHTSLOTLEN is also for "safety". | |
372 | * Conceptually, it makes this clock run just a little faster than the | |
373 | * clock governing which slot a callback gets hashed into. Both of these | |
374 | * things make CheckCallbacks work a little harder than it would have to | |
375 | * if I wanted to cut things finer. | |
376 | * Everything from the old first slot is carried over into the new first | |
377 | * slot. Thus, if there were some things that ought to have been invalidated, | |
378 | * but weren't (say, if the server was down), they will be examined at every | |
379 | * opportunity thereafter. | |
380 | */ | |
381 | int | |
382 | afs_BumpBase(void) | |
383 | { | |
384 | afs_uint32 now; | |
385 | int didbump; | |
386 | u_int oldbase; | |
387 | ||
388 | ObtainWriteLock(&afs_xcbhash, 87); | |
389 | didbump = 0; | |
390 | now = osi_Time(); | |
391 | while (basetime + (CBHTSLOTLEN - 20) <= now) { | |
392 | oldbase = base; | |
393 | basetime += CBHTSLOTLEN - 1; | |
394 | base = (base + 1) % CBHTSIZE; | |
395 | didbump++; | |
396 | if (!QEmpty(&(cbHashT[oldbase].head))) { | |
397 | QCat(&(cbHashT[oldbase].head), &(cbHashT[base].head)); | |
398 | } | |
399 | } | |
400 | ReleaseWriteLock(&afs_xcbhash); | |
401 | ||
402 | return didbump; | |
403 | } |