Commit | Line | Data |
---|---|---|
805e021f CE |
1 | /* |
2 | * Copyright 2000, International Business Machines Corporation and others. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * This software has been released under the terms of the IBM Public | |
6 | * License. For details, see the LICENSE file in the top-level source | |
7 | * directory or online at http://www.openafs.org/dl/license10.html | |
8 | */ | |
9 | ||
10 | /* | |
11 | * Implements: | |
12 | */ | |
13 | #include <afsconfig.h> | |
14 | #include "afs/param.h" | |
15 | ||
16 | ||
17 | #include "afs/sysincludes.h" /*Standard vendor system headers */ | |
18 | #include "afsincludes.h" /*AFS-based standard headers */ | |
19 | #include "afs/afs_stats.h" /* statistics */ | |
20 | #include "afs/afs_cbqueue.h" | |
21 | #include "afs/afs_osidnlc.h" | |
22 | ||
23 | #include <opr/ffs.h> | |
24 | ||
/* Forward declarations. */
static void afs_GetDownD(int anumber, int *aneedSpace, afs_int32 buckethint);
static int afs_FreeDiscardedDCache(void);
static void afs_DiscardDCache(struct dcache *);
static void afs_FreeDCache(struct dcache *);
/* For split cache: bucket selection and per-bucket block accounting. */
static afs_int32 afs_DCGetBucket(struct vcache *);
static void afs_DCAdjustSize(struct dcache *, afs_int32, afs_int32);
static void afs_DCMoveBucket(struct dcache *, afs_int32, afs_int32);
static void afs_DCSizeInit(void);
static afs_int32 afs_DCWhichBucket(afs_int32, afs_int32);
36 | ||
/*
 * --------------------- Exported definitions ---------------------
 */
/* For split cache */
afs_int32 afs_blocksUsed_0;	/*!< 1K blocks in cache - in theory is zero */
afs_int32 afs_blocksUsed_1;	/*!< 1K blocks in cache */
afs_int32 afs_blocksUsed_2;	/*!< 1K blocks in cache */
afs_int32 afs_pct1 = -1;	/*!< Percent of cache used by bucket 1; -1 until computed. */
afs_int32 afs_pct2 = -1;	/*!< Percent of cache used by bucket 2; -1 until computed. */
afs_uint32 afs_tpct1 = 0;	/*!< Target percent threshold for bucket 1. */
afs_uint32 afs_tpct2 = 0;	/*!< Target percent threshold for bucket 2. */
afs_uint32 splitdcache = 0;	/*!< Nonzero enables the split (bucketed) data cache. */

afs_lock_t afs_xdcache;		/*!< Lock: alloc new disk cache entries */
afs_int32 afs_freeDCList;	/*!< Free list for disk cache entries */
afs_int32 afs_freeDCCount;	/*!< Count of elts in freeDCList */
afs_int32 afs_discardDCList;	/*!< Discarded disk cache entries */
afs_int32 afs_discardDCCount;	/*!< Count of elts in discardDCList */
struct dcache *afs_freeDSList;	/*!< Free list for disk slots */
struct dcache *afs_Initial_freeDSList;	/*!< Initial list for above */
afs_dcache_id_t cacheInode;	/*!< Inode for CacheItems file */
struct osi_file *afs_cacheInodep = 0;	/*!< file for CacheItems inode */
struct afs_q afs_DLRU;		/*!< dcache LRU */
afs_int32 afs_dhashsize = 1024;	/*!< Buckets in dcache hash tables; used as a power-of-two mask. */
afs_int32 *afs_dvhashTbl;	/*!< Data cache hash table: hashed by FID + chunk number. */
afs_int32 *afs_dchashTbl;	/*!< Data cache hash table: hashed by FID. */
afs_int32 *afs_dvnextTbl;	/*!< Dcache hash table links */
afs_int32 *afs_dcnextTbl;	/*!< Dcache hash table links */
struct dcache **afs_indexTable;	/*!< Pointers to dcache entries */
afs_hyper_t *afs_indexTimes;	/*!< Dcache entry Access times */
afs_int32 *afs_indexUnique;	/*!< dcache entry Fid.Unique */
unsigned char *afs_indexFlags;	/*!< (only one) Is there data there? */
afs_hyper_t afs_indexCounter;	/*!< Fake time for marking index
				 * entries */
afs_int32 afs_cacheFiles = 0;	/*!< Size of afs_indexTable */
afs_int32 afs_cacheBlocks;	/*!< 1K blocks in cache */
afs_int32 afs_cacheStats;	/*!< Stat entries in cache */
afs_int32 afs_blocksUsed;	/*!< Number of blocks in use */
afs_int32 afs_blocksDiscarded;	/*!< Blocks freed but not truncated */
afs_int32 afs_fsfragsize = AFS_MIN_FRAGSIZE;	/*!< Underlying Filesystem minimum unit
						 * of disk allocation usually 1K
						 * this value is (truefrag - 1) to
						 * save a bunch of subtracts... */
#ifdef AFS_64BIT_CLIENT
#ifdef AFS_VM_RDWR_ENV
afs_size_t afs_vmMappingEnd;	/*!< For large files (>= 2GB) the VM
				 * mapping on 32bit addressing machines
				 * can only be used below the 2 GB
				 * line. From this point upwards we
				 * must do direct I/O into the cache
				 * files. The value should be on a
				 * chunk boundary. */
#endif /* AFS_VM_RDWR_ENV */
#endif /* AFS_64BIT_CLIENT */

/* The following is used to ensure that new dcache's aren't obtained when
 * the cache is nearly full.
 */
int afs_WaitForCacheDrain = 0;	/*!< Nonzero while threads sleep waiting for the cache to drain. */
int afs_TruncateDaemonRunning = 0;	/*!< Nonzero while the truncate daemon is awake and working. */
int afs_CacheTooFull = 0;	/*!< Latched result of the last afs_CacheIsTooFull() check. */

afs_int32 afs_dcentries;	/*!< In-memory dcache entries */


int dcacheDisabled = 0;		/* NOTE(review): presumably disables dcache use
				 * entirely when set — confirm at callers. */
103 | ||
/*!
 * Cache ops vector for a disk (UFS-style) backing cache; slots are the
 * members of struct afs_cacheOps.
 */
struct afs_cacheOps afs_UfsCacheOps = {
#ifndef HAVE_STRUCT_LABEL_SUPPORT
    /* Positional initializers for compilers lacking designated-initializer
     * support; order must match struct afs_cacheOps exactly. */
    osi_UFSOpen,
    osi_UFSTruncate,
    afs_osi_Read,
    afs_osi_Write,
    osi_UFSClose,
    afs_UFSReadUIO,
    afs_UFSWriteUIO,
    afs_UFSGetDSlot,
    afs_UFSGetVolSlot,
    afs_UFSHandleLink,
#else
    .open = osi_UFSOpen,
    .truncate = osi_UFSTruncate,
    .fread = afs_osi_Read,
    .fwrite = afs_osi_Write,
    .close = osi_UFSClose,
    .vreadUIO = afs_UFSReadUIO,
    .vwriteUIO = afs_UFSWriteUIO,
    .GetDSlot = afs_UFSGetDSlot,
    .GetVolSlot = afs_UFSGetVolSlot,
    .HandleLink = afs_UFSHandleLink,
#endif
};
129 | ||
/*!
 * Cache ops vector for a memory-backed cache; slots are the members of
 * struct afs_cacheOps.
 */
struct afs_cacheOps afs_MemCacheOps = {
#ifndef HAVE_STRUCT_LABEL_SUPPORT
    /* Positional initializers for compilers lacking designated-initializer
     * support; order must match struct afs_cacheOps exactly. */
    afs_MemCacheOpen,
    afs_MemCacheTruncate,
    afs_MemReadBlk,
    afs_MemWriteBlk,
    afs_MemCacheClose,
    afs_MemReadUIO,
    afs_MemWriteUIO,
    afs_MemGetDSlot,
    afs_MemGetVolSlot,
    afs_MemHandleLink,
#else
    .open = afs_MemCacheOpen,
    .truncate = afs_MemCacheTruncate,
    .fread = afs_MemReadBlk,
    .fwrite = afs_MemWriteBlk,
    .close = afs_MemCacheClose,
    .vreadUIO = afs_MemReadUIO,
    .vwriteUIO = afs_MemWriteUIO,
    .GetDSlot = afs_MemGetDSlot,
    .GetVolSlot = afs_MemGetVolSlot,
    .HandleLink = afs_MemHandleLink,
#endif
};
155 | ||
int cacheDiskType;		/*!< Type of backing disk for cache */
struct afs_cacheOps *afs_cacheType;	/*!< Ops vector selected for the cache type in use. */
158 | ||
159 | ||
160 | /* | |
161 | * The PFlush algorithm makes use of the fact that Fid.Unique is not used in | |
162 | * below hash algorithms. Change it if need be so that flushing algorithm | |
163 | * doesn't move things from one hash chain to another. | |
164 | */ | |
165 | /*Vnode, Chunk -> Hash table index */ | |
166 | int DCHash(struct VenusFid *fid, afs_int32 chunk) | |
167 | { | |
168 | afs_uint32 buf[3]; | |
169 | ||
170 | buf[0] = fid->Fid.Volume; | |
171 | buf[1] = fid->Fid.Vnode; | |
172 | buf[2] = chunk; | |
173 | return opr_jhash(buf, 3, 0) & (afs_dhashsize - 1); | |
174 | } | |
175 | /*Vnode -> Other hash table index */ | |
176 | int DVHash(struct VenusFid *fid) | |
177 | { | |
178 | return opr_jhash_int2(fid->Fid.Volume, fid->Fid.Vnode, 0) & | |
179 | (afs_dhashsize - 1); | |
180 | } | |
181 | ||
182 | /*! | |
183 | * Where is this vcache's entry associated dcache located/ | |
184 | * \param avc The vcache entry. | |
185 | * \return Bucket index: | |
186 | * 1 : main | |
187 | * 2 : RO | |
188 | */ | |
189 | static afs_int32 | |
190 | afs_DCGetBucket(struct vcache *avc) | |
191 | { | |
192 | if (!splitdcache) | |
193 | return 1; | |
194 | ||
195 | /* This should be replaced with some sort of user configurable function */ | |
196 | if (avc->f.states & CRO) { | |
197 | return 2; | |
198 | } else if (avc->f.states & CBackup) { | |
199 | return 1; | |
200 | } else { | |
201 | /* RW */ | |
202 | } | |
203 | /* main bucket */ | |
204 | return 1; | |
205 | } | |
206 | ||
207 | /*! | |
208 | * Readjust a dcache's size. | |
209 | * | |
210 | * \param adc The dcache to be adjusted. | |
211 | * \param oldSize Old size for the dcache. | |
212 | * \param newSize The new size to be adjusted to. | |
213 | * | |
214 | */ | |
215 | static void | |
216 | afs_DCAdjustSize(struct dcache *adc, afs_int32 oldSize, afs_int32 newSize) | |
217 | { | |
218 | afs_int32 adjustSize = newSize - oldSize; | |
219 | ||
220 | if (!splitdcache) | |
221 | return; | |
222 | ||
223 | switch (adc->bucket) | |
224 | { | |
225 | case 0: | |
226 | afs_blocksUsed_0 += adjustSize; | |
227 | afs_stats_cmperf.cacheBucket0_Discarded += oldSize; | |
228 | break; | |
229 | case 1: | |
230 | afs_blocksUsed_1 += adjustSize; | |
231 | afs_stats_cmperf.cacheBucket1_Discarded += oldSize; | |
232 | break; | |
233 | case 2: | |
234 | afs_blocksUsed_2 += adjustSize; | |
235 | afs_stats_cmperf.cacheBucket2_Discarded += oldSize; | |
236 | break; | |
237 | } | |
238 | ||
239 | return; | |
240 | } | |
241 | ||
242 | /*! | |
243 | * Move a dcache from one bucket to another. | |
244 | * | |
245 | * \param adc Operate on this dcache. | |
246 | * \param size Size in bucket (?). | |
247 | * \param newBucket Destination bucket. | |
248 | * | |
249 | */ | |
250 | static void | |
251 | afs_DCMoveBucket(struct dcache *adc, afs_int32 size, afs_int32 newBucket) | |
252 | { | |
253 | if (!splitdcache) | |
254 | return; | |
255 | ||
256 | /* Substract size from old bucket. */ | |
257 | switch (adc->bucket) | |
258 | { | |
259 | case 0: | |
260 | afs_blocksUsed_0 -= size; | |
261 | break; | |
262 | case 1: | |
263 | afs_blocksUsed_1 -= size; | |
264 | break; | |
265 | case 2: | |
266 | afs_blocksUsed_2 -= size; | |
267 | break; | |
268 | } | |
269 | ||
270 | /* Set new bucket and increase destination bucket size. */ | |
271 | adc->bucket = newBucket; | |
272 | ||
273 | switch (adc->bucket) | |
274 | { | |
275 | case 0: | |
276 | afs_blocksUsed_0 += size; | |
277 | break; | |
278 | case 1: | |
279 | afs_blocksUsed_1 += size; | |
280 | break; | |
281 | case 2: | |
282 | afs_blocksUsed_2 += size; | |
283 | break; | |
284 | } | |
285 | ||
286 | return; | |
287 | } | |
288 | ||
289 | /*! | |
290 | * Init split caches size. | |
291 | */ | |
292 | static void | |
293 | afs_DCSizeInit(void) | |
294 | { | |
295 | afs_blocksUsed_0 = afs_blocksUsed_1 = afs_blocksUsed_2 = 0; | |
296 | } | |
297 | ||
298 | ||
299 | /*! | |
300 | * \param phase | |
301 | * \param bucket | |
302 | */ | |
303 | static afs_int32 | |
304 | afs_DCWhichBucket(afs_int32 phase, afs_int32 bucket) | |
305 | { | |
306 | if (!splitdcache) | |
307 | return 0; | |
308 | ||
309 | afs_pct1 = afs_blocksUsed_1 / (afs_cacheBlocks / 100); | |
310 | afs_pct2 = afs_blocksUsed_2 / (afs_cacheBlocks / 100); | |
311 | ||
312 | /* Short cut: if we don't know about it, try to kill it */ | |
313 | if (phase < 2 && afs_blocksUsed_0) | |
314 | return 0; | |
315 | ||
316 | if (afs_pct1 > afs_tpct1) | |
317 | return 1; | |
318 | if (afs_pct2 > afs_tpct2) | |
319 | return 2; | |
320 | return 0; /* unlikely */ | |
321 | } | |
322 | ||
323 | ||
/*!
 * Warn about failing to store a file.
 *
 * \param acode Associated error code; negative values are treated as
 *	network problems, ENOSPC as a full partition, EDQUOT (where it
 *	exists) as quota exceeded, anything else as an unknown error.
 * \param avolume Volume involved.
 * \param aflags How to handle the output:
 *	aflags & 1: Print out on console
 *	aflags & 2: Print out on controlling tty
 *
 * \note Environment: Call this from close call when vnodeops is RCS unlocked.
 */

void
afs_StoreWarn(afs_int32 acode, afs_int32 avolume,
	      afs_int32 aflags)
{
    static char problem_fmt[] =
	"afs: failed to store file in volume %d (%s)\n";
    static char problem_fmt_w_error[] =
	"afs: failed to store file in volume %d (error %d)\n";
    static char netproblems[] = "network problems";
    static char partfull[] = "partition full";
    static char overquota[] = "over quota";

    AFS_STATCNT(afs_StoreWarn);
    if (acode < 0) {
	/*
	 * Network problems
	 */
	if (aflags & 1)
	    afs_warn(problem_fmt, avolume, netproblems);
	if (aflags & 2)
	    afs_warnuser(problem_fmt, avolume, netproblems);
    } else if (acode == ENOSPC) {
	/*
	 * Partition full
	 */
	if (aflags & 1)
	    afs_warn(problem_fmt, avolume, partfull);
	if (aflags & 2)
	    afs_warnuser(problem_fmt, avolume, partfull);
    } else
#ifdef EDQUOT
	/* EDQUOT doesn't exist on solaris and won't be sent by the server.
	 * Instead ENOSPC will be sent...
	 */
    if (acode == EDQUOT) {
	/*
	 * Quota exceeded
	 */
	if (aflags & 1)
	    afs_warn(problem_fmt, avolume, overquota);
	if (aflags & 2)
	    afs_warnuser(problem_fmt, avolume, overquota);
    } else
#endif
    {
	/*
	 * Unknown error
	 */
	if (aflags & 1)
	    afs_warn(problem_fmt_w_error, avolume, acode);
	if (aflags & 2)
	    afs_warnuser(problem_fmt_w_error, avolume, acode);
    }
}				/*afs_StoreWarn */
390 | ||
391 | /*! | |
392 | * Try waking up truncation daemon, if it's worth it. | |
393 | */ | |
394 | void | |
395 | afs_MaybeWakeupTruncateDaemon(void) | |
396 | { | |
397 | if (!afs_CacheTooFull && afs_CacheIsTooFull()) { | |
398 | afs_CacheTooFull = 1; | |
399 | if (!afs_TruncateDaemonRunning) | |
400 | afs_osi_Wakeup((int *)afs_CacheTruncateDaemon); | |
401 | } else if (!afs_TruncateDaemonRunning | |
402 | && afs_blocksDiscarded > CM_MAXDISCARDEDCHUNKS) { | |
403 | afs_osi_Wakeup((int *)afs_CacheTruncateDaemon); | |
404 | } | |
405 | } | |
406 | ||
/*!
 * \struct CTD_stats
 *
 * Keep statistics on run time for afs_CacheTruncateDaemon. This is a
 * struct so we need only export one symbol for AIX.
 */
static struct CTD_stats {
    osi_timeval_t CTD_beforeSleep;	/*!< Time stamp taken just before the daemon sleeps. */
    osi_timeval_t CTD_afterSleep;	/*!< Time stamp taken when the daemon wakes up. */
    osi_timeval_t CTD_sleepTime;	/*!< Accumulated time spent sleeping. */
    osi_timeval_t CTD_runTime;		/*!< Accumulated time spent working. */
    int CTD_nSleeps;			/*!< Number of times the daemon has slept. */
} CTD_stats;

/*!< Minimum cache size in 1K blocks; computed in afs_CacheTruncateDaemon. */
u_int afs_min_cache = 0;
422 | ||
423 | /*! | |
424 | * If there are waiters for the cache to drain, wake them if | |
425 | * the number of free or discarded cache blocks reaches the | |
426 | * CM_CACHESIZEDDRAINEDPCT limit. | |
427 | * | |
428 | * \note Environment: | |
429 | * This routine must be called with the afs_xdcache lock held | |
430 | * (in write mode). | |
431 | */ | |
432 | static void | |
433 | afs_WakeCacheWaitersIfDrained(void) | |
434 | { | |
435 | if (afs_WaitForCacheDrain) { | |
436 | if ((afs_blocksUsed - afs_blocksDiscarded) <= | |
437 | PERCENT(CM_CACHESIZEDRAINEDPCT, afs_cacheBlocks)) { | |
438 | afs_WaitForCacheDrain = 0; | |
439 | afs_osi_Wakeup(&afs_WaitForCacheDrain); | |
440 | } | |
441 | } | |
442 | } | |
443 | ||
/*!
 * Keeps the cache clean and free by truncating unneeded files, when used.
 * Runs as a background loop until afs_termState requests shutdown
 * (AFSOP_STOP_TRUNCDAEMON).
 */
void
afs_CacheTruncateDaemon(void)
{
    osi_timeval_t CTD_tmpTime;
    u_int counter;
    u_int cb_lowat;
    /* High-water mark on dcache slot usage, as a file count. */
    u_int dc_hiwat =
	PERCENT((100 - CM_DCACHECOUNTFREEPCT + CM_DCACHEEXTRAPCT), afs_cacheFiles);
    /* Ten chunks, rounded up to a fragment boundary, in 1K blocks. */
    afs_min_cache =
	(((10 * AFS_CHUNKSIZE(0)) + afs_fsfragsize) & ~afs_fsfragsize) >> 10;

    osi_GetuTime(&CTD_stats.CTD_afterSleep);
    afs_TruncateDaemonRunning = 1;
    while (1) {
	/* Low-water mark on free blocks, recomputed each pass. */
	cb_lowat = PERCENT((CM_DCACHESPACEFREEPCT - CM_DCACHEEXTRAPCT), afs_cacheBlocks);
	ObtainWriteLock(&afs_xdcache, 266);
	if (afs_CacheTooFull || afs_WaitForCacheDrain) {
	    int space_needed, slots_needed;
	    /* if we get woken up, we should try to clean something out */
	    for (counter = 0; counter < 10; counter++) {
		space_needed =
		    afs_blocksUsed - afs_blocksDiscarded - cb_lowat;
		if (space_needed < 0)
		    space_needed = 0;
		slots_needed =
		    dc_hiwat - afs_freeDCCount - afs_discardDCCount;
		if (slots_needed < 0)
		    slots_needed = 0;
		if (slots_needed || space_needed)
		    afs_GetDownD(slots_needed, &space_needed, 0);
		if ((space_needed <= 0) && (slots_needed <= 0)) {
		    break;
		}
		if (afs_termState == AFSOP_STOP_TRUNCDAEMON)
		    break;
	    }
	    if (!afs_CacheIsTooFull()) {
		afs_CacheTooFull = 0;
		afs_WakeCacheWaitersIfDrained();
	    }
	}			/* end of cache cleanup */
	ReleaseWriteLock(&afs_xdcache);

	/*
	 * This is a defensive check to try to avoid starving threads
	 * that may need the global lock so they can help free some
	 * cache space. If this thread won't be sleeping or truncating
	 * any cache files then give up the global lock so other
	 * threads get a chance to run.
	 */
	if ((afs_termState != AFSOP_STOP_TRUNCDAEMON) && afs_CacheTooFull
	    && (!afs_blocksDiscarded || afs_WaitForCacheDrain)) {
	    afs_osi_Wait(100, 0, 0);	/* 100 milliseconds */
	}

	/*
	 * This is where we free the discarded cache elements.
	 */
	while (afs_blocksDiscarded && !afs_WaitForCacheDrain
	       && (afs_termState != AFSOP_STOP_TRUNCDAEMON)) {
	    int code = afs_FreeDiscardedDCache();
	    if (code) {
		/* If we can't free any discarded dcache entries, that's okay.
		 * We're just doing this in the background; if someone needs
		 * discarded entries freed, they will try it themselves and/or
		 * signal us that the cache is too full. In any case, we'll
		 * try doing this again the next time we run through the loop.
		 */
		break;
	    }
	}

	/* See if we need to continue to run. Someone may have
	 * signalled us while we were executing.
	 */
	if (!afs_WaitForCacheDrain && !afs_CacheTooFull
	    && (afs_termState != AFSOP_STOP_TRUNCDAEMON)) {
	    /* Collect statistics on truncate daemon. */
	    CTD_stats.CTD_nSleeps++;
	    osi_GetuTime(&CTD_stats.CTD_beforeSleep);
	    afs_stats_GetDiff(CTD_tmpTime, CTD_stats.CTD_afterSleep,
			      CTD_stats.CTD_beforeSleep);
	    afs_stats_AddTo(CTD_stats.CTD_runTime, CTD_tmpTime);

	    /* Sleep until afs_MaybeWakeupTruncateDaemon (or similar)
	     * wakes us; the Running flag lets wakers skip the wakeup
	     * while we are already active. */
	    afs_TruncateDaemonRunning = 0;
	    afs_osi_Sleep((int *)afs_CacheTruncateDaemon);
	    afs_TruncateDaemonRunning = 1;

	    osi_GetuTime(&CTD_stats.CTD_afterSleep);
	    afs_stats_GetDiff(CTD_tmpTime, CTD_stats.CTD_beforeSleep,
			      CTD_stats.CTD_afterSleep);
	    afs_stats_AddTo(CTD_stats.CTD_sleepTime, CTD_tmpTime);
	}
	if (afs_termState == AFSOP_STOP_TRUNCDAEMON) {
	    /* Hand the shutdown sequence on to the next daemon stage. */
	    afs_termState = AFSOP_STOP_AFSDB;
	    afs_osi_Wakeup(&afs_termState);
	    break;
	}
    }
}
549 | ||
550 | ||
/*!
 * Make adjustment for the new size in the disk cache entry
 *
 * \note Major Assumptions Here:
 *      Assumes that frag size is an integral power of two, less one,
 *      and that this is a two's complement machine.  I don't
 *      know of any filesystems which violate this assumption...
 *
 * \param adc Ptr to dcache entry.
 * \param newSize New size desired, in bytes.
 *
 */

void
afs_AdjustSize(struct dcache *adc, afs_int32 newSize)
{
    afs_int32 oldSize;

    AFS_STATCNT(afs_AdjustSize);

    if (newSize > afs_OtherCSize && !(adc->f.fid.Fid.Vnode & 1)) {
	/* No non-dir cache files should be larger than the chunk size.
	 * (Directory blobs are fetched in a single chunk file, so directories
	 * can be larger.) If someone is requesting that a chunk is larger than
	 * the chunk size, something strange is happening. Log a message about
	 * it, to give a hint to subsequent strange behavior, if any occurs. */
	static int warned;
	if (!warned) {
	    warned = 1;
	    afs_warn("afs: Warning: dcache %d is very large (%d > %d). This "
	             "should not happen, but trying to continue regardless. If "
	             "AFS starts hanging or behaving strangely, this might be "
	             "why.\n",
	             adc->index, newSize, afs_OtherCSize);
	}
    }

    adc->dflags |= DFEntryMod;
    /* Round old and new sizes up to a fragment boundary and convert to 1K
     * blocks.  NOTE(review): the `^ afs_fsfragsize` trick matches the usual
     * `& ~afs_fsfragsize` round-up after the >> 10 only when the fragment
     * size is <= 1K (afs_fsfragsize is truefrag - 1); confirm behavior for
     * larger fragment sizes. */
    oldSize = ((adc->f.chunkBytes + afs_fsfragsize) ^ afs_fsfragsize) >> 10;	/* round up */
    adc->f.chunkBytes = newSize;
    if (!newSize)
	adc->validPos = 0;
    newSize = ((newSize + afs_fsfragsize) ^ afs_fsfragsize) >> 10;	/* round up */
    afs_DCAdjustSize(adc, oldSize, newSize);
    if ((newSize > oldSize) && !AFS_IS_DISCONNECTED) {

	/* We're growing the file, wakeup the daemon */
	afs_MaybeWakeupTruncateDaemon();
    }
    afs_blocksUsed += (newSize - oldSize);
    afs_stats_cmperf.cacheBlocksInUse = afs_blocksUsed;	/* XXX */
}
603 | ||
604 | ||
605 | /*! | |
606 | * This routine is responsible for moving at least one entry (but up | |
607 | * to some number of them) from the LRU queue to the free queue. | |
608 | * | |
609 | * \param anumber Number of entries that should ideally be moved. | |
610 | * \param aneedSpace How much space we need (1K blocks); | |
611 | * | |
612 | * \note Environment: | |
613 | * The anumber parameter is just a hint; at least one entry MUST be | |
614 | * moved, or we'll panic. We must be called with afs_xdcache | |
615 | * write-locked. We should try to satisfy both anumber and aneedspace, | |
616 | * whichever is more demanding - need to do several things: | |
617 | * 1. only grab up to anumber victims if aneedSpace <= 0, not | |
618 | * the whole set of MAXATONCE. | |
619 | * 2. dynamically choose MAXATONCE to reflect severity of | |
620 | * demand: something like (*aneedSpace >> (logChunk - 9)) | |
621 | * | |
622 | * \note N.B. if we're called with aneedSpace <= 0 and anumber > 0, that | |
623 | * indicates that the cache is not properly configured/tuned or | |
624 | * something. We should be able to automatically correct that problem. | |
625 | */ | |
626 | ||
627 | #define MAXATONCE 16 /* max we can obtain at once */ | |
628 | static void | |
629 | afs_GetDownD(int anumber, int *aneedSpace, afs_int32 buckethint) | |
630 | { | |
631 | ||
632 | struct dcache *tdc; | |
633 | struct VenusFid *afid; | |
634 | afs_int32 i, j; | |
635 | afs_hyper_t vtime; | |
636 | int skip, phase; | |
637 | struct vcache *tvc; | |
638 | afs_uint32 victims[MAXATONCE]; | |
639 | struct dcache *victimDCs[MAXATONCE]; | |
640 | afs_hyper_t victimTimes[MAXATONCE]; /* youngest (largest LRU time) first */ | |
641 | afs_uint32 victimPtr; /* next free item in victim arrays */ | |
642 | afs_hyper_t maxVictimTime; /* youngest (largest LRU time) victim */ | |
643 | afs_uint32 maxVictimPtr; /* where it is */ | |
644 | int discard; | |
645 | int curbucket; | |
646 | ||
647 | AFS_STATCNT(afs_GetDownD); | |
648 | ||
649 | if (CheckLock(&afs_xdcache) != -1) | |
650 | osi_Panic("getdownd nolock"); | |
651 | /* decrement anumber first for all dudes in free list */ | |
652 | /* SHOULD always decrement anumber first, even if aneedSpace >0, | |
653 | * because we should try to free space even if anumber <=0 */ | |
654 | if (!aneedSpace || *aneedSpace <= 0) { | |
655 | anumber -= afs_freeDCCount; | |
656 | if (anumber <= 0) { | |
657 | return; /* enough already free */ | |
658 | } | |
659 | } | |
660 | ||
661 | /* bounds check parameter */ | |
662 | if (anumber > MAXATONCE) | |
663 | anumber = MAXATONCE; /* all we can do */ | |
664 | ||
65 | /* rewrite so phases include a better eligibility for gc test */ | |
666 | /* | |
667 | * The phase variable manages reclaims. Set to 0, the first pass, | |
668 | * we don't reclaim active entries, or other than target bucket. | |
669 | * Set to 1, we reclaim even active ones in target bucket. | |
670 | * Set to 2, we reclaim any inactive one. | |
671 | * Set to 3, we reclaim even active ones. On Solaris, we also reclaim | |
672 | * entries whose corresponding vcache has a nonempty multiPage list, when | |
673 | * possible. | |
674 | */ | |
675 | if (splitdcache) { | |
676 | phase = 0; | |
677 | } else { | |
678 | phase = 4; | |
679 | } | |
680 | ||
681 | for (i = 0; i < afs_cacheFiles; i++) | |
682 | /* turn off all flags */ | |
683 | afs_indexFlags[i] &= ~IFFlag; | |
684 | ||
685 | while (anumber > 0 || (aneedSpace && *aneedSpace > 0)) { | |
686 | /* find oldest entries for reclamation */ | |
687 | maxVictimPtr = victimPtr = 0; | |
688 | hzero(maxVictimTime); | |
689 | curbucket = afs_DCWhichBucket(phase, buckethint); | |
690 | /* select victims from access time array */ | |
691 | for (i = 0; i < afs_cacheFiles; i++) { | |
692 | if (afs_indexFlags[i] & (IFDataMod | IFFree | IFDiscarded)) { | |
693 | /* skip if dirty or already free */ | |
694 | continue; | |
695 | } | |
696 | tdc = afs_indexTable[i]; | |
697 | if (tdc && (curbucket != tdc->bucket) && (phase < 4)) | |
698 | { | |
699 | /* Wrong bucket; can't use it! */ | |
700 | continue; | |
701 | } | |
702 | if (tdc && (tdc->refCount != 0)) { | |
703 | /* Referenced; can't use it! */ | |
704 | continue; | |
705 | } | |
706 | hset(vtime, afs_indexTimes[i]); | |
707 | ||
708 | /* if we've already looked at this one, skip it */ | |
709 | if (afs_indexFlags[i] & IFFlag) | |
710 | continue; | |
711 | ||
712 | if (victimPtr < MAXATONCE) { | |
713 | /* if there's at least one free victim slot left */ | |
714 | victims[victimPtr] = i; | |
715 | hset(victimTimes[victimPtr], vtime); | |
716 | if (hcmp(vtime, maxVictimTime) > 0) { | |
717 | hset(maxVictimTime, vtime); | |
718 | maxVictimPtr = victimPtr; | |
719 | } | |
720 | victimPtr++; | |
721 | } else if (hcmp(vtime, maxVictimTime) < 0) { | |
722 | /* | |
723 | * We're older than youngest victim, so we replace at | |
724 | * least one victim | |
725 | */ | |
726 | /* find youngest (largest LRU) victim */ | |
727 | j = maxVictimPtr; | |
728 | if (j == victimPtr) | |
729 | osi_Panic("getdownd local"); | |
730 | victims[j] = i; | |
731 | hset(victimTimes[j], vtime); | |
732 | /* recompute maxVictimTime */ | |
733 | hset(maxVictimTime, vtime); | |
734 | for (j = 0; j < victimPtr; j++) | |
735 | if (hcmp(maxVictimTime, victimTimes[j]) < 0) { | |
736 | hset(maxVictimTime, victimTimes[j]); | |
737 | maxVictimPtr = j; | |
738 | } | |
739 | } | |
740 | } /* big for loop */ | |
741 | ||
742 | /* now really reclaim the victims */ | |
743 | j = 0; /* flag to track if we actually got any of the victims */ | |
744 | /* first, hold all the victims, since we're going to release the lock | |
745 | * during the truncate operation. | |
746 | */ | |
747 | for (i = 0; i < victimPtr; i++) { | |
748 | tdc = afs_GetValidDSlot(victims[i]); | |
749 | /* We got tdc->tlock(R) here */ | |
750 | if (tdc && tdc->refCount == 1) | |
751 | victimDCs[i] = tdc; | |
752 | else | |
753 | victimDCs[i] = 0; | |
754 | if (tdc) { | |
755 | ReleaseReadLock(&tdc->tlock); | |
756 | if (!victimDCs[i]) | |
757 | afs_PutDCache(tdc); | |
758 | } | |
759 | } | |
760 | for (i = 0; i < victimPtr; i++) { | |
761 | /* q is first elt in dcache entry */ | |
762 | tdc = victimDCs[i]; | |
763 | /* now, since we're dropping the afs_xdcache lock below, we | |
764 | * have to verify, before proceeding, that there are no other | |
765 | * references to this dcache entry, even now. Note that we | |
766 | * compare with 1, since we bumped it above when we called | |
767 | * afs_GetValidDSlot to preserve the entry's identity. | |
768 | */ | |
769 | if (tdc && tdc->refCount == 1) { | |
770 | unsigned char chunkFlags; | |
771 | afs_size_t tchunkoffset = 0; | |
772 | afid = &tdc->f.fid; | |
773 | /* xdcache is lower than the xvcache lock */ | |
774 | ReleaseWriteLock(&afs_xdcache); | |
775 | ObtainReadLock(&afs_xvcache); | |
776 | tvc = afs_FindVCache(afid, 0, 0 /* no stats, no vlru */ ); | |
777 | ReleaseReadLock(&afs_xvcache); | |
778 | ObtainWriteLock(&afs_xdcache, 527); | |
779 | skip = 0; | |
780 | if (tdc->refCount > 1) | |
781 | skip = 1; | |
782 | if (tvc) { | |
783 | tchunkoffset = AFS_CHUNKTOBASE(tdc->f.chunk); | |
784 | chunkFlags = afs_indexFlags[tdc->index]; | |
785 | if (((phase & 1) == 0) && osi_Active(tvc)) | |
786 | skip = 1; | |
787 | if (((phase & 1) == 1) && osi_Active(tvc) | |
788 | && (tvc->f.states & CDCLock) | |
789 | && (chunkFlags & IFAnyPages)) | |
790 | skip = 1; | |
791 | if (chunkFlags & IFDataMod) | |
792 | skip = 1; | |
793 | afs_Trace4(afs_iclSetp, CM_TRACE_GETDOWND, | |
794 | ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32, skip, | |
795 | ICL_TYPE_INT32, tdc->index, ICL_TYPE_OFFSET, | |
796 | ICL_HANDLE_OFFSET(tchunkoffset)); | |
797 | ||
798 | #if defined(AFS_SUN5_ENV) | |
799 | /* | |
800 | * Now we try to invalidate pages. We do this only for | |
801 | * Solaris. For other platforms, it's OK to recycle a | |
802 | * dcache entry out from under a page, because the strategy | |
803 | * function can call afs_GetDCache(). | |
804 | */ | |
805 | if (!skip && (chunkFlags & IFAnyPages)) { | |
806 | int code; | |
807 | ||
808 | ReleaseWriteLock(&afs_xdcache); | |
809 | ObtainWriteLock(&tvc->vlock, 543); | |
810 | if (!QEmpty(&tvc->multiPage)) { | |
811 | if (phase < 3 || osi_VM_MultiPageConflict(tvc, tdc)) { | |
812 | skip = 1; | |
813 | goto endmultipage; | |
814 | } | |
815 | } | |
816 | /* block locking pages */ | |
817 | tvc->vstates |= VPageCleaning; | |
818 | /* block getting new pages */ | |
819 | tvc->activeV++; | |
820 | ReleaseWriteLock(&tvc->vlock); | |
821 | /* One last recheck */ | |
822 | ObtainWriteLock(&afs_xdcache, 333); | |
823 | chunkFlags = afs_indexFlags[tdc->index]; | |
824 | if (tdc->refCount > 1 || (chunkFlags & IFDataMod) | |
825 | || (osi_Active(tvc) && (tvc->f.states & CDCLock) | |
826 | && (chunkFlags & IFAnyPages))) { | |
827 | skip = 1; | |
828 | ReleaseWriteLock(&afs_xdcache); | |
829 | goto endputpage; | |
830 | } | |
831 | ReleaseWriteLock(&afs_xdcache); | |
832 | ||
833 | code = osi_VM_GetDownD(tvc, tdc); | |
834 | ||
835 | ObtainWriteLock(&afs_xdcache, 269); | |
836 | /* we actually removed all pages, clean and dirty */ | |
837 | if (code == 0) { | |
838 | afs_indexFlags[tdc->index] &= | |
839 | ~(IFDirtyPages | IFAnyPages); | |
840 | } else | |
841 | skip = 1; | |
842 | ReleaseWriteLock(&afs_xdcache); | |
843 | endputpage: | |
844 | ObtainWriteLock(&tvc->vlock, 544); | |
845 | if (--tvc->activeV == 0 | |
846 | && (tvc->vstates & VRevokeWait)) { | |
847 | tvc->vstates &= ~VRevokeWait; | |
848 | afs_osi_Wakeup((char *)&tvc->vstates); | |
849 | ||
850 | } | |
851 | if (tvc->vstates & VPageCleaning) { | |
852 | tvc->vstates &= ~VPageCleaning; | |
853 | afs_osi_Wakeup((char *)&tvc->vstates); | |
854 | } | |
855 | endmultipage: | |
856 | ReleaseWriteLock(&tvc->vlock); | |
857 | } else | |
858 | #endif /* AFS_SUN5_ENV */ | |
859 | { | |
860 | ReleaseWriteLock(&afs_xdcache); | |
861 | } | |
862 | ||
863 | afs_PutVCache(tvc); /*XXX was AFS_FAST_RELE?*/ | |
864 | ObtainWriteLock(&afs_xdcache, 528); | |
865 | if (afs_indexFlags[tdc->index] & | |
866 | (IFDataMod | IFDirtyPages | IFAnyPages)) | |
867 | skip = 1; | |
868 | if (tdc->refCount > 1) | |
869 | skip = 1; | |
870 | } | |
871 | #if defined(AFS_SUN5_ENV) | |
872 | else { | |
873 | /* no vnode, so IFDirtyPages is spurious (we don't | |
874 | * sweep dcaches on vnode recycling, so we can have | |
875 | * DIRTYPAGES set even when all pages are gone). Just | |
876 | * clear the flag. | |
877 | * Hold vcache lock to prevent vnode from being | |
878 | * created while we're clearing IFDirtyPages. | |
879 | */ | |
880 | afs_indexFlags[tdc->index] &= | |
881 | ~(IFDirtyPages | IFAnyPages); | |
882 | } | |
883 | #endif | |
884 | if (skip) { | |
885 | /* skip this guy and mark him as recently used */ | |
886 | afs_indexFlags[tdc->index] |= IFFlag; | |
887 | afs_Trace4(afs_iclSetp, CM_TRACE_GETDOWND, | |
888 | ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32, 2, | |
889 | ICL_TYPE_INT32, tdc->index, ICL_TYPE_OFFSET, | |
890 | ICL_HANDLE_OFFSET(tchunkoffset)); | |
891 | } else { | |
892 | /* flush this dude from the data cache and reclaim; | |
893 | * first, make sure no one will care that we damage | |
894 | * it, by removing it from all hash tables. Then, | |
895 | * melt it down for parts. Note that any concurrent | |
896 | * (new possibility!) calls to GetDownD won't touch | |
897 | * this guy because his reference count is > 0. */ | |
898 | afs_Trace4(afs_iclSetp, CM_TRACE_GETDOWND, | |
899 | ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32, 3, | |
900 | ICL_TYPE_INT32, tdc->index, ICL_TYPE_OFFSET, | |
901 | ICL_HANDLE_OFFSET(tchunkoffset)); | |
902 | AFS_STATCNT(afs_gget); | |
903 | afs_HashOutDCache(tdc, 1); | |
904 | if (tdc->f.chunkBytes != 0) { | |
905 | discard = 1; | |
906 | if (aneedSpace) | |
907 | *aneedSpace -= | |
908 | (tdc->f.chunkBytes + afs_fsfragsize) >> 10; | |
909 | } else { | |
910 | discard = 0; | |
911 | } | |
912 | if (discard) { | |
913 | afs_DiscardDCache(tdc); | |
914 | } else { | |
915 | afs_FreeDCache(tdc); | |
916 | } | |
917 | anumber--; | |
918 | j = 1; /* we reclaimed at least one victim */ | |
919 | } | |
920 | } | |
921 | if (tdc) | |
922 | afs_PutDCache(tdc); | |
923 | } /* end of for victims loop */ | |
924 | ||
925 | if (phase < 5) { | |
926 | /* Phase is 0 and no one was found, so try phase 1 (ignore | |
927 | * osi_Active flag) */ | |
928 | if (j == 0) { | |
929 | phase++; | |
930 | for (i = 0; i < afs_cacheFiles; i++) | |
931 | /* turn off all flags */ | |
932 | afs_indexFlags[i] &= ~IFFlag; | |
933 | } | |
934 | } else { | |
935 | /* found no one in phases 0-5, we're hosed */ | |
936 | if (victimPtr == 0) | |
937 | break; | |
938 | } | |
939 | } /* big while loop */ | |
940 | ||
941 | return; | |
942 | ||
943 | } /*afs_GetDownD */ | |
944 | ||
945 | ||
946 | /*! | |
947 | * Remove adc from any hash tables that would allow it to be located | |
948 | * again by afs_FindDCache or afs_GetDCache. | |
949 | * | |
950 | * \param adc Pointer to dcache entry to remove from hash tables. | |
951 | * | |
952 | * \note Locks: Must have the afs_xdcache lock write-locked to call this function. | |
953 | * | |
954 | */ | |
955 | int | |
956 | afs_HashOutDCache(struct dcache *adc, int zap) | |
957 | { | |
958 | int i, us; | |
959 | ||
960 | AFS_STATCNT(afs_glink); | |
961 | if (zap) | |
962 | /* we know this guy's in the LRUQ. We'll move dude into DCQ below */ | |
963 | DZap(adc); | |
964 | /* if this guy is in the hash table, pull him out */ | |
965 | if (adc->f.fid.Fid.Volume != 0) { | |
966 | /* remove entry from first hash chains */ | |
967 | i = DCHash(&adc->f.fid, adc->f.chunk); | |
968 | us = afs_dchashTbl[i]; | |
969 | if (us == adc->index) { | |
970 | /* first dude in the list */ | |
971 | afs_dchashTbl[i] = afs_dcnextTbl[adc->index]; | |
972 | } else { | |
973 | /* somewhere on the chain */ | |
974 | while (us != NULLIDX) { | |
975 | if (afs_dcnextTbl[us] == adc->index) { | |
976 | /* found item pointing at the one to delete */ | |
977 | afs_dcnextTbl[us] = afs_dcnextTbl[adc->index]; | |
978 | break; | |
979 | } | |
980 | us = afs_dcnextTbl[us]; | |
981 | } | |
982 | if (us == NULLIDX) | |
983 | osi_Panic("dcache hc"); | |
984 | } | |
985 | /* remove entry from *other* hash chain */ | |
986 | i = DVHash(&adc->f.fid); | |
987 | us = afs_dvhashTbl[i]; | |
988 | if (us == adc->index) { | |
989 | /* first dude in the list */ | |
990 | afs_dvhashTbl[i] = afs_dvnextTbl[adc->index]; | |
991 | } else { | |
992 | /* somewhere on the chain */ | |
993 | while (us != NULLIDX) { | |
994 | if (afs_dvnextTbl[us] == adc->index) { | |
995 | /* found item pointing at the one to delete */ | |
996 | afs_dvnextTbl[us] = afs_dvnextTbl[adc->index]; | |
997 | break; | |
998 | } | |
999 | us = afs_dvnextTbl[us]; | |
1000 | } | |
1001 | if (us == NULLIDX) | |
1002 | osi_Panic("dcache hv"); | |
1003 | } | |
1004 | } | |
1005 | ||
1006 | if (zap) { | |
1007 | /* prevent entry from being found on a reboot (it is already out of | |
1008 | * the hash table, but after a crash, we just look at fid fields of | |
1009 | * stable (old) entries). | |
1010 | */ | |
1011 | adc->f.fid.Fid.Volume = 0; /* invalid */ | |
1012 | ||
1013 | /* mark entry as modified */ | |
1014 | adc->dflags |= DFEntryMod; | |
1015 | } | |
1016 | ||
1017 | /* all done */ | |
1018 | return 0; | |
1019 | } /*afs_HashOutDCache */ | |
1020 | ||
1021 | /*! | |
1022 | * Flush the given dcache entry, pulling it from hash chains | |
1023 | * and truncating the associated cache file. | |
1024 | * | |
1025 | * \param adc Ptr to dcache entry to flush. | |
1026 | * | |
1027 | * \note Environment: | |
1028 | * This routine must be called with the afs_xdcache lock held | |
1029 | * (in write mode). | |
1030 | */ | |
1031 | void | |
1032 | afs_FlushDCache(struct dcache *adc) | |
1033 | { | |
1034 | AFS_STATCNT(afs_FlushDCache); | |
1035 | /* | |
1036 | * Bump the number of cache files flushed. | |
1037 | */ | |
1038 | afs_stats_cmperf.cacheFlushes++; | |
1039 | ||
1040 | /* remove from all hash tables */ | |
1041 | afs_HashOutDCache(adc, 1); | |
1042 | ||
1043 | /* Free its space; special case null operation, since truncate operation | |
1044 | * in UFS is slow even in this case, and this allows us to pre-truncate | |
1045 | * these files at more convenient times with fewer locks set | |
1046 | * (see afs_GetDownD). | |
1047 | */ | |
1048 | if (adc->f.chunkBytes != 0) { | |
1049 | afs_DiscardDCache(adc); | |
1050 | afs_MaybeWakeupTruncateDaemon(); | |
1051 | } else { | |
1052 | afs_FreeDCache(adc); | |
1053 | } | |
1054 | } /*afs_FlushDCache */ | |
1055 | ||
1056 | ||
1057 | /*! | |
1058 | * Put a dcache entry on the free dcache entry list. | |
1059 | * | |
1060 | * \param adc dcache entry to free. | |
1061 | * | |
1062 | * \note Environment: called with afs_xdcache lock write-locked. | |
1063 | */ | |
1064 | static void | |
1065 | afs_FreeDCache(struct dcache *adc) | |
1066 | { | |
1067 | /* Thread on free list, update free list count and mark entry as | |
1068 | * freed in its indexFlags element. Also, ensure DCache entry gets | |
1069 | * written out (set DFEntryMod). | |
1070 | */ | |
1071 | ||
1072 | afs_dvnextTbl[adc->index] = afs_freeDCList; | |
1073 | afs_freeDCList = adc->index; | |
1074 | afs_freeDCCount++; | |
1075 | afs_indexFlags[adc->index] |= IFFree; | |
1076 | adc->dflags |= DFEntryMod; | |
1077 | ||
1078 | afs_WakeCacheWaitersIfDrained(); | |
1079 | } /* afs_FreeDCache */ | |
1080 | ||
1081 | /*! | |
1082 | * Discard the cache element by moving it to the discardDCList. | |
1083 | * This puts the cache element into a quasi-freed state, where | |
1084 | * the space may be reused, but the file has not been truncated. | |
1085 | * | |
1086 | * \note Major Assumptions Here: | |
1087 | * Assumes that frag size is an integral power of two, less one, | |
1088 | * and that this is a two's complement machine. I don't | |
1089 | * know of any filesystems which violate this assumption... | |
1090 | * | |
 * \param adc Ptr to dcache entry.
1092 | * | |
1093 | * \note Environment: | |
1094 | * Must be called with afs_xdcache write-locked. | |
1095 | */ | |
1096 | ||
1097 | static void | |
1098 | afs_DiscardDCache(struct dcache *adc) | |
1099 | { | |
1100 | afs_int32 size; | |
1101 | ||
1102 | AFS_STATCNT(afs_DiscardDCache); | |
1103 | ||
1104 | osi_Assert(adc->refCount == 1); | |
1105 | ||
1106 | size = ((adc->f.chunkBytes + afs_fsfragsize) ^ afs_fsfragsize) >> 10; /* round up */ | |
1107 | afs_blocksDiscarded += size; | |
1108 | afs_stats_cmperf.cacheBlocksDiscarded = afs_blocksDiscarded; | |
1109 | ||
1110 | afs_dvnextTbl[adc->index] = afs_discardDCList; | |
1111 | afs_discardDCList = adc->index; | |
1112 | afs_discardDCCount++; | |
1113 | ||
1114 | adc->f.fid.Fid.Volume = 0; | |
1115 | adc->dflags |= DFEntryMod; | |
1116 | afs_indexFlags[adc->index] |= IFDiscarded; | |
1117 | ||
1118 | afs_WakeCacheWaitersIfDrained(); | |
1119 | } /*afs_DiscardDCache */ | |
1120 | ||
1121 | /** | |
1122 | * Get a dcache entry from the discard or free list | |
1123 | * | |
1124 | * @param[in] indexp A pointer to the head of the dcache free list or discard | |
1125 | * list (afs_freeDCList, or afs_discardDCList) | |
1126 | * | |
1127 | * @return A dcache from that list, or NULL if none could be retrieved. | |
1128 | * | |
1129 | * @pre afs_xdcache is write-locked | |
1130 | */ | |
1131 | static struct dcache * | |
1132 | afs_GetDSlotFromList(afs_int32 *indexp) | |
1133 | { | |
1134 | struct dcache *tdc; | |
1135 | ||
1136 | if (*indexp != NULLIDX) { | |
1137 | tdc = afs_GetUnusedDSlot(*indexp); | |
1138 | if (tdc) { | |
1139 | osi_Assert(tdc->refCount == 1); | |
1140 | ReleaseReadLock(&tdc->tlock); | |
1141 | *indexp = afs_dvnextTbl[tdc->index]; | |
1142 | afs_dvnextTbl[tdc->index] = NULLIDX; | |
1143 | return tdc; | |
1144 | } | |
1145 | } | |
1146 | return NULL; | |
1147 | } | |
1148 | ||
1149 | /*! | |
1150 | * Free the next element on the list of discarded cache elements. | |
1151 | * | |
1152 | * Returns -1 if we encountered an error preventing us from freeing a | |
1153 | * discarded dcache, or 0 on success. | |
1154 | */ | |
static int
afs_FreeDiscardedDCache(void)
{
    struct dcache *tdc;
    struct osi_file *tfile;
    afs_int32 size;

    AFS_STATCNT(afs_FreeDiscardedDCache);

    ObtainWriteLock(&afs_xdcache, 510);
    /* Nothing discarded: nothing to free, and that is success. */
    if (!afs_blocksDiscarded) {
	ReleaseWriteLock(&afs_xdcache);
	return 0;
    }

    /*
     * Get an entry from the list of discarded cache elements
     */
    tdc = afs_GetDSlotFromList(&afs_discardDCList);
    if (!tdc) {
	/* could not read the dslot in; report the error to the caller */
	ReleaseWriteLock(&afs_xdcache);
	return -1;
    }

    /* Undo the accounting done by afs_DiscardDCache: same fragment
     * round-up (XOR trick) and bytes-to-1K-blocks conversion. */
    afs_discardDCCount--;
    size = ((tdc->f.chunkBytes + afs_fsfragsize) ^ afs_fsfragsize) >> 10;	/* round up */
    afs_blocksDiscarded -= size;
    afs_stats_cmperf.cacheBlocksDiscarded = afs_blocksDiscarded;
    /* We can lock because we just took it off the free list */
    ObtainWriteLock(&tdc->lock, 626);
    /* Drop afs_xdcache before doing file I/O; we still hold tdc->lock. */
    ReleaseWriteLock(&afs_xdcache);

    /*
     * Truncate the element to reclaim its space
     */
    tfile = afs_CFileOpen(&tdc->f.inode);
    osi_Assert(tfile);
    afs_CFileTruncate(tfile, 0);
    afs_CFileClose(tfile);
    afs_AdjustSize(tdc, 0);
    afs_DCMoveBucket(tdc, 0, 0);

    /*
     * Free the element we just truncated
     */
    ObtainWriteLock(&afs_xdcache, 511);
    afs_indexFlags[tdc->index] &= ~IFDiscarded;
    afs_FreeDCache(tdc);
    tdc->f.states &= ~(DRO|DBackup|DRW);
    ReleaseWriteLock(&tdc->lock);
    afs_PutDCache(tdc);
    ReleaseWriteLock(&afs_xdcache);

    return 0;
}
1210 | ||
1211 | /*! | |
1212 | * Free as many entries from the list of discarded cache elements | |
1213 | * as we need to get the free space down below CM_WAITFORDRAINPCT (98%). | |
1214 | * | |
1215 | * \return 0 | |
1216 | */ | |
1217 | int | |
1218 | afs_MaybeFreeDiscardedDCache(void) | |
1219 | { | |
1220 | ||
1221 | AFS_STATCNT(afs_MaybeFreeDiscardedDCache); | |
1222 | ||
1223 | while (afs_blocksDiscarded | |
1224 | && (afs_blocksUsed > | |
1225 | PERCENT(CM_WAITFORDRAINPCT, afs_cacheBlocks))) { | |
1226 | int code = afs_FreeDiscardedDCache(); | |
1227 | if (code) { | |
1228 | /* Callers depend on us to get the afs_blocksDiscarded count down. | |
1229 | * If we cannot do that, the callers can spin by calling us over | |
1230 | * and over. Panic for now until we can figure out something | |
1231 | * better. */ | |
1232 | osi_Panic("Error freeing discarded dcache"); | |
1233 | } | |
1234 | } | |
1235 | return 0; | |
1236 | } | |
1237 | ||
1238 | /*! | |
1239 | * Try to free up a certain number of disk slots. | |
1240 | * | |
1241 | * \param anumber Targeted number of disk slots to free up. | |
1242 | * | |
1243 | * \note Environment: | |
1244 | * Must be called with afs_xdcache write-locked. | |
1245 | * | |
1246 | */ | |
static void
afs_GetDownDSlot(int anumber)
{
    struct afs_q *tq, *nq;
    struct dcache *tdc;
    int ix;
    unsigned int cnt;

    AFS_STATCNT(afs_GetDownDSlot);
    /* Memory caches have no dslot files to reclaim. */
    if (cacheDiskType == AFS_FCACHE_TYPE_MEM)
	osi_Panic("diskless getdowndslot");

    /* Caller must hold afs_xdcache write-locked. */
    if (CheckLock(&afs_xdcache) != -1)
	osi_Panic("getdowndslot nolock");

    /* decrement anumber first for all dudes in free list */
    for (tdc = afs_freeDSList; tdc; tdc = (struct dcache *)tdc->lruq.next)
	anumber--;
    if (anumber <= 0)
	return;			/* enough already free */

    /* Walk the DLRU from the cold (tail) end, reclaiming unreferenced
     * entries until the target is met or the list is exhausted. */
    for (cnt = 0, tq = afs_DLRU.prev; tq != &afs_DLRU && anumber > 0;
	 tq = nq, cnt++) {
	tdc = (struct dcache *)tq;	/* q is first elt in dcache entry */
	nq = QPrev(tq);		/* in case we remove it */
	if (tdc->refCount == 0) {
	    if ((ix = tdc->index) == NULLIDX)
		osi_Panic("getdowndslot");

	    /* write-through if modified */
	    if (tdc->dflags & DFEntryMod) {
#if defined(AFS_SGI_ENV) && defined(AFS_SGI_SHORTSTACK)
		/*
		 * ask proxy to do this for us - we don't have the stack space
		 */
		while (tdc->dflags & DFEntryMod) {
		    int s;
		    AFS_GUNLOCK();
		    s = SPLOCK(afs_sgibklock);
		    if (afs_sgibklist == NULL) {
			/* if slot is free, grab it. */
			afs_sgibklist = tdc;
			SV_SIGNAL(&afs_sgibksync);
		    }
		    /* wait for daemon to (start, then) finish. */
		    SP_WAIT(afs_sgibklock, s, &afs_sgibkwait, PINOD);
		    AFS_GLOCK();
		}
#else
		int code;

		code = afs_WriteDCache(tdc, 1);
		if (code) {
		    /*
		     * We couldn't flush it at this time; return early because
		     * if afs_WriteDCache() failed once it is likely to
		     * continue failing for subsequent dcaches.
		     */
		    return;
		}
		tdc->dflags &= ~DFEntryMod;
#endif
	    }

	    /* pull the entry out of the lruq and put it on the free list */
	    QRemove(&tdc->lruq);
	    afs_indexTable[ix] = NULL;
	    afs_indexFlags[ix] &= ~IFEverUsed;
	    tdc->index = NULLIDX;
	    tdc->lruq.next = (struct afs_q *)afs_freeDSList;
	    afs_freeDSList = tdc;
	    anumber--;
	}
    }
}				/*afs_GetDownDSlot */
1322 | ||
1323 | ||
1324 | /* | |
1325 | * afs_RefDCache | |
1326 | * | |
1327 | * Description: | |
1328 | * Increment the reference count on a disk cache entry, | |
1329 | * which already has a non-zero refcount. In order to | |
1330 | * increment the refcount of a zero-reference entry, you | |
1331 | * have to hold afs_xdcache. | |
1332 | * | |
1333 | * Parameters: | |
1334 | * adc : Pointer to the dcache entry to increment. | |
1335 | * | |
1336 | * Environment: | |
1337 | * Nothing interesting. | |
1338 | */ | |
1339 | int | |
1340 | afs_RefDCache(struct dcache *adc) | |
1341 | { | |
1342 | ObtainWriteLock(&adc->tlock, 627); | |
1343 | if (adc->refCount < 0) | |
1344 | osi_Panic("RefDCache: negative refcount"); | |
1345 | adc->refCount++; | |
1346 | ReleaseWriteLock(&adc->tlock); | |
1347 | return 0; | |
1348 | } | |
1349 | ||
1350 | ||
1351 | /* | |
1352 | * afs_PutDCache | |
1353 | * | |
1354 | * Description: | |
1355 | * Decrement the reference count on a disk cache entry. | |
1356 | * | |
1357 | * Parameters: | |
1358 | * ad : Ptr to the dcache entry to decrement. | |
1359 | * | |
1360 | * Environment: | |
1361 | * Nothing interesting. | |
1362 | */ | |
1363 | int | |
1364 | afs_PutDCache(struct dcache *adc) | |
1365 | { | |
1366 | AFS_STATCNT(afs_PutDCache); | |
1367 | ObtainWriteLock(&adc->tlock, 276); | |
1368 | if (adc->refCount <= 0) | |
1369 | osi_Panic("putdcache"); | |
1370 | --adc->refCount; | |
1371 | ReleaseWriteLock(&adc->tlock); | |
1372 | return 0; | |
1373 | } | |
1374 | ||
1375 | ||
1376 | /* | |
1377 | * afs_TryToSmush | |
1378 | * | |
1379 | * Description: | |
1380 | * Try to discard all data associated with this file from the | |
1381 | * cache. | |
1382 | * | |
1383 | * Parameters: | |
1384 | * avc : Pointer to the cache info for the file. | |
1385 | * | |
1386 | * Environment: | |
1387 | * Both pvnLock and lock are write held. | |
1388 | */ | |
void
afs_TryToSmush(struct vcache *avc, afs_ucred_t *acred, int sync)
{
    struct dcache *tdc;
    int index;
    int i;
    AFS_STATCNT(afs_TryToSmush);
    afs_Trace2(afs_iclSetp, CM_TRACE_TRYTOSMUSH, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length));
    sync = 1;			/* XX Temp testing XX */

#if defined(AFS_SUN5_ENV)
    ObtainWriteLock(&avc->vlock, 573);
    avc->activeV++;		/* block new getpages */
    ReleaseWriteLock(&avc->vlock);
#endif

    /* Flush VM pages */
    osi_VM_TryToSmush(avc, acred, sync);

    /*
     * Get the hash chain containing all dce's for this fid
     */
    i = DVHash(&avc->f.fid);
    ObtainWriteLock(&afs_xdcache, 277);
    /* NOTE: i is reused as the saved next pointer, since afs_FlushDCache
     * below may unthread the current entry from this chain. */
    for (index = afs_dvhashTbl[i]; index != NULLIDX; index = i) {
	i = afs_dvnextTbl[index];	/* next pointer this hash table */
	if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
	    int releaseTlock = 1;
	    tdc = afs_GetValidDSlot(index);
	    if (!tdc) {
		/* afs_TryToSmush is best-effort; we may not actually discard
		 * everything, so failure to discard dcaches due to an i/o
		 * error is okay. */
		break;
	    }
	    if (!FidCmp(&tdc->f.fid, &avc->f.fid)) {
		if (sync) {
		    /* Only flush chunks that are clean and that we hold the
		     * sole reference to; drop tlock first since
		     * afs_FlushDCache takes over the entry. */
		    if ((afs_indexFlags[index] & IFDataMod) == 0
			&& tdc->refCount == 1) {
			ReleaseReadLock(&tdc->tlock);
			releaseTlock = 0;
			afs_FlushDCache(tdc);
		    }
		} else
		    afs_indexTable[index] = 0;
	    }
	    if (releaseTlock)
		ReleaseReadLock(&tdc->tlock);
	    afs_PutDCache(tdc);
	}
    }
#if defined(AFS_SUN5_ENV)
    ObtainWriteLock(&avc->vlock, 545);
    if (--avc->activeV == 0 && (avc->vstates & VRevokeWait)) {
	avc->vstates &= ~VRevokeWait;
	afs_osi_Wakeup((char *)&avc->vstates);
    }
    ReleaseWriteLock(&avc->vlock);
#endif
    ReleaseWriteLock(&afs_xdcache);
    /*
     * It's treated like a callback so that when we do lookups we'll
     * invalidate the unique bit if any
     * trytoSmush occured during the lookup call
     */
    afs_allCBs++;
}
1457 | ||
1458 | /* | |
1459 | * afs_DCacheMissingChunks | |
1460 | * | |
1461 | * Description | |
1462 | * Given the cached info for a file, return the number of chunks that | |
1463 | * are not available from the dcache. | |
1464 | * | |
1465 | * Parameters: | |
1466 | * avc: Pointer to the (held) vcache entry to look in. | |
1467 | * | |
1468 | * Returns: | |
1469 | * The number of chunks which are not currently cached. | |
1470 | * | |
1471 | * Environment: | |
1472 | * The vcache entry is held upon entry. | |
1473 | */ | |
1474 | ||
1475 | int | |
1476 | afs_DCacheMissingChunks(struct vcache *avc) | |
1477 | { | |
1478 | int i, index; | |
1479 | afs_size_t totalLength = 0; | |
1480 | afs_uint32 totalChunks = 0; | |
1481 | struct dcache *tdc; | |
1482 | ||
1483 | totalLength = avc->f.m.Length; | |
1484 | if (avc->f.truncPos < totalLength) | |
1485 | totalLength = avc->f.truncPos; | |
1486 | ||
1487 | /* Length is 0, no chunk missing. */ | |
1488 | if (totalLength == 0) | |
1489 | return 0; | |
1490 | ||
1491 | /* If totalLength is a multiple of chunksize, the last byte appears | |
1492 | * as being part of the next chunk, which does not exist. | |
1493 | * Decrementing totalLength by one fixes that. | |
1494 | */ | |
1495 | totalLength--; | |
1496 | totalChunks = (AFS_CHUNK(totalLength) + 1); | |
1497 | ||
1498 | /* If we're a directory, we only ever have one chunk, regardless of | |
1499 | * the size of the dir. | |
1500 | */ | |
1501 | if (avc->f.fid.Fid.Vnode & 1 || vType(avc) == VDIR) | |
1502 | totalChunks = 1; | |
1503 | ||
1504 | /* | |
1505 | printf("Should have %d chunks for %u bytes\n", | |
1506 | totalChunks, (totalLength + 1)); | |
1507 | */ | |
1508 | i = DVHash(&avc->f.fid); | |
1509 | ObtainWriteLock(&afs_xdcache, 1001); | |
1510 | for (index = afs_dvhashTbl[i]; index != NULLIDX; index = i) { | |
1511 | i = afs_dvnextTbl[index]; | |
1512 | if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) { | |
1513 | tdc = afs_GetValidDSlot(index); | |
1514 | if (!tdc) { | |
1515 | break; | |
1516 | } | |
1517 | if (!FidCmp(&tdc->f.fid, &avc->f.fid)) { | |
1518 | totalChunks--; | |
1519 | } | |
1520 | ReleaseReadLock(&tdc->tlock); | |
1521 | afs_PutDCache(tdc); | |
1522 | } | |
1523 | } | |
1524 | ReleaseWriteLock(&afs_xdcache); | |
1525 | ||
1526 | /*printf("Missing %d chunks\n", totalChunks);*/ | |
1527 | ||
1528 | return (totalChunks); | |
1529 | } | |
1530 | ||
1531 | /* | |
1532 | * afs_FindDCache | |
1533 | * | |
1534 | * Description: | |
1535 | * Given the cached info for a file and a byte offset into the | |
1536 | * file, make sure the dcache entry for that file and containing | |
1537 | * the given byte is available, returning it to our caller. | |
1538 | * | |
1539 | * Parameters: | |
1540 | * avc : Pointer to the (held) vcache entry to look in. | |
1541 | * abyte : Which byte we want to get to. | |
1542 | * | |
1543 | * Returns: | |
1544 | * Pointer to the dcache entry covering the file & desired byte, | |
1545 | * or NULL if not found. | |
1546 | * | |
1547 | * Environment: | |
1548 | * The vcache entry is held upon entry. | |
1549 | */ | |
1550 | ||
struct dcache *
afs_FindDCache(struct vcache *avc, afs_size_t abyte)
{
    afs_int32 chunk;
    afs_int32 i, index;
    struct dcache *tdc = NULL;

    AFS_STATCNT(afs_FindDCache);
    chunk = AFS_CHUNK(abyte);

    /*
     * Hash on the [fid, chunk] and get the corresponding dcache index
     * after write-locking the dcache.
     */
    i = DCHash(&avc->f.fid, chunk);
    ObtainWriteLock(&afs_xdcache, 278);
    for (index = afs_dchashTbl[i]; index != NULLIDX; index = afs_dcnextTbl[index]) {
	if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
	    tdc = afs_GetValidDSlot(index);
	    if (!tdc) {
		/* afs_FindDCache is best-effort; we may not find the given
		 * file/offset, so if we cannot find the given dcache due to
		 * i/o errors, that is okay. */
		index = NULLIDX;
		break;
	    }
	    ReleaseReadLock(&tdc->tlock);
	    if (!FidCmp(&tdc->f.fid, &avc->f.fid) && chunk == tdc->f.chunk) {
		break;		/* leaving refCount high for caller */
	    }
	    /* wrong chunk or stale fid hash collision; drop our ref */
	    afs_PutDCache(tdc);
	}
    }
    /* index != NULLIDX here only when the break above found a match, so
     * tdc is guaranteed valid and still referenced. */
    if (index != NULLIDX) {
	/* touch the LRU timestamp so this entry counts as recently used */
	hset(afs_indexTimes[tdc->index], afs_indexCounter);
	hadd32(afs_indexCounter, 1);
	ReleaseWriteLock(&afs_xdcache);
	return tdc;
    }
    ReleaseWriteLock(&afs_xdcache);
    return NULL;
}				/*afs_FindDCache */
1593 | ||
1594 | /* only call these from afs_AllocDCache() */ | |
1595 | static struct dcache * | |
1596 | afs_AllocFreeDSlot(void) | |
1597 | { | |
1598 | struct dcache *tdc; | |
1599 | ||
1600 | tdc = afs_GetDSlotFromList(&afs_freeDCList); | |
1601 | if (!tdc) { | |
1602 | return NULL; | |
1603 | } | |
1604 | afs_indexFlags[tdc->index] &= ~IFFree; | |
1605 | ObtainWriteLock(&tdc->lock, 604); | |
1606 | afs_freeDCCount--; | |
1607 | ||
1608 | return tdc; | |
1609 | } | |
static struct dcache *
afs_AllocDiscardDSlot(afs_int32 lock)
{
    struct dcache *tdc;
    afs_uint32 size = 0;
    struct osi_file *file;

    tdc = afs_GetDSlotFromList(&afs_discardDCList);
    if (!tdc) {
	return NULL;
    }
    /* Slot is no longer discarded; lock it for the caller. */
    afs_indexFlags[tdc->index] &= ~IFDiscarded;
    ObtainWriteLock(&tdc->lock, 605);
    afs_discardDCCount--;
    /* Round chunkBytes up to the fragment boundary (XOR trick; frag size
     * is 2^k - 1) and convert to 1K blocks for accounting. */
    size =
	((tdc->f.chunkBytes +
	  afs_fsfragsize) ^ afs_fsfragsize) >> 10;
    /* Clear the volume-type state bits before rebucketing. */
    tdc->f.states &= ~(DRO|DBackup|DRW);
    afs_DCMoveBucket(tdc, size, 0);
    afs_blocksDiscarded -= size;
    afs_stats_cmperf.cacheBlocksDiscarded = afs_blocksDiscarded;
    if ((lock & 2)) {
	/* Truncate the chunk so zeroes get filled properly */
	file = afs_CFileOpen(&tdc->f.inode);
	osi_Assert(file);
	afs_CFileTruncate(file, 0);
	afs_CFileClose(file);
	afs_AdjustSize(tdc, 0);
    }

    return tdc;
}
1642 | ||
1643 | /*! | |
1644 | * Get a fresh dcache from the free or discarded list. | |
1645 | * | |
1646 | * \param avc Who's dcache is this going to be? | |
1647 | * \param chunk The position where it will be placed in. | |
1648 | * \param lock How are locks held. | |
1649 | * \param ashFid If this dcache going to be used for a shadow dir, | |
1650 | * this is it's fid. | |
1651 | * | |
1652 | * \note Required locks: | |
1653 | * - afs_xdcache (W) | |
1654 | * - avc (R if (lock & 1) set and W otherwise) | |
1655 | * \note It write locks the new dcache. The caller must unlock it. | |
1656 | * | |
1657 | * \return The new dcache. | |
1658 | */ | |
struct dcache *
afs_AllocDCache(struct vcache *avc, afs_int32 chunk, afs_int32 lock,
		struct VenusFid *ashFid)
{
    struct dcache *tdc = NULL;

    /* if (lock & 2), prefer 'free' dcaches; otherwise, prefer 'discard'
     * dcaches. In either case, try both if our first choice doesn't work. */
    if ((lock & 2)) {
	tdc = afs_AllocFreeDSlot();
	if (!tdc) {
	    tdc = afs_AllocDiscardDSlot(lock);
	}
    } else {
	tdc = afs_AllocDiscardDSlot(lock);
	if (!tdc) {
	    tdc = afs_AllocFreeDSlot();
	}
    }
    if (!tdc) {
	/* neither list could supply a slot */
	return NULL;
    }

    /*
     * Locks held:
     *    avc->lock(R) if setLocks
     *    avc->lock(W) if !setLocks
     *    tdc->lock(W)
     *    afs_xdcache(W)
     */

    /*
     * Fill in the newly-allocated dcache record.
     */
    afs_indexFlags[tdc->index] &= ~(IFDirtyPages | IFAnyPages);
    if (ashFid)
	/* Use shadow fid if provided. */
	tdc->f.fid = *ashFid;
    else
	/* Use normal vcache's fid otherwise. */
	tdc->f.fid = avc->f.fid;
    /* Propagate the vcache's volume type into the dcache state bits. */
    if (avc->f.states & CRO)
	tdc->f.states = DRO;
    else if (avc->f.states & CBackup)
	tdc->f.states = DBackup;
    else
	tdc->f.states = DRW;
    afs_DCMoveBucket(tdc, 0, afs_DCGetBucket(avc));
    afs_indexUnique[tdc->index] = tdc->f.fid.Fid.Unique;
    if (!ashFid)
	hones(tdc->f.versionNo);	/* invalid value */
    tdc->f.chunk = chunk;
    tdc->validPos = AFS_CHUNKTOBASE(chunk);
    /* XXX */
    if (tdc->lruq.prev == &tdc->lruq)
	osi_Panic("lruq 1");

    return tdc;
}
1718 | ||
1719 | /* | |
1720 | * afs_GetDCache | |
1721 | * | |
1722 | * Description: | |
1723 | * This function is called to obtain a reference to data stored in | |
1724 | * the disk cache, locating a chunk of data containing the desired | |
1725 | * byte and returning a reference to the disk cache entry, with its | |
1726 | * reference count incremented. | |
1727 | * | |
1728 | * Parameters: | |
1729 | * IN: | |
1730 | * avc : Ptr to a vcache entry (unlocked) | |
1731 | * abyte : Byte position in the file desired | |
1732 | * areq : Request structure identifying the requesting user. | |
1733 | * aflags : Settings as follows: | |
1734 | * 1 : Set locks | |
1735 | * 2 : Return after creating entry. | |
1736 | * 4 : called from afs_vnop_write.c | |
1737 | * *alen contains length of data to be written. | |
1738 | * OUT: | |
1739 | * aoffset : Set to the offset within the chunk where the resident | |
1740 | * byte is located. | |
1741 | * alen : Set to the number of bytes of data after the desired | |
1742 | * byte (including the byte itself) which can be read | |
1743 | * from this chunk. | |
1744 | * | |
1745 | * Environment: | |
1746 | * The vcache entry pointed to by avc is unlocked upon entry. | |
1747 | */ | |
1748 | ||
1749 | /* | |
1750 | * Update the vnode-to-dcache hint if we can get the vnode lock | |
1751 | * right away. Assumes dcache entry is at least read-locked. | |
1752 | */ | |
1753 | void | |
1754 | updateV2DC(int lockVc, struct vcache *v, struct dcache *d, int src) | |
1755 | { | |
1756 | if (!lockVc || 0 == NBObtainWriteLock(&v->lock, src)) { | |
1757 | if (hsame(v->f.m.DataVersion, d->f.versionNo) && v->callback) | |
1758 | v->dchint = d; | |
1759 | if (lockVc) | |
1760 | ReleaseWriteLock(&v->lock); | |
1761 | } | |
1762 | } | |
1763 | ||
/*!
 * afs_GetDCache
 *
 * Find or create the dcache entry covering the chunk of the file that
 * contains byte offset 'abyte', fetching data from the fileserver (or
 * from local dynroot memory) when the cached version is stale.
 *
 * \param avc     vcache of the file.  Write-locked by the caller unless
 *                (aflags & 1), in which case this routine takes its own
 *                locks: a read lock on the fast path, a write lock on
 *                the slow retry pass (slowPass).
 * \param abyte   byte offset within the file that must be covered.
 * \param areq    request record (auth/error context for RPCs).
 * \param aoffset out: offset of abyte within the returned chunk.
 * \param alen    out: number of valid bytes past *aoffset in the chunk.
 *                Also read as an input (intended write length) when
 *                (aflags & 4).
 * \param aflags  bit 0x1: caller holds no lock on avc; lock it here.
 *                bit 0x2: caller only needs the dcache entry, not data.
 *                bit 0x4: called from write (chunk may be truncated /
 *                wholly overwritten instead of fetched).
 *
 * \return the dcache entry with its refCount held (caller must
 *         afs_PutDCache it), or NULL on error / disconnected mismatch.
 *         All locks taken here are dropped before returning.
 */
/* avc - Write-locked unless aflags & 1 */
struct dcache *
afs_GetDCache(struct vcache *avc, afs_size_t abyte,
	      struct vrequest *areq, afs_size_t * aoffset,
	      afs_size_t * alen, int aflags)
{
    afs_int32 i, code, shortcut;
#if defined(AFS_AIX32_ENV) || defined(AFS_SGI_ENV)
    afs_int32 adjustsize = 0;
#endif
    int setLocks;
    afs_int32 index;
    afs_int32 us;
    afs_int32 chunk;
    afs_size_t Position = 0;
    afs_int32 size, tlen;	/* size of segment to transfer */
    struct afs_FetchOutput *tsmall = 0;
    struct dcache *tdc;
    struct osi_file *file;
    struct afs_conn *tc;
    int downDCount = 0;
    struct server *newCallback = NULL;
    char setNewCallback;
    char setVcacheStatus;
    char doVcacheUpdate;
    char slowPass = 0;
    int doAdjustSize = 0;
    int doReallyAdjustSize = 0;
    int overWriteWholeChunk = 0;
    struct rx_connection *rxconn;

#ifndef AFS_NOSTATS
    struct afs_stats_AccessInfo *accP;	/*Ptr to access record in stats */
    int fromReplica;		/*Are we reading from a replica? */
    int numFetchLoops;		/*# times around the fetch/analyze loop */
#endif /* AFS_NOSTATS */

    AFS_STATCNT(afs_GetDCache);
    if (dcacheDisabled)
	return NULL;

    setLocks = aflags & 1;

    /*
     * Determine the chunk number and offset within the chunk corresponding
     * to the desired byte.
     */
    if (avc->f.fid.Fid.Vnode & 1) {	/* if (vType(avc) == VDIR) */
	/* directories are stored in a single chunk 0 */
	chunk = 0;
    } else {
	chunk = AFS_CHUNK(abyte);
    }

    /* come back to here if we waited for the cache to drain. */
  RetryGetDCache:

    setNewCallback = setVcacheStatus = 0;

    if (setLocks) {
	if (slowPass)
	    ObtainWriteLock(&avc->lock, 616);
	else
	    ObtainReadLock(&avc->lock);
    }

    /*
     * Locks held:
     * avc->lock(R) if setLocks && !slowPass
     * avc->lock(W) if !setLocks || slowPass
     */

    shortcut = 0;

    /* check hints first! (might could use bcmp or some such...) */
    if ((tdc = avc->dchint)) {
	int dcLocked;

	/*
	 * The locking order between afs_xdcache and dcache lock matters.
	 * The hint dcache entry could be anywhere, even on the free list.
	 * Locking afs_xdcache ensures that noone is trying to pull dcache
	 * entries from the free list, and thereby assuming them to be not
	 * referenced and not locked.
	 */
	ObtainReadLock(&afs_xdcache);
	/* non-blocking: if the hint entry is locked, fall back to lookup */
	dcLocked = (0 == NBObtainSharedLock(&tdc->lock, 601));

	if (dcLocked && (tdc->index != NULLIDX)
	    && !FidCmp(&tdc->f.fid, &avc->f.fid) && chunk == tdc->f.chunk
	    && !(afs_indexFlags[tdc->index] & (IFFree | IFDiscarded))) {
	    /* got the right one.  It might not be the right version, and it
	     * might be fetching, but it's the right dcache entry.
	     */
	    /* All this code should be integrated better with what follows:
	     * I can save a good bit more time under a write lock if I do..
	     */
	    ObtainWriteLock(&tdc->tlock, 603);
	    tdc->refCount++;
	    ReleaseWriteLock(&tdc->tlock);

	    ReleaseReadLock(&afs_xdcache);
	    shortcut = 1;

	    if (hsame(tdc->f.versionNo, avc->f.m.DataVersion)
		&& !(tdc->dflags & DFFetching)) {

		/* data version matches and no fetch in progress: pure hit */
		afs_stats_cmperf.dcacheHits++;
		ObtainWriteLock(&afs_xdcache, 559);
		QRemove(&tdc->lruq);
		QAdd(&afs_DLRU, &tdc->lruq);
		ReleaseWriteLock(&afs_xdcache);

		/* Locks held:
		 * avc->lock(R) if setLocks && !slowPass
		 * avc->lock(W) if !setLocks || slowPass
		 * tdc->lock(S)
		 */
		goto done;
	    }
	} else {
	    if (dcLocked)
		ReleaseSharedLock(&tdc->lock);
	    ReleaseReadLock(&afs_xdcache);
	}

	if (!shortcut)
	    tdc = 0;
    }

    /* Locks held:
     * avc->lock(R) if setLocks && !slowPass
     * avc->lock(W) if !setLocks || slowPass
     * tdc->lock(S) if tdc
     */

    if (!tdc) {			/* If the hint wasn't the right dcache entry */
	int dslot_error = 0;
	/*
	 * Hash on the [fid, chunk] and get the corresponding dcache index
	 * after write-locking the dcache.
	 */
      RetryLookup:

	/* Locks held:
	 * avc->lock(R) if setLocks && !slowPass
	 * avc->lock(W) if !setLocks || slowPass
	 */

	i = DCHash(&avc->f.fid, chunk);
	/* check to make sure our space is fine */
	afs_MaybeWakeupTruncateDaemon();

	ObtainWriteLock(&afs_xdcache, 280);
	us = NULLIDX;
	/* walk the hash chain; 'us' trails 'index' so we can relink */
	for (index = afs_dchashTbl[i]; index != NULLIDX; us = index, index = afs_dcnextTbl[index]) {
	    if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
		tdc = afs_GetValidDSlot(index);
		if (!tdc) {
		    /* we got an i/o error when trying to get the given dslot.
		     * it's possible the dslot we're looking for is elsewhere,
		     * but most likely the disk cache is currently unusable, so
		     * all afs_GetValidDSlot calls will fail, so just bail out. */
		    dslot_error = 1;
		    index = NULLIDX;
		    break;
		}
		ReleaseReadLock(&tdc->tlock);
		/*
		 * Locks held:
		 * avc->lock(R) if setLocks && !slowPass
		 * avc->lock(W) if !setLocks || slowPass
		 * afs_xdcache(W)
		 */
		if (!FidCmp(&tdc->f.fid, &avc->f.fid) && chunk == tdc->f.chunk) {
		    /* Move it up in the beginning of the list */
		    if (afs_dchashTbl[i] != index) {
			afs_dcnextTbl[us] = afs_dcnextTbl[index];
			afs_dcnextTbl[index] = afs_dchashTbl[i];
			afs_dchashTbl[i] = index;
		    }
		    ReleaseWriteLock(&afs_xdcache);
		    ObtainSharedLock(&tdc->lock, 606);
		    break;	/* leaving refCount high for caller */
		}
		afs_PutDCache(tdc);
		tdc = 0;
	    }
	}

	/*
	 * If we didn't find the entry, we'll create one.
	 */
	if (index == NULLIDX) {
	    /*
	     * Locks held:
	     * avc->lock(R) if setLocks
	     * avc->lock(W) if !setLocks
	     * afs_xdcache(W)
	     */
	    afs_Trace2(afs_iclSetp, CM_TRACE_GETDCACHE1, ICL_TYPE_POINTER,
		       avc, ICL_TYPE_INT32, chunk);

	    if (dslot_error) {
		/* We couldn't find the dcache we want, but we hit some i/o
		 * errors when trying to find it, so we're not sure if the
		 * dcache we want is in the cache or not. Error out, so we
		 * don't try to possibly create 2 separate dcaches for the
		 * same exact data. */
		ReleaseWriteLock(&afs_xdcache);
		goto done;
	    }

	    if (afs_discardDCList == NULLIDX && afs_freeDCList == NULLIDX) {
		if (!setLocks)
		    avc->f.states |= CDCLock;
		/* just need slots */
		afs_GetDownD(5, (int *)0, afs_DCGetBucket(avc));
		if (!setLocks)
		    avc->f.states &= ~CDCLock;
	    }
	    tdc = afs_AllocDCache(avc, chunk, aflags, NULL);
	    if (!tdc) {
		ReleaseWriteLock(&afs_xdcache);
		if (afs_discardDCList == NULLIDX && afs_freeDCList == NULLIDX) {
		    /* It looks like afs_AllocDCache failed because we don't
		     * have any free dslots to use. Maybe if we wait a little
		     * while, we'll be able to free up some slots, so try for 5
		     * minutes, then bail out. */
		    if (++downDCount > 300) {
			afs_warn("afs: Unable to get free cache space for file "
			         "%u:%u.%u.%u for 5 minutes; failing with an i/o error\n",
			         avc->f.fid.Cell,
			         avc->f.fid.Fid.Volume,
			         avc->f.fid.Fid.Vnode,
			         avc->f.fid.Fid.Unique);
			goto done;
		    }
		    afs_osi_Wait(1000, 0, 0);
		    goto RetryLookup;
		}

		/* afs_AllocDCache failed, but not because we're out of free
		 * dslots. Something must be screwy with the cache, so bail out
		 * immediately without waiting. */
		afs_warn("afs: Error while alloc'ing cache slot for file "
		         "%u:%u.%u.%u; failing with an i/o error\n",
		         avc->f.fid.Cell,
		         avc->f.fid.Fid.Volume,
		         avc->f.fid.Fid.Vnode,
		         avc->f.fid.Fid.Unique);
		goto done;
	    }

	    /*
	     * Locks held:
	     * avc->lock(R) if setLocks
	     * avc->lock(W) if !setLocks
	     * tdc->lock(W)
	     * afs_xdcache(W)
	     */

	    /*
	     * Now add to the two hash chains - note that i is still set
	     * from the above DCHash call.
	     */
	    afs_dcnextTbl[tdc->index] = afs_dchashTbl[i];
	    afs_dchashTbl[i] = tdc->index;
	    i = DVHash(&avc->f.fid);
	    afs_dvnextTbl[tdc->index] = afs_dvhashTbl[i];
	    afs_dvhashTbl[i] = tdc->index;
	    tdc->dflags = DFEntryMod;
	    tdc->mflags = 0;
	    afs_MaybeWakeupTruncateDaemon();
	    ReleaseWriteLock(&afs_xdcache);
	    ConvertWToSLock(&tdc->lock);
	}
    }


    /* vcache->dcache hint failed */
    /*
     * Locks held:
     * avc->lock(R) if setLocks && !slowPass
     * avc->lock(W) if !setLocks || slowPass
     * tdc->lock(S)
     */
    afs_Trace4(afs_iclSetp, CM_TRACE_GETDCACHE2, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
	       hgetlo(tdc->f.versionNo), ICL_TYPE_INT32,
	       hgetlo(avc->f.m.DataVersion));
    /*
     * Here we have the entry in tdc, with its refCount incremented.
     * Note: we don't use the S-lock on avc; it costs concurrency when
     * storing a file back to the server.
     */

    /*
     * Not a newly created file so we need to check the file's length and
     * compare data versions since someone could have changed the data or we're
     * reading a file written elsewhere. We only want to bypass doing no-op
     * read rpcs on newly created files (dv of 0) since only then we guarantee
     * that this chunk's data hasn't been filled by another client.
     */
    size = AFS_CHUNKSIZE(abyte);
    if (aflags & 4)		/* called from write */
	tlen = *alen;
    else			/* called from read */
	tlen = tdc->validPos - abyte;
    Position = AFS_CHUNKTOBASE(chunk);
    afs_Trace4(afs_iclSetp, CM_TRACE_GETDCACHE3, ICL_TYPE_INT32, tlen,
	       ICL_TYPE_INT32, aflags, ICL_TYPE_OFFSET,
	       ICL_HANDLE_OFFSET(abyte), ICL_TYPE_OFFSET,
	       ICL_HANDLE_OFFSET(Position));
    if ((aflags & 4) && (hiszero(avc->f.m.DataVersion)))
	doAdjustSize = 1;
    if ((AFS_CHUNKTOBASE(chunk) >= avc->f.m.Length) ||
	((aflags & 4) && (abyte == Position) && (tlen >= size)))
	overWriteWholeChunk = 1;
    if (doAdjustSize || overWriteWholeChunk) {
#if defined(AFS_AIX32_ENV) || defined(AFS_SGI_ENV)
#ifdef AFS_SGI_ENV
#ifdef AFS_SGI64_ENV
	if (doAdjustSize)
	    adjustsize = NBPP;
#else /* AFS_SGI64_ENV */
	if (doAdjustSize)
	    adjustsize = 8192;
#endif /* AFS_SGI64_ENV */
#else /* AFS_SGI_ENV */
	if (doAdjustSize)
	    adjustsize = 4096;
#endif /* AFS_SGI_ENV */
	/* NOTE: the following 'if' condition is completed by one of the
	 * platform-specific lines below; the #ifdef maze splices a single
	 * statement together. */
	if (AFS_CHUNKTOBASE(chunk) + adjustsize >= avc->f.m.Length &&
#else /* defined(AFS_AIX32_ENV) || defined(AFS_SGI_ENV) */
#if defined(AFS_SUN5_ENV)
	if ((doAdjustSize || (AFS_CHUNKTOBASE(chunk) >= avc->f.m.Length)) &&
#else
	if (AFS_CHUNKTOBASE(chunk) >= avc->f.m.Length &&
#endif
#endif /* defined(AFS_AIX32_ENV) || defined(AFS_SGI_ENV) */
	    !hsame(avc->f.m.DataVersion, tdc->f.versionNo))
	    doReallyAdjustSize = 1;

	if (doReallyAdjustSize || overWriteWholeChunk) {
	    /* no data in file to read at this position */
	    UpgradeSToWLock(&tdc->lock, 607);
	    file = afs_CFileOpen(&tdc->f.inode);
	    osi_Assert(file);
	    afs_CFileTruncate(file, 0);
	    afs_CFileClose(file);
	    afs_AdjustSize(tdc, 0);
	    hset(tdc->f.versionNo, avc->f.m.DataVersion);
	    tdc->dflags |= DFEntryMod;

	    ConvertWToSLock(&tdc->lock);
	}
    }

    /*
     * We must read in the whole chunk if the version number doesn't
     * match.
     */
    if (aflags & 2) {
	/* don't need data, just a unique dcache entry */
	ObtainWriteLock(&afs_xdcache, 608);
	hset(afs_indexTimes[tdc->index], afs_indexCounter);
	hadd32(afs_indexCounter, 1);
	ReleaseWriteLock(&afs_xdcache);

	updateV2DC(setLocks, avc, tdc, 553);
	if (vType(avc) == VDIR)
	    *aoffset = abyte;
	else
	    *aoffset = AFS_CHUNKOFFSET(abyte);
	if (tdc->validPos < abyte)
	    *alen = (afs_size_t) 0;
	else
	    *alen = tdc->validPos - abyte;
	ReleaseSharedLock(&tdc->lock);
	if (setLocks) {
	    if (slowPass)
		ReleaseWriteLock(&avc->lock);
	    else
		ReleaseReadLock(&avc->lock);
	}
	return tdc;		/* check if we're done */
    }

    /*
     * Locks held:
     * avc->lock(R) if setLocks && !slowPass
     * avc->lock(W) if !setLocks || slowPass
     * tdc->lock(S)
     */
    osi_Assert((setLocks && !slowPass) || WriteLocked(&avc->lock));

    setNewCallback = setVcacheStatus = 0;

    /*
     * Locks held:
     * avc->lock(R) if setLocks && !slowPass
     * avc->lock(W) if !setLocks || slowPass
     * tdc->lock(S)
     */
    if (!hsame(avc->f.m.DataVersion, tdc->f.versionNo) && !overWriteWholeChunk) {
	/*
	 * Version number mismatch.
	 */
	/*
	 * If we are disconnected, then we can't do much of anything
	 * because the data doesn't match the file.
	 */
	if (AFS_IS_DISCONNECTED) {
	    ReleaseSharedLock(&tdc->lock);
	    if (setLocks) {
		if (slowPass)
		    ReleaseWriteLock(&avc->lock);
		else
		    ReleaseReadLock(&avc->lock);
	    }
	    /* Flush the Dcache */
	    afs_PutDCache(tdc);

	    return NULL;
	}
	UpgradeSToWLock(&tdc->lock, 609);

	/*
	 * If data ever existed for this vnode, and this is a text object,
	 * do some clearing.  Now, you'd think you need only do the flush
	 * when VTEXT is on, but VTEXT is turned off when the text object
	 * is freed, while pages are left lying around in memory marked
	 * with this vnode.  If we would reactivate (create a new text
	 * object from) this vnode, we could easily stumble upon some of
	 * these old pages in pagein.  So, we always flush these guys.
	 * Sun has a wonderful lack of useful invariants in this system.
	 *
	 * avc->flushDV is the data version # of the file at the last text
	 * flush.  Clearly, at least, we don't have to flush the file more
	 * often than it changes
	 */
	if (hcmp(avc->flushDV, avc->f.m.DataVersion) < 0) {
	    /*
	     * By here, the cache entry is always write-locked.  We can
	     * deadlock if we call osi_Flush with the cache entry locked...
	     * Unlock the dcache too.
	     */
	    ReleaseWriteLock(&tdc->lock);
	    if (setLocks && !slowPass)
		ReleaseReadLock(&avc->lock);
	    else
		ReleaseWriteLock(&avc->lock);

	    osi_FlushText(avc);
	    /*
	     * Call osi_FlushPages in open, read/write, and map, since it
	     * is too hard here to figure out if we should lock the
	     * pvnLock.
	     */
	    if (setLocks && !slowPass)
		ObtainReadLock(&avc->lock);
	    else
		ObtainWriteLock(&avc->lock, 66);
	    ObtainWriteLock(&tdc->lock, 610);
	}

	/*
	 * Locks held:
	 * avc->lock(R) if setLocks && !slowPass
	 * avc->lock(W) if !setLocks || slowPass
	 * tdc->lock(W)
	 */

	/* Watch for standard race condition around osi_FlushText */
	if (hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
	    /* somebody else fetched the chunk while our locks were down */
	    updateV2DC(setLocks, avc, tdc, 569);	/* set hint */
	    afs_stats_cmperf.dcacheHits++;
	    ConvertWToSLock(&tdc->lock);
	    goto done;
	}

	/* Sleep here when cache needs to be drained. */
	if (setLocks && !slowPass
	    && (afs_blocksUsed >
		PERCENT(CM_WAITFORDRAINPCT, afs_cacheBlocks))) {
	    /* Make sure truncate daemon is running */
	    afs_MaybeWakeupTruncateDaemon();
	    ObtainWriteLock(&tdc->tlock, 614);
	    tdc->refCount--;	/* we'll re-obtain the dcache when we re-try. */
	    ReleaseWriteLock(&tdc->tlock);
	    ReleaseWriteLock(&tdc->lock);
	    ReleaseReadLock(&avc->lock);
	    while ((afs_blocksUsed - afs_blocksDiscarded) >
		   PERCENT(CM_WAITFORDRAINPCT, afs_cacheBlocks)) {
		afs_WaitForCacheDrain = 1;
		afs_osi_Sleep(&afs_WaitForCacheDrain);
	    }
	    afs_MaybeFreeDiscardedDCache();
	    /* need to check if someone else got the chunk first. */
	    goto RetryGetDCache;
	}

	Position = AFS_CHUNKBASE(abyte);
	if (vType(avc) == VDIR) {
	    size = avc->f.m.Length;
	    if (size > tdc->f.chunkBytes) {
		/* pre-reserve space for file */
		afs_AdjustSize(tdc, size);
	    }
	    size = 999999999;	/* max size for transfer */
	} else {
	    afs_size_t maxGoodLength;

	    /* estimate how much data we're expecting back from the server,
	     * and reserve space in the dcache entry for it */

	    maxGoodLength = avc->f.m.Length;
	    if (avc->f.truncPos < maxGoodLength)
		maxGoodLength = avc->f.truncPos;

	    size = AFS_CHUNKSIZE(abyte);	/* expected max size */
	    if (Position > maxGoodLength) {	/* If we're beyond EOF */
		size = 0;
	    } else if (Position + size > maxGoodLength) {
		size = maxGoodLength - Position;
	    }
	    osi_Assert(size >= 0);

	    if (size > tdc->f.chunkBytes) {
		/* pre-reserve estimated space for file */
		afs_AdjustSize(tdc, size);	/* changes chunkBytes */
	    }

	    if (size) {
		/* For the actual fetch, do not limit the request to the
		 * length of the file. If this results in a read past EOF on
		 * the server, the server will just reply with less data than
		 * requested. If we limit ourselves to only requesting data up
		 * to the avc file length, we open ourselves up to races if the
		 * file is extended on the server at about the same time.
		 *
		 * However, we must restrict ourselves to the avc->f.truncPos
		 * length, since this represents an outstanding local
		 * truncation of the file that will be committed to the
		 * fileserver when we actually write the fileserver contents.
		 * If we do not restrict the fetch length based on
		 * avc->f.truncPos, a different truncate operation extending
		 * the file length could cause the old data after
		 * avc->f.truncPos to reappear, instead of extending the file
		 * with NUL bytes. */
		size = AFS_CHUNKSIZE(abyte);
		if (Position > avc->f.truncPos) {
		    size = 0;
		} else if (Position + size > avc->f.truncPos) {
		    size = avc->f.truncPos - Position;
		}
		osi_Assert(size >= 0);
	    }
	}
	if (afs_mariner && !tdc->f.chunk)
	    afs_MarinerLog("fetch$Fetching", avc);	/* , Position, size, afs_indexCounter ); */
	/*
	 * Right now, we only have one tool, and it's a hammer.  So, we
	 * fetch the whole file.
	 */
	DZap(tdc);		/* pages in cache may be old */
	file = afs_CFileOpen(&tdc->f.inode);
	if (!file) {
	    /* We can't access the file in the disk cache backing this dcache;
	     * bail out. */
	    ReleaseWriteLock(&tdc->lock);
	    afs_PutDCache(tdc);
	    tdc = NULL;
	    goto done;
	}
	afs_RemoveVCB(&avc->f.fid);
	tdc->f.states |= DWriting;
	tdc->dflags |= DFFetching;
	tdc->validPos = Position;	/*which is AFS_CHUNKBASE(abyte) */
	if (tdc->mflags & DFFetchReq) {
	    tdc->mflags &= ~DFFetchReq;
	    if (afs_osi_Wakeup(&tdc->validPos) == 0)
		afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAKE, ICL_TYPE_STRING,
			   __FILE__, ICL_TYPE_INT32, __LINE__,
			   ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
			   tdc->dflags);
	}
	tsmall = osi_AllocLargeSpace(sizeof(struct afs_FetchOutput));
	setVcacheStatus = 0;
#ifndef AFS_NOSTATS
	/*
	 * Remember if we are doing the reading from a replicated volume,
	 * and how many times we've zipped around the fetch/analyze loop.
	 */
	fromReplica = (avc->f.states & CRO) ? 1 : 0;
	numFetchLoops = 0;
	accP = &(afs_stats_cmfullperf.accessinf);
	if (fromReplica)
	    (accP->replicatedRefs)++;
	else
	    (accP->unreplicatedRefs)++;
#endif /* AFS_NOSTATS */
	/* this is a cache miss */
	afs_Trace4(afs_iclSetp, CM_TRACE_FETCHPROC, ICL_TYPE_POINTER, avc,
		   ICL_TYPE_FID, &(avc->f.fid), ICL_TYPE_OFFSET,
		   ICL_HANDLE_OFFSET(Position), ICL_TYPE_INT32, size);

	if (size)
	    afs_stats_cmperf.dcacheMisses++;
	code = 0;
	/*
	 * Dynamic root support:  fetch data from local memory.
	 */
	if (afs_IsDynroot(avc)) {
	    char *dynrootDir;
	    int dynrootLen;

	    afs_GetDynroot(&dynrootDir, &dynrootLen, &tsmall->OutStatus);

	    dynrootDir += Position;
	    dynrootLen -= Position;
	    if (size > dynrootLen)
		size = dynrootLen;
	    if (size < 0)
		size = 0;
	    code = afs_CFileWrite(file, 0, dynrootDir, size);
	    afs_PutDynroot();

	    if (code == size)
		code = 0;
	    else
		code = -1;

	    tdc->validPos = Position + size;
	    afs_CFileTruncate(file, size);	/* prune it */
	} else if (afs_IsDynrootMount(avc)) {
	    char *dynrootDir;
	    int dynrootLen;

	    afs_GetDynrootMount(&dynrootDir, &dynrootLen, &tsmall->OutStatus);

	    dynrootDir += Position;
	    dynrootLen -= Position;
	    if (size > dynrootLen)
		size = dynrootLen;
	    if (size < 0)
		size = 0;
	    code = afs_CFileWrite(file, 0, dynrootDir, size);
	    afs_PutDynroot();

	    if (code == size)
		code = 0;
	    else
		code = -1;

	    tdc->validPos = Position + size;
	    afs_CFileTruncate(file, size);	/* prune it */
	} else
	    /*
	     * Not a dynamic vnode:  do the real fetch.
	     */
	    do {
		/*
		 * Locks held:
		 * avc->lock(R) if setLocks && !slowPass
		 * avc->lock(W) if !setLocks || slowPass
		 * tdc->lock(W)
		 */

		tc = afs_Conn(&avc->f.fid, areq, SHARED_LOCK, &rxconn);
		if (tc) {
#ifndef AFS_NOSTATS
		    numFetchLoops++;
		    if (fromReplica)
			(accP->numReplicasAccessed)++;

#endif /* AFS_NOSTATS */
		    if (!setLocks || slowPass) {
			avc->callback = tc->parent->srvr->server;
		    } else {
			newCallback = tc->parent->srvr->server;
			setNewCallback = 1;
		    }
		    i = osi_Time();
		    code = afs_CacheFetchProc(tc, rxconn, file, Position, tdc,
					      avc, size, tsmall);
		} else
		    code = -1;

		if (code == 0) {
		    /* callback could have been broken (or expired) in a race here,
		     * but we return the data anyway.  It's as good as we knew about
		     * when we started. */
		    /*
		     * validPos is updated by CacheFetchProc, and can only be
		     * modifed under a dcache write lock, which we've blocked out
		     */
		    size = tdc->validPos - Position;	/* actual segment size */
		    if (size < 0)
			size = 0;
		    afs_CFileTruncate(file, size);	/* prune it */
		} else {
		    if (!setLocks || slowPass) {
			afs_StaleVCacheFlags(avc, AFS_STALEVC_CLEARCB, CUnique);
		    } else {
			/* Something lost.  Forget about performance, and go
			 * back with a vcache write lock.
			 */
			afs_CFileTruncate(file, 0);
			afs_AdjustSize(tdc, 0);
			afs_CFileClose(file);
			osi_FreeLargeSpace(tsmall);
			tsmall = 0;
			ReleaseWriteLock(&tdc->lock);
			afs_PutDCache(tdc);
			tdc = 0;

			/*
			 * Call afs_Analyze to manage the connection references
			 * and handle the error code (possibly mark servers
			 * down, etc). We are going to retry getting the
			 * dcache regardless, so we just ignore the retry hint
			 * returned by afs_Analyze on this call.
			 */
			(void)afs_Analyze(tc, rxconn, code, &avc->f.fid, areq,
			                  AFS_STATS_FS_RPCIDX_FETCHDATA, SHARED_LOCK, NULL);

			ReleaseReadLock(&avc->lock);

			slowPass = 1;
			goto RetryGetDCache;
		    }
		}

	    } while (afs_Analyze
		     (tc, rxconn, code, &avc->f.fid, areq,
		      AFS_STATS_FS_RPCIDX_FETCHDATA, SHARED_LOCK, NULL));

	/*
	 * Locks held:
	 * avc->lock(R) if setLocks && !slowPass
	 * avc->lock(W) if !setLocks || slowPass
	 * tdc->lock(W)
	 */

#ifndef AFS_NOSTATS
	/*
	 * In the case of replicated access, jot down info on the number of
	 * attempts it took before we got through or gave up.
	 */
	if (fromReplica) {
	    if (numFetchLoops <= 1)
		(accP->refFirstReplicaOK)++;
	    if (numFetchLoops > accP->maxReplicasPerRef)
		accP->maxReplicasPerRef = numFetchLoops;
	}
#endif /* AFS_NOSTATS */

	tdc->dflags &= ~DFFetching;
	if (afs_osi_Wakeup(&tdc->validPos) == 0)
	    afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAKE, ICL_TYPE_STRING,
		       __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
		       tdc, ICL_TYPE_INT32, tdc->dflags);
	if (avc->execsOrWriters == 0)
	    tdc->f.states &= ~DWriting;

	/* now, if code != 0, we have an error and should punt.
	 * note that we have the vcache write lock, either because
	 * !setLocks or slowPass.
	 */
	if (code) {
	    afs_CFileTruncate(file, 0);
	    afs_AdjustSize(tdc, 0);
	    afs_CFileClose(file);
	    ZapDCE(tdc);	/* sets DFEntryMod */
	    if (vType(avc) == VDIR) {
		DZap(tdc);
	    }
	    tdc->f.states &= ~(DRO|DBackup|DRW);
	    afs_DCMoveBucket(tdc, 0, 0);
	    ReleaseWriteLock(&tdc->lock);
	    afs_PutDCache(tdc);
	    if (!afs_IsDynroot(avc)) {
		afs_StaleVCacheFlags(avc, 0, CUnique);
		/*
		 * Locks held:
		 * avc->lock(W); assert(!setLocks || slowPass)
		 */
		osi_Assert(!setLocks || slowPass);
	    }
	    tdc = NULL;
	    goto done;
	}

	/* otherwise we copy in the just-fetched info */
	afs_CFileClose(file);
	afs_AdjustSize(tdc, size);	/* new size */
	/*
	 * Copy appropriate fields into vcache.  Status is
	 * copied later where we selectively acquire the
	 * vcache write lock.
	 */
	if (slowPass)
	    afs_ProcessFS(avc, &tsmall->OutStatus, areq);
	else
	    setVcacheStatus = 1;
	hset64(tdc->f.versionNo, tsmall->OutStatus.dataVersionHigh,
	       tsmall->OutStatus.DataVersion);
	tdc->dflags |= DFEntryMod;
	afs_indexFlags[tdc->index] |= IFEverUsed;
	ConvertWToSLock(&tdc->lock);
    } /*Data version numbers don't match */
    else {
	/*
	 * Data version numbers match.
	 */
	afs_stats_cmperf.dcacheHits++;
    }				/*Data version numbers match */

    updateV2DC(setLocks, avc, tdc, 335);	/* set hint */
  done:
    /*
     * Locks held:
     * avc->lock(R) if setLocks && !slowPass
     * avc->lock(W) if !setLocks || slowPass
     * tdc->lock(S) if tdc
     */

    /*
     * See if this was a reference to a file in the local cell.
     */
    if (afs_IsPrimaryCellNum(avc->f.fid.Cell))
	afs_stats_cmperf.dlocalAccesses++;
    else
	afs_stats_cmperf.dremoteAccesses++;

    /* Fix up LRU info */

    if (tdc) {
	ObtainWriteLock(&afs_xdcache, 602);
	hset(afs_indexTimes[tdc->index], afs_indexCounter);
	hadd32(afs_indexCounter, 1);
	ReleaseWriteLock(&afs_xdcache);

	/* return the data */
	if (vType(avc) == VDIR)
	    *aoffset = abyte;
	else
	    *aoffset = AFS_CHUNKOFFSET(abyte);
	*alen = (tdc->f.chunkBytes - *aoffset);
	ReleaseSharedLock(&tdc->lock);
    }

    /*
     * Locks held:
     * avc->lock(R) if setLocks && !slowPass
     * avc->lock(W) if !setLocks || slowPass
     */

    /* Fix up the callback and status values in the vcache */
    doVcacheUpdate = 0;
    if (setLocks && !slowPass) {
	/* DCLOCKXXX
	 *
	 * This is our dirty little secret to parallel fetches.
	 * We don't write-lock the vcache while doing the fetch,
	 * but potentially we'll need to update the vcache after
	 * the fetch is done.
	 *
	 * Drop the read lock and try to re-obtain the write
	 * lock.  If the vcache still has the same DV, it's
	 * ok to go ahead and install the new data.
	 */
	afs_hyper_t currentDV, statusDV;

	hset(currentDV, avc->f.m.DataVersion);

	if (setNewCallback && avc->callback != newCallback)
	    doVcacheUpdate = 1;

	if (tsmall) {
	    hset64(statusDV, tsmall->OutStatus.dataVersionHigh,
		   tsmall->OutStatus.DataVersion);

	    if (setVcacheStatus && avc->f.m.Length != tsmall->OutStatus.Length)
		doVcacheUpdate = 1;
	    if (setVcacheStatus && !hsame(currentDV, statusDV))
		doVcacheUpdate = 1;
	}

	ReleaseReadLock(&avc->lock);

	if (doVcacheUpdate) {
	    ObtainWriteLock(&avc->lock, 615);
	    if (!hsame(avc->f.m.DataVersion, currentDV)) {
		/* We lose.  Someone will beat us to it. */
		doVcacheUpdate = 0;
		ReleaseWriteLock(&avc->lock);
	    }
	}
    }

    /* With slow pass, we've already done all the updates */
    if (slowPass) {
	ReleaseWriteLock(&avc->lock);
    }

    /* Check if we need to perform any last-minute fixes with a write-lock */
    if (!setLocks || doVcacheUpdate) {
	if (setNewCallback)
	    avc->callback = newCallback;
	if (tsmall && setVcacheStatus)
	    afs_ProcessFS(avc, &tsmall->OutStatus, areq);
	if (setLocks)
	    ReleaseWriteLock(&avc->lock);
    }

    if (tsmall)
	osi_FreeLargeSpace(tsmall);

    return tdc;
}				/*afs_GetDCache */
2686 | ||
2687 | ||
2688 | /* | |
2689 | * afs_WriteThroughDSlots | |
2690 | * | |
2691 | * Description: | |
2692 | * Sweep through the dcache slots and write out any modified | |
2693 | * in-memory data back on to our caching store. | |
2694 | * | |
2695 | * Parameters: | |
2696 | * None. | |
2697 | * | |
2698 | * Environment: | |
2699 | * The afs_xdcache is write-locked through this whole affair. | |
2700 | */ | |
2701 | int | |
2702 | afs_WriteThroughDSlots(void) | |
2703 | { | |
2704 | struct dcache *tdc; | |
2705 | afs_int32 i, touchedit = 0; | |
2706 | int code = 0; | |
2707 | ||
2708 | struct afs_q DirtyQ, *tq; | |
2709 | ||
2710 | AFS_STATCNT(afs_WriteThroughDSlots); | |
2711 | ||
2712 | /* | |
2713 | * Because of lock ordering, we can't grab dcache locks while | |
2714 | * holding afs_xdcache. So we enter xdcache, get a reference | |
2715 | * for every dcache entry, and exit xdcache. | |
2716 | */ | |
2717 | ObtainWriteLock(&afs_xdcache, 283); | |
2718 | QInit(&DirtyQ); | |
2719 | for (i = 0; i < afs_cacheFiles; i++) { | |
2720 | tdc = afs_indexTable[i]; | |
2721 | ||
2722 | /* Grab tlock in case the existing refcount isn't zero */ | |
2723 | if (tdc && !(afs_indexFlags[i] & (IFFree | IFDiscarded))) { | |
2724 | ObtainWriteLock(&tdc->tlock, 623); | |
2725 | tdc->refCount++; | |
2726 | ReleaseWriteLock(&tdc->tlock); | |
2727 | ||
2728 | QAdd(&DirtyQ, &tdc->dirty); | |
2729 | } | |
2730 | } | |
2731 | ReleaseWriteLock(&afs_xdcache); | |
2732 | ||
2733 | /* | |
2734 | * Now, for each dcache entry we found, check if it's dirty. | |
2735 | * If so, get write-lock, get afs_xdcache, which protects | |
2736 | * afs_cacheInodep, and flush it. Don't forget to put back | |
2737 | * the refcounts. | |
2738 | */ | |
2739 | ||
2740 | #define DQTODC(q) ((struct dcache *)(((char *) (q)) - sizeof(struct afs_q))) | |
2741 | ||
2742 | for (tq = DirtyQ.prev; tq != &DirtyQ && code == 0; tq = QPrev(tq)) { | |
2743 | tdc = DQTODC(tq); | |
2744 | if (tdc->dflags & DFEntryMod) { | |
2745 | int wrLock; | |
2746 | ||
2747 | wrLock = (0 == NBObtainWriteLock(&tdc->lock, 619)); | |
2748 | ||
2749 | /* Now that we have the write lock, double-check */ | |
2750 | if (wrLock && (tdc->dflags & DFEntryMod)) { | |
2751 | tdc->dflags &= ~DFEntryMod; | |
2752 | ObtainWriteLock(&afs_xdcache, 620); | |
2753 | code = afs_WriteDCache(tdc, 1); | |
2754 | ReleaseWriteLock(&afs_xdcache); | |
2755 | if (code) { | |
2756 | /* We didn't successfully write out the dslot; make sure we | |
2757 | * try again later */ | |
2758 | tdc->dflags |= DFEntryMod; | |
2759 | } else { | |
2760 | touchedit = 1; | |
2761 | } | |
2762 | } | |
2763 | if (wrLock) | |
2764 | ReleaseWriteLock(&tdc->lock); | |
2765 | } | |
2766 | ||
2767 | afs_PutDCache(tdc); | |
2768 | } | |
2769 | ||
2770 | if (code) { | |
2771 | return code; | |
2772 | } | |
2773 | ||
2774 | ObtainWriteLock(&afs_xdcache, 617); | |
2775 | if (!touchedit && (cacheDiskType != AFS_FCACHE_TYPE_MEM)) { | |
2776 | /* Touch the file to make sure that the mtime on the file is kept | |
2777 | * up-to-date to avoid losing cached files on cold starts because | |
2778 | * their mtime seems old... | |
2779 | */ | |
2780 | struct afs_fheader theader; | |
2781 | ||
2782 | afs_InitFHeader(&theader); | |
2783 | afs_osi_Write(afs_cacheInodep, 0, &theader, sizeof(theader)); | |
2784 | } | |
2785 | ReleaseWriteLock(&afs_xdcache); | |
2786 | return 0; | |
2787 | } | |
2788 | ||
2789 | /* | |
2790 | * afs_MemGetDSlot | |
2791 | * | |
2792 | * Description: | |
2793 | * Return a pointer to an freshly initialized dcache entry using | |
2794 | * a memory-based cache. The tlock will be read-locked. | |
2795 | * | |
2796 | * Parameters: | |
2797 | * aslot : Dcache slot to look at. | |
2798 | * type : What 'type' of dslot to get; see the dslot_state enum | |
2799 | * | |
2800 | * Environment: | |
2801 | * Must be called with afs_xdcache write-locked. | |
2802 | */ | |
2803 | ||
2804 | struct dcache * | |
2805 | afs_MemGetDSlot(afs_int32 aslot, dslot_state type) | |
2806 | { | |
2807 | struct dcache *tdc; | |
2808 | int existing = 0; | |
2809 | ||
2810 | AFS_STATCNT(afs_MemGetDSlot); | |
2811 | if (CheckLock(&afs_xdcache) != -1) | |
2812 | osi_Panic("getdslot nolock"); | |
2813 | if (aslot < 0 || aslot >= afs_cacheFiles) | |
2814 | osi_Panic("getdslot slot %d (of %d)", aslot, afs_cacheFiles); | |
2815 | tdc = afs_indexTable[aslot]; | |
2816 | if (tdc) { | |
2817 | QRemove(&tdc->lruq); /* move to queue head */ | |
2818 | QAdd(&afs_DLRU, &tdc->lruq); | |
2819 | /* We're holding afs_xdcache, but get tlock in case refCount != 0 */ | |
2820 | ObtainWriteLock(&tdc->tlock, 624); | |
2821 | tdc->refCount++; | |
2822 | ConvertWToRLock(&tdc->tlock); | |
2823 | return tdc; | |
2824 | } | |
2825 | ||
2826 | /* if we got here, the given slot is not in memory in our list of known | |
2827 | * slots. for memcache, the only place a dslot can exist is in memory, so | |
2828 | * if the caller is expecting to get back a known dslot, and we've reached | |
2829 | * here, something is very wrong. DSLOT_NEW is the only type of dslot that | |
2830 | * may not exist; for all others, the caller assumes the given dslot | |
2831 | * already exists. so, 'type' had better be DSLOT_NEW here, or something is | |
2832 | * very wrong. */ | |
2833 | osi_Assert(type == DSLOT_NEW); | |
2834 | ||
2835 | if (!afs_freeDSList) | |
2836 | afs_GetDownDSlot(4); | |
2837 | if (!afs_freeDSList) { | |
2838 | /* none free, making one is better than a panic */ | |
2839 | afs_stats_cmperf.dcacheXAllocs++; /* count in case we have a leak */ | |
2840 | tdc = afs_osi_Alloc(sizeof(struct dcache)); | |
2841 | osi_Assert(tdc != NULL); | |
2842 | #ifdef KERNEL_HAVE_PIN | |
2843 | pin((char *)tdc, sizeof(struct dcache)); /* XXX */ | |
2844 | #endif | |
2845 | } else { | |
2846 | tdc = afs_freeDSList; | |
2847 | afs_freeDSList = (struct dcache *)tdc->lruq.next; | |
2848 | existing = 1; | |
2849 | } | |
2850 | tdc->dflags = 0; /* up-to-date, not in free q */ | |
2851 | tdc->mflags = 0; | |
2852 | QAdd(&afs_DLRU, &tdc->lruq); | |
2853 | if (tdc->lruq.prev == &tdc->lruq) | |
2854 | osi_Panic("lruq 3"); | |
2855 | ||
2856 | /* initialize entry */ | |
2857 | tdc->f.fid.Cell = 0; | |
2858 | tdc->f.fid.Fid.Volume = 0; | |
2859 | tdc->f.chunk = -1; | |
2860 | hones(tdc->f.versionNo); | |
2861 | tdc->f.inode.mem = aslot; | |
2862 | tdc->dflags |= DFEntryMod; | |
2863 | tdc->refCount = 1; | |
2864 | tdc->index = aslot; | |
2865 | afs_indexUnique[aslot] = tdc->f.fid.Fid.Unique; | |
2866 | ||
2867 | if (existing) { | |
2868 | osi_Assert(0 == NBObtainWriteLock(&tdc->lock, 674)); | |
2869 | osi_Assert(0 == NBObtainWriteLock(&tdc->mflock, 675)); | |
2870 | osi_Assert(0 == NBObtainWriteLock(&tdc->tlock, 676)); | |
2871 | } | |
2872 | ||
2873 | AFS_RWLOCK_INIT(&tdc->lock, "dcache lock"); | |
2874 | AFS_RWLOCK_INIT(&tdc->tlock, "dcache tlock"); | |
2875 | AFS_RWLOCK_INIT(&tdc->mflock, "dcache flock"); | |
2876 | ObtainReadLock(&tdc->tlock); | |
2877 | ||
2878 | afs_indexTable[aslot] = tdc; | |
2879 | return tdc; | |
2880 | ||
2881 | } /*afs_MemGetDSlot */ | |
2882 | ||
/* Diagnostics recorded by afs_UFSGetDSlot on a failed CacheItems read:
 * the last error/short-read code seen, and when it was seen. */
unsigned int last_error = 0, lasterrtime = 0;
2884 | ||
2885 | /* | |
2886 | * afs_UFSGetDSlot | |
2887 | * | |
2888 | * Description: | |
2889 | * Return a pointer to an freshly initialized dcache entry using | |
2890 | * a UFS-based disk cache. The dcache tlock will be read-locked. | |
2891 | * | |
2892 | * Parameters: | |
2893 | * aslot : Dcache slot to look at. | |
2894 | * type : What 'type' of dslot to get; see the dslot_state enum | |
2895 | * | |
2896 | * Environment: | |
2897 | * afs_xdcache lock write-locked. | |
2898 | */ | |
2899 | struct dcache * | |
2900 | afs_UFSGetDSlot(afs_int32 aslot, dslot_state type) | |
2901 | { | |
2902 | afs_int32 code; | |
2903 | struct dcache *tdc; | |
2904 | int existing = 0; | |
2905 | int entryok; | |
2906 | int off; | |
2907 | ||
2908 | AFS_STATCNT(afs_UFSGetDSlot); | |
2909 | if (CheckLock(&afs_xdcache) != -1) | |
2910 | osi_Panic("getdslot nolock"); | |
2911 | if (aslot < 0 || aslot >= afs_cacheFiles) | |
2912 | osi_Panic("getdslot slot %d (of %d)", aslot, afs_cacheFiles); | |
2913 | tdc = afs_indexTable[aslot]; | |
2914 | if (tdc) { | |
2915 | QRemove(&tdc->lruq); /* move to queue head */ | |
2916 | QAdd(&afs_DLRU, &tdc->lruq); | |
2917 | /* Grab tlock in case refCount != 0 */ | |
2918 | ObtainWriteLock(&tdc->tlock, 625); | |
2919 | tdc->refCount++; | |
2920 | ConvertWToRLock(&tdc->tlock); | |
2921 | return tdc; | |
2922 | } | |
2923 | ||
2924 | /* otherwise we should read it in from the cache file */ | |
2925 | if (!afs_freeDSList) | |
2926 | afs_GetDownDSlot(4); | |
2927 | if (!afs_freeDSList) { | |
2928 | /* none free, making one is better than a panic */ | |
2929 | afs_stats_cmperf.dcacheXAllocs++; /* count in case we have a leak */ | |
2930 | tdc = afs_osi_Alloc(sizeof(struct dcache)); | |
2931 | osi_Assert(tdc != NULL); | |
2932 | #ifdef KERNEL_HAVE_PIN | |
2933 | pin((char *)tdc, sizeof(struct dcache)); /* XXX */ | |
2934 | #endif | |
2935 | } else { | |
2936 | tdc = afs_freeDSList; | |
2937 | afs_freeDSList = (struct dcache *)tdc->lruq.next; | |
2938 | existing = 1; | |
2939 | } | |
2940 | tdc->dflags = 0; /* up-to-date, not in free q */ | |
2941 | tdc->mflags = 0; | |
2942 | QAdd(&afs_DLRU, &tdc->lruq); | |
2943 | if (tdc->lruq.prev == &tdc->lruq) | |
2944 | osi_Panic("lruq 3"); | |
2945 | ||
2946 | /* | |
2947 | * Seek to the aslot'th entry and read it in. | |
2948 | */ | |
2949 | off = sizeof(struct fcache)*aslot + sizeof(struct afs_fheader); | |
2950 | code = | |
2951 | afs_osi_Read(afs_cacheInodep, | |
2952 | off, (char *)(&tdc->f), | |
2953 | sizeof(struct fcache)); | |
2954 | entryok = 1; | |
2955 | if (code != sizeof(struct fcache)) { | |
2956 | entryok = 0; | |
2957 | #if defined(KERNEL_HAVE_UERROR) | |
2958 | last_error = getuerror(); | |
2959 | #else | |
2960 | last_error = code; | |
2961 | #endif | |
2962 | lasterrtime = osi_Time(); | |
2963 | if (type != DSLOT_NEW) { | |
2964 | /* If we are requesting a non-DSLOT_NEW slot, this is an error. | |
2965 | * non-DSLOT_NEW slots are supposed to already exist, so if we | |
2966 | * failed to read in the slot, something is wrong. */ | |
2967 | struct osi_stat tstat; | |
2968 | if (afs_osi_Stat(afs_cacheInodep, &tstat)) { | |
2969 | tstat.size = -1; | |
2970 | } | |
2971 | afs_warn("afs: disk cache read error in CacheItems slot %d " | |
2972 | "off %d/%d code %d/%d\n", | |
2973 | (int)aslot, | |
2974 | off, (int)tstat.size, | |
2975 | (int)code, (int)sizeof(struct fcache)); | |
2976 | /* put tdc back on the free dslot list */ | |
2977 | QRemove(&tdc->lruq); | |
2978 | tdc->index = NULLIDX; | |
2979 | tdc->lruq.next = (struct afs_q *)afs_freeDSList; | |
2980 | afs_freeDSList = tdc; | |
2981 | return NULL; | |
2982 | } | |
2983 | } | |
2984 | if (!afs_CellNumValid(tdc->f.fid.Cell)) { | |
2985 | entryok = 0; | |
2986 | if (type == DSLOT_VALID) { | |
2987 | osi_Panic("afs: needed valid dcache but index %d off %d has " | |
2988 | "invalid cell num %d\n", | |
2989 | (int)aslot, off, (int)tdc->f.fid.Cell); | |
2990 | } | |
2991 | } | |
2992 | ||
2993 | if (type == DSLOT_VALID && tdc->f.fid.Fid.Volume == 0) { | |
2994 | osi_Panic("afs: invalid zero-volume dcache entry at slot %d off %d", | |
2995 | (int)aslot, off); | |
2996 | } | |
2997 | ||
2998 | if (type == DSLOT_UNUSED) { | |
2999 | /* the requested dslot is known to exist, but contain invalid data | |
3000 | * (this happens when we're using a dslot from the free or discard | |
3001 | * list). be sure not to re-use the data in it, so force invalidation. | |
3002 | */ | |
3003 | entryok = 0; | |
3004 | } | |
3005 | ||
3006 | if (!entryok) { | |
3007 | tdc->f.fid.Cell = 0; | |
3008 | tdc->f.fid.Fid.Volume = 0; | |
3009 | tdc->f.chunk = -1; | |
3010 | hones(tdc->f.versionNo); | |
3011 | tdc->dflags |= DFEntryMod; | |
3012 | afs_indexUnique[aslot] = tdc->f.fid.Fid.Unique; | |
3013 | tdc->f.states &= ~(DRO|DBackup|DRW); | |
3014 | afs_DCMoveBucket(tdc, 0, 0); | |
3015 | } else { | |
3016 | if (tdc->f.states & DRO) { | |
3017 | afs_DCMoveBucket(tdc, 0, 2); | |
3018 | } else if (tdc->f.states & DBackup) { | |
3019 | afs_DCMoveBucket(tdc, 0, 1); | |
3020 | } else { | |
3021 | afs_DCMoveBucket(tdc, 0, 1); | |
3022 | } | |
3023 | } | |
3024 | tdc->refCount = 1; | |
3025 | tdc->index = aslot; | |
3026 | if (tdc->f.chunk >= 0) | |
3027 | tdc->validPos = AFS_CHUNKTOBASE(tdc->f.chunk) + tdc->f.chunkBytes; | |
3028 | else | |
3029 | tdc->validPos = 0; | |
3030 | ||
3031 | if (existing) { | |
3032 | osi_Assert(0 == NBObtainWriteLock(&tdc->lock, 674)); | |
3033 | osi_Assert(0 == NBObtainWriteLock(&tdc->mflock, 675)); | |
3034 | osi_Assert(0 == NBObtainWriteLock(&tdc->tlock, 676)); | |
3035 | } | |
3036 | ||
3037 | AFS_RWLOCK_INIT(&tdc->lock, "dcache lock"); | |
3038 | AFS_RWLOCK_INIT(&tdc->tlock, "dcache tlock"); | |
3039 | AFS_RWLOCK_INIT(&tdc->mflock, "dcache flock"); | |
3040 | ObtainReadLock(&tdc->tlock); | |
3041 | ||
3042 | /* | |
3043 | * If we didn't read into a temporary dcache region, update the | |
3044 | * slot pointer table. | |
3045 | */ | |
3046 | afs_indexTable[aslot] = tdc; | |
3047 | return tdc; | |
3048 | ||
3049 | } /*afs_UFSGetDSlot */ | |
3050 | ||
3051 | ||
3052 | ||
3053 | /*! | |
3054 | * Write a particular dcache entry back to its home in the | |
3055 | * CacheInfo file. | |
3056 | * | |
3057 | * \param adc Pointer to the dcache entry to write. | |
3058 | * \param atime If true, set the modtime on the file to the current time. | |
3059 | * | |
3060 | * \note Environment: | |
3061 | * Must be called with the afs_xdcache lock at least read-locked, | |
3062 | * and dcache entry at least read-locked. | |
3063 | * The reference count is not changed. | |
3064 | */ | |
3065 | ||
3066 | int | |
3067 | afs_WriteDCache(struct dcache *adc, int atime) | |
3068 | { | |
3069 | afs_int32 code; | |
3070 | ||
3071 | if (cacheDiskType == AFS_FCACHE_TYPE_MEM) | |
3072 | return 0; | |
3073 | AFS_STATCNT(afs_WriteDCache); | |
3074 | osi_Assert(WriteLocked(&afs_xdcache)); | |
3075 | if (atime) | |
3076 | adc->f.modTime = osi_Time(); | |
3077 | ||
3078 | if ((afs_indexFlags[adc->index] & (IFFree | IFDiscarded)) == 0 && | |
3079 | adc->f.fid.Fid.Volume == 0) { | |
3080 | /* If a dcache slot is not on the free or discard list, it must be | |
3081 | * in the hash table. Thus, the volume must be non-zero, since that | |
3082 | * is how we determine whether or not to unhash the entry when kicking | |
3083 | * it out of the cache. Do this check now, since otherwise this can | |
3084 | * cause hash table corruption and a panic later on after we read the | |
3085 | * entry back in. */ | |
3086 | osi_Panic("afs_WriteDCache zero volume index %d flags 0x%x\n", | |
3087 | adc->index, (unsigned)afs_indexFlags[adc->index]); | |
3088 | } | |
3089 | ||
3090 | /* | |
3091 | * Seek to the right dcache slot and write the in-memory image out to disk. | |
3092 | */ | |
3093 | afs_cellname_write(); | |
3094 | code = | |
3095 | afs_osi_Write(afs_cacheInodep, | |
3096 | sizeof(struct fcache) * adc->index + | |
3097 | sizeof(struct afs_fheader), (char *)(&adc->f), | |
3098 | sizeof(struct fcache)); | |
3099 | if (code != sizeof(struct fcache)) { | |
3100 | afs_warn("afs: failed to write to CacheItems off %ld code %d/%d\n", | |
3101 | (long)(sizeof(struct fcache) * adc->index + sizeof(struct afs_fheader)), | |
3102 | (int)code, (int)sizeof(struct fcache)); | |
3103 | return EIO; | |
3104 | } | |
3105 | return 0; | |
3106 | } | |
3107 | ||
3108 | ||
3109 | ||
3110 | /*! | |
3111 | * Wake up users of a particular file waiting for stores to take | |
3112 | * place. | |
3113 | * | |
3114 | * \param avc Ptr to related vcache entry. | |
3115 | * | |
3116 | * \note Environment: | |
3117 | * Nothing interesting. | |
3118 | */ | |
3119 | int | |
3120 | afs_wakeup(struct vcache *avc) | |
3121 | { | |
3122 | int i; | |
3123 | struct brequest *tb; | |
3124 | tb = afs_brs; | |
3125 | AFS_STATCNT(afs_wakeup); | |
3126 | for (i = 0; i < NBRS; i++, tb++) { | |
3127 | /* if request is valid and for this file, we've found it */ | |
3128 | if (tb->refCount > 0 && avc == tb->vc) { | |
3129 | ||
3130 | /* | |
3131 | * If CSafeStore is on, then we don't awaken the guy | |
3132 | * waiting for the store until the whole store has finished. | |
3133 | * Otherwise, we do it now. Note that if CSafeStore is on, | |
3134 | * the BStore routine actually wakes up the user, instead | |
3135 | * of us. | |
3136 | * I think this is redundant now because this sort of thing | |
3137 | * is already being handled by the higher-level code. | |
3138 | */ | |
3139 | if ((avc->f.states & CSafeStore) == 0) { | |
3140 | tb->code_raw = tb->code_checkcode = 0; | |
3141 | tb->flags |= BUVALID; | |
3142 | if (tb->flags & BUWAIT) { | |
3143 | tb->flags &= ~BUWAIT; | |
3144 | afs_osi_Wakeup(tb); | |
3145 | } | |
3146 | } | |
3147 | break; | |
3148 | } | |
3149 | } | |
3150 | return 0; | |
3151 | } | |
3152 | ||
3153 | /*! | |
3154 | * Given a file name and inode, set up that file to be an | |
3155 | * active member in the AFS cache. This also involves checking | |
3156 | * the usability of its data. | |
3157 | * | |
3158 | * \param afile Name of the cache file to initialize. | |
3159 | * \param ainode Inode of the file. | |
3160 | * | |
3161 | * \note Environment: | |
3162 | * This function is called only during initialization. | |
3163 | */ | |
3164 | int | |
3165 | afs_InitCacheFile(char *afile, ino_t ainode) | |
3166 | { | |
3167 | afs_int32 code; | |
3168 | afs_int32 index; | |
3169 | int fileIsBad; | |
3170 | struct osi_file *tfile; | |
3171 | struct osi_stat tstat; | |
3172 | struct dcache *tdc; | |
3173 | ||
3174 | AFS_STATCNT(afs_InitCacheFile); | |
3175 | index = afs_stats_cmperf.cacheNumEntries; | |
3176 | if (index >= afs_cacheFiles) | |
3177 | return EINVAL; | |
3178 | ||
3179 | ObtainWriteLock(&afs_xdcache, 282); | |
3180 | tdc = afs_GetNewDSlot(index); | |
3181 | ReleaseReadLock(&tdc->tlock); | |
3182 | ReleaseWriteLock(&afs_xdcache); | |
3183 | ||
3184 | ObtainWriteLock(&tdc->lock, 621); | |
3185 | ObtainWriteLock(&afs_xdcache, 622); | |
3186 | if (!afile && !ainode) { | |
3187 | tfile = NULL; | |
3188 | fileIsBad = 1; | |
3189 | } else { | |
3190 | if (afile) { | |
3191 | code = afs_LookupInodeByPath(afile, &tdc->f.inode.ufs, NULL); | |
3192 | if (code) { | |
3193 | ReleaseWriteLock(&afs_xdcache); | |
3194 | ReleaseWriteLock(&tdc->lock); | |
3195 | afs_PutDCache(tdc); | |
3196 | return code; | |
3197 | } | |
3198 | } else { | |
3199 | /* Add any other 'complex' inode types here ... */ | |
3200 | #if !defined(AFS_LINUX26_ENV) && !defined(AFS_CACHE_VNODE_PATH) | |
3201 | tdc->f.inode.ufs = ainode; | |
3202 | #else | |
3203 | osi_Panic("Can't init cache with inode numbers when complex inodes are " | |
3204 | "in use\n"); | |
3205 | #endif | |
3206 | } | |
3207 | fileIsBad = 0; | |
3208 | if ((tdc->f.states & DWriting) || tdc->f.fid.Fid.Volume == 0) | |
3209 | fileIsBad = 1; | |
3210 | tfile = osi_UFSOpen(&tdc->f.inode); | |
3211 | if (!tfile) { | |
3212 | ReleaseWriteLock(&afs_xdcache); | |
3213 | ReleaseWriteLock(&tdc->lock); | |
3214 | afs_PutDCache(tdc); | |
3215 | return ENOENT; | |
3216 | } | |
3217 | ||
3218 | code = afs_osi_Stat(tfile, &tstat); | |
3219 | if (code) | |
3220 | osi_Panic("initcachefile stat"); | |
3221 | ||
3222 | /* | |
3223 | * If file size doesn't match the cache info file, it's probably bad. | |
3224 | */ | |
3225 | if (tdc->f.chunkBytes != tstat.size) | |
3226 | fileIsBad = 1; | |
3227 | /* | |
3228 | * If file changed within T (120?) seconds of cache info file, it's | |
3229 | * probably bad. In addition, if slot changed within last T seconds, | |
3230 | * the cache info file may be incorrectly identified, and so slot | |
3231 | * may be bad. | |
3232 | */ | |
3233 | if (cacheInfoModTime < tstat.mtime + 120) | |
3234 | fileIsBad = 1; | |
3235 | if (cacheInfoModTime < tdc->f.modTime + 120) | |
3236 | fileIsBad = 1; | |
3237 | /* In case write through is behind, make sure cache items entry is | |
3238 | * at least as new as the chunk. | |
3239 | */ | |
3240 | if (tdc->f.modTime < tstat.mtime) | |
3241 | fileIsBad = 1; | |
3242 | } | |
3243 | tdc->f.chunkBytes = 0; | |
3244 | ||
3245 | if (fileIsBad) { | |
3246 | tdc->f.fid.Fid.Volume = 0; /* not in the hash table */ | |
3247 | if (tfile && tstat.size != 0) | |
3248 | osi_UFSTruncate(tfile, 0); | |
3249 | tdc->f.states &= ~(DRO|DBackup|DRW); | |
3250 | afs_DCMoveBucket(tdc, 0, 0); | |
3251 | /* put entry in free cache slot list */ | |
3252 | afs_dvnextTbl[tdc->index] = afs_freeDCList; | |
3253 | afs_freeDCList = index; | |
3254 | afs_freeDCCount++; | |
3255 | afs_indexFlags[index] |= IFFree; | |
3256 | afs_indexUnique[index] = 0; | |
3257 | } else { | |
3258 | /* | |
3259 | * We must put this entry in the appropriate hash tables. | |
3260 | * Note that i is still set from the above DCHash call | |
3261 | */ | |
3262 | code = DCHash(&tdc->f.fid, tdc->f.chunk); | |
3263 | afs_dcnextTbl[tdc->index] = afs_dchashTbl[code]; | |
3264 | afs_dchashTbl[code] = tdc->index; | |
3265 | code = DVHash(&tdc->f.fid); | |
3266 | afs_dvnextTbl[tdc->index] = afs_dvhashTbl[code]; | |
3267 | afs_dvhashTbl[code] = tdc->index; | |
3268 | afs_AdjustSize(tdc, tstat.size); /* adjust to new size */ | |
3269 | if (tstat.size > 0) | |
3270 | /* has nontrivial amt of data */ | |
3271 | afs_indexFlags[index] |= IFEverUsed; | |
3272 | afs_stats_cmperf.cacheFilesReused++; | |
3273 | /* | |
3274 | * Initialize index times to file's mod times; init indexCounter | |
3275 | * to max thereof | |
3276 | */ | |
3277 | hset32(afs_indexTimes[index], tstat.atime); | |
3278 | if (hgetlo(afs_indexCounter) < tstat.atime) { | |
3279 | hset32(afs_indexCounter, tstat.atime); | |
3280 | } | |
3281 | afs_indexUnique[index] = tdc->f.fid.Fid.Unique; | |
3282 | } /*File is not bad */ | |
3283 | ||
3284 | if (tfile) | |
3285 | osi_UFSClose(tfile); | |
3286 | tdc->f.states &= ~DWriting; | |
3287 | tdc->dflags &= ~DFEntryMod; | |
3288 | /* don't set f.modTime; we're just cleaning up */ | |
3289 | osi_Assert(afs_WriteDCache(tdc, 0) == 0); | |
3290 | ReleaseWriteLock(&afs_xdcache); | |
3291 | ReleaseWriteLock(&tdc->lock); | |
3292 | afs_PutDCache(tdc); | |
3293 | afs_stats_cmperf.cacheNumEntries++; | |
3294 | return 0; | |
3295 | } | |
3296 | ||
3297 | ||
3298 | /*Max # of struct dcache's resident at any time*/ | |
3299 | /* | |
3300 | * If 'dchint' is enabled then in-memory dcache min is increased because of | |
3301 | * crashes... | |
3302 | */ | |
3303 | #define DDSIZE 200 | |
3304 | ||
3305 | /*! | |
3306 | * Initialize dcache related variables. | |
3307 | * | |
3308 | * \param afiles | |
3309 | * \param ablocks | |
3310 | * \param aDentries | |
3311 | * \param achunk | |
3312 | * \param aflags | |
3313 | * | |
3314 | */ | |
3315 | int | |
3316 | afs_dcacheInit(int afiles, int ablocks, int aDentries, int achunk, int aflags) | |
3317 | { | |
3318 | struct dcache *tdp; | |
3319 | int i; | |
3320 | int code; | |
3321 | int afs_dhashbits; | |
3322 | ||
3323 | afs_freeDCList = NULLIDX; | |
3324 | afs_discardDCList = NULLIDX; | |
3325 | afs_freeDCCount = 0; | |
3326 | afs_freeDSList = NULL; | |
3327 | hzero(afs_indexCounter); | |
3328 | ||
3329 | LOCK_INIT(&afs_xdcache, "afs_xdcache"); | |
3330 | ||
3331 | /* | |
3332 | * Set chunk size | |
3333 | */ | |
3334 | if (achunk) { | |
3335 | if (achunk < 0 || achunk > 30) | |
3336 | achunk = 13; /* Use default */ | |
3337 | AFS_SETCHUNKSIZE(achunk); | |
3338 | } | |
3339 | ||
3340 | if (!aDentries) | |
3341 | aDentries = DDSIZE; | |
3342 | ||
3343 | /* afs_dhashsize defaults to 1024 */ | |
3344 | if (aDentries > 512) | |
3345 | afs_dhashsize = 2048; | |
3346 | /* Try to keep the average chain length around two unless the table | |
3347 | * would be ridiculously big. */ | |
3348 | if (aDentries > 4096) { | |
3349 | afs_dhashbits = opr_fls(aDentries) - 3; | |
3350 | /* Cap the hash tables to 32k entries. */ | |
3351 | if (afs_dhashbits > 15) | |
3352 | afs_dhashbits = 15; | |
3353 | afs_dhashsize = opr_jhash_size(afs_dhashbits); | |
3354 | } | |
3355 | /* initialize hash tables */ | |
3356 | afs_dvhashTbl = afs_osi_Alloc(afs_dhashsize * sizeof(afs_int32)); | |
3357 | osi_Assert(afs_dvhashTbl != NULL); | |
3358 | afs_dchashTbl = afs_osi_Alloc(afs_dhashsize * sizeof(afs_int32)); | |
3359 | osi_Assert(afs_dchashTbl != NULL); | |
3360 | for (i = 0; i < afs_dhashsize; i++) { | |
3361 | afs_dvhashTbl[i] = NULLIDX; | |
3362 | afs_dchashTbl[i] = NULLIDX; | |
3363 | } | |
3364 | afs_dvnextTbl = afs_osi_Alloc(afiles * sizeof(afs_int32)); | |
3365 | osi_Assert(afs_dvnextTbl != NULL); | |
3366 | afs_dcnextTbl = afs_osi_Alloc(afiles * sizeof(afs_int32)); | |
3367 | osi_Assert(afs_dcnextTbl != NULL); | |
3368 | for (i = 0; i < afiles; i++) { | |
3369 | afs_dvnextTbl[i] = NULLIDX; | |
3370 | afs_dcnextTbl[i] = NULLIDX; | |
3371 | } | |
3372 | ||
3373 | /* Allocate and zero the pointer array to the dcache entries */ | |
3374 | afs_indexTable = afs_osi_Alloc(sizeof(struct dcache *) * afiles); | |
3375 | osi_Assert(afs_indexTable != NULL); | |
3376 | memset(afs_indexTable, 0, sizeof(struct dcache *) * afiles); | |
3377 | afs_indexTimes = afs_osi_Alloc(afiles * sizeof(afs_hyper_t)); | |
3378 | osi_Assert(afs_indexTimes != NULL); | |
3379 | memset(afs_indexTimes, 0, afiles * sizeof(afs_hyper_t)); | |
3380 | afs_indexUnique = afs_osi_Alloc(afiles * sizeof(afs_uint32)); | |
3381 | osi_Assert(afs_indexUnique != NULL); | |
3382 | memset(afs_indexUnique, 0, afiles * sizeof(afs_uint32)); | |
3383 | afs_indexFlags = afs_osi_Alloc(afiles * sizeof(u_char)); | |
3384 | osi_Assert(afs_indexFlags != NULL); | |
3385 | memset(afs_indexFlags, 0, afiles * sizeof(char)); | |
3386 | ||
3387 | /* Allocate and thread the struct dcache entries themselves */ | |
3388 | tdp = afs_Initial_freeDSList = | |
3389 | afs_osi_Alloc(aDentries * sizeof(struct dcache)); | |
3390 | osi_Assert(tdp != NULL); | |
3391 | memset(tdp, 0, aDentries * sizeof(struct dcache)); | |
3392 | #ifdef KERNEL_HAVE_PIN | |
3393 | pin((char *)afs_indexTable, sizeof(struct dcache *) * afiles); /* XXX */ | |
3394 | pin((char *)afs_indexTimes, sizeof(afs_hyper_t) * afiles); /* XXX */ | |
3395 | pin((char *)afs_indexFlags, sizeof(char) * afiles); /* XXX */ | |
3396 | pin((char *)afs_indexUnique, sizeof(afs_int32) * afiles); /* XXX */ | |
3397 | pin((char *)tdp, aDentries * sizeof(struct dcache)); /* XXX */ | |
3398 | pin((char *)afs_dvhashTbl, sizeof(afs_int32) * afs_dhashsize); /* XXX */ | |
3399 | pin((char *)afs_dchashTbl, sizeof(afs_int32) * afs_dhashsize); /* XXX */ | |
3400 | pin((char *)afs_dcnextTbl, sizeof(afs_int32) * afiles); /* XXX */ | |
3401 | pin((char *)afs_dvnextTbl, sizeof(afs_int32) * afiles); /* XXX */ | |
3402 | #endif | |
3403 | ||
3404 | afs_freeDSList = &tdp[0]; | |
3405 | for (i = 0; i < aDentries - 1; i++) { | |
3406 | tdp[i].lruq.next = (struct afs_q *)(&tdp[i + 1]); | |
3407 | AFS_RWLOCK_INIT(&tdp[i].lock, "dcache lock"); | |
3408 | AFS_RWLOCK_INIT(&tdp[i].tlock, "dcache tlock"); | |
3409 | AFS_RWLOCK_INIT(&tdp[i].mflock, "dcache flock"); | |
3410 | } | |
3411 | tdp[aDentries - 1].lruq.next = (struct afs_q *)0; | |
3412 | AFS_RWLOCK_INIT(&tdp[aDentries - 1].lock, "dcache lock"); | |
3413 | AFS_RWLOCK_INIT(&tdp[aDentries - 1].tlock, "dcache tlock"); | |
3414 | AFS_RWLOCK_INIT(&tdp[aDentries - 1].mflock, "dcache flock"); | |
3415 | ||
3416 | afs_stats_cmperf.cacheBlocksOrig = afs_stats_cmperf.cacheBlocksTotal = | |
3417 | afs_cacheBlocks = ablocks; | |
3418 | afs_ComputeCacheParms(); /* compute parms based on cache size */ | |
3419 | ||
3420 | afs_dcentries = aDentries; | |
3421 | afs_blocksUsed = 0; | |
3422 | afs_stats_cmperf.cacheBucket0_Discarded = | |
3423 | afs_stats_cmperf.cacheBucket1_Discarded = | |
3424 | afs_stats_cmperf.cacheBucket2_Discarded = 0; | |
3425 | afs_DCSizeInit(); | |
3426 | QInit(&afs_DLRU); | |
3427 | ||
3428 | if (aflags & AFSCALL_INIT_MEMCACHE) { | |
3429 | /* | |
3430 | * Use a memory cache instead of a disk cache | |
3431 | */ | |
3432 | cacheDiskType = AFS_FCACHE_TYPE_MEM; | |
3433 | afs_cacheType = &afs_MemCacheOps; | |
3434 | afiles = (afiles < aDentries) ? afiles : aDentries; /* min */ | |
3435 | ablocks = afiles * (AFS_FIRSTCSIZE / 1024); | |
3436 | /* ablocks is reported in 1K blocks */ | |
3437 | code = afs_InitMemCache(afiles, AFS_FIRSTCSIZE, aflags); | |
3438 | if (code != 0) { | |
3439 | afs_warn("afsd: memory cache too large for available memory.\n"); | |
3440 | afs_warn("afsd: AFS files cannot be accessed.\n\n"); | |
3441 | dcacheDisabled = 1; | |
3442 | return code; | |
3443 | } else | |
3444 | afs_warn("Memory cache: Allocating %d dcache entries...", | |
3445 | aDentries); | |
3446 | } else { | |
3447 | cacheDiskType = AFS_FCACHE_TYPE_UFS; | |
3448 | afs_cacheType = &afs_UfsCacheOps; | |
3449 | } | |
3450 | return 0; | |
3451 | } | |
3452 | ||
3453 | /*! | |
3454 | * Shuts down the cache. | |
3455 | * | |
3456 | */ | |
3457 | void | |
3458 | shutdown_dcache(void) | |
3459 | { | |
3460 | int i; | |
3461 | ||
3462 | #ifdef AFS_CACHE_VNODE_PATH | |
3463 | if (cacheDiskType != AFS_FCACHE_TYPE_MEM) { | |
3464 | struct dcache *tdc; | |
3465 | for (i = 0; i < afs_cacheFiles; i++) { | |
3466 | tdc = afs_indexTable[i]; | |
3467 | if (tdc) { | |
3468 | afs_osi_FreeStr(tdc->f.inode.ufs); | |
3469 | } | |
3470 | } | |
3471 | } | |
3472 | #endif | |
3473 | ||
3474 | afs_osi_Free(afs_dvnextTbl, afs_cacheFiles * sizeof(afs_int32)); | |
3475 | afs_osi_Free(afs_dcnextTbl, afs_cacheFiles * sizeof(afs_int32)); | |
3476 | afs_osi_Free(afs_indexTable, afs_cacheFiles * sizeof(struct dcache *)); | |
3477 | afs_osi_Free(afs_indexTimes, afs_cacheFiles * sizeof(afs_hyper_t)); | |
3478 | afs_osi_Free(afs_indexUnique, afs_cacheFiles * sizeof(afs_uint32)); | |
3479 | afs_osi_Free(afs_indexFlags, afs_cacheFiles * sizeof(u_char)); | |
3480 | afs_osi_Free(afs_Initial_freeDSList, | |
3481 | afs_dcentries * sizeof(struct dcache)); | |
3482 | #ifdef KERNEL_HAVE_PIN | |
3483 | unpin((char *)afs_dcnextTbl, afs_cacheFiles * sizeof(afs_int32)); | |
3484 | unpin((char *)afs_dvnextTbl, afs_cacheFiles * sizeof(afs_int32)); | |
3485 | unpin((char *)afs_indexTable, afs_cacheFiles * sizeof(struct dcache *)); | |
3486 | unpin((char *)afs_indexTimes, afs_cacheFiles * sizeof(afs_hyper_t)); | |
3487 | unpin((char *)afs_indexUnique, afs_cacheFiles * sizeof(afs_uint32)); | |
3488 | unpin((u_char *) afs_indexFlags, afs_cacheFiles * sizeof(u_char)); | |
3489 | unpin(afs_Initial_freeDSList, afs_dcentries * sizeof(struct dcache)); | |
3490 | #endif | |
3491 | ||
3492 | ||
3493 | for (i = 0; i < afs_dhashsize; i++) { | |
3494 | afs_dvhashTbl[i] = NULLIDX; | |
3495 | afs_dchashTbl[i] = NULLIDX; | |
3496 | } | |
3497 | ||
3498 | afs_osi_Free(afs_dvhashTbl, afs_dhashsize * sizeof(afs_int32)); | |
3499 | afs_osi_Free(afs_dchashTbl, afs_dhashsize * sizeof(afs_int32)); | |
3500 | ||
3501 | afs_blocksUsed = afs_dcentries = 0; | |
3502 | afs_stats_cmperf.cacheBucket0_Discarded = | |
3503 | afs_stats_cmperf.cacheBucket1_Discarded = | |
3504 | afs_stats_cmperf.cacheBucket2_Discarded = 0; | |
3505 | hzero(afs_indexCounter); | |
3506 | ||
3507 | afs_freeDCCount = 0; | |
3508 | afs_freeDCList = NULLIDX; | |
3509 | afs_discardDCList = NULLIDX; | |
3510 | afs_freeDSList = afs_Initial_freeDSList = 0; | |
3511 | ||
3512 | LOCK_INIT(&afs_xdcache, "afs_xdcache"); | |
3513 | QInit(&afs_DLRU); | |
3514 | ||
3515 | } | |
3516 | ||
3517 | /*! | |
3518 | * Get a dcache ready for writing, respecting the current cache size limits | |
3519 | * | |
3520 | * len is required because afs_GetDCache with flag == 4 expects the length | |
3521 | * field to be filled. It decides from this whether it's necessary to fetch | |
3522 | * data into the chunk before writing or not (when the whole chunk is | |
3523 | * overwritten!). | |
3524 | * | |
3525 | * \param avc The vcache to fetch a dcache for | |
3526 | * \param filePos The start of the section to be written | |
3527 | * \param len The length of the section to be written | |
 * \param areq Request record, passed through to afs_GetDCache
 * \param noLock If nonzero, only locate an existing dcache (via
 *               afs_FindDCache); no new chunk is created and the
 *               cache-drain wait is skipped
3530 | * | |
3531 | * \return If successful, a reference counted dcache with tdc->lock held. Lock | |
3532 | * must be released and afs_PutDCache() called to free dcache. | |
3533 | * NULL on failure | |
3534 | * | |
3535 | * \note avc->lock must be held on entry. Function may release and reobtain | |
3536 | * avc->lock and GLOCK. | |
3537 | */ | |
3538 | ||
struct dcache *
afs_ObtainDCacheForWriting(struct vcache *avc, afs_size_t filePos,
			   afs_size_t len, struct vrequest *areq,
			   int noLock)
{
    struct dcache *tdc = NULL;
    afs_size_t offset;

    /* read the cached info */
    if (noLock) {
	/* Caller asked us not to block: only use a chunk that already
	 * exists in the cache; never create one or wait for space. */
	tdc = afs_FindDCache(avc, filePos);
	if (tdc)
	    ObtainWriteLock(&tdc->lock, 657);
    } else if (afs_blocksUsed >
	       PERCENT(CM_WAITFORDRAINPCT, afs_cacheBlocks)) {
	/* Cache usage is above the drain threshold: prefer reusing an
	 * existing, up-to-date chunk over allocating a new one. */
	tdc = afs_FindDCache(avc, filePos);
	if (tdc) {
	    ObtainWriteLock(&tdc->lock, 658);
	    if (!hsame(tdc->f.versionNo, avc->f.m.DataVersion)
		|| (tdc->dflags & DFFetching)) {
		/* Chunk is stale relative to the vcache's data version,
		 * or a fetch is in progress; we can't write into it. */
		ReleaseWriteLock(&tdc->lock);
		afs_PutDCache(tdc);
		tdc = NULL;
	    }
	}
	if (!tdc) {
	    /* Need a new chunk but the cache is too full: poke the
	     * truncate daemon and sleep until usage drains below the
	     * threshold.  avc->lock is dropped across the sleep (see
	     * the function comment) and reacquired each iteration. */
	    afs_MaybeWakeupTruncateDaemon();
	    while (afs_blocksUsed >
		   PERCENT(CM_WAITFORDRAINPCT, afs_cacheBlocks)) {
		ReleaseWriteLock(&avc->lock);
		if (afs_blocksUsed - afs_blocksDiscarded >
		    PERCENT(CM_WAITFORDRAINPCT, afs_cacheBlocks)) {
		    /* Even counting already-discarded blocks we are over
		     * the limit; wait for the drain wakeup. */
		    afs_WaitForCacheDrain = 1;
		    afs_osi_Sleep(&afs_WaitForCacheDrain);
		}
		afs_MaybeFreeDiscardedDCache();
		afs_MaybeWakeupTruncateDaemon();
		ObtainWriteLock(&avc->lock, 509);
	    }
	    avc->f.states |= CDirty;
	    /* Flag 4: caller intends to overwrite; len tells GetDCache
	     * whether a pre-fetch of existing data is needed. */
	    tdc = afs_GetDCache(avc, filePos, areq, &offset, &len, 4);
	    if (tdc)
		ObtainWriteLock(&tdc->lock, 659);
	}
    } else {
	/* Plenty of cache space: get (and possibly create) the chunk. */
	tdc = afs_GetDCache(avc, filePos, areq, &offset, &len, 4);
	if (tdc)
	    ObtainWriteLock(&tdc->lock, 660);
    }
    if (tdc) {
	/* Mark the chunk dirty so the truncate daemon won't evict it. */
	if (!(afs_indexFlags[tdc->index] & IFDataMod)) {
	    afs_stats_cmperf.cacheCurrDirtyChunks++;
	    afs_indexFlags[tdc->index] |= IFDataMod;	/* so it doesn't disappear */
	}
	if (!(tdc->f.states & DWriting)) {
	    /* don't mark entry as mod if we don't have to */
	    tdc->f.states |= DWriting;
	    tdc->dflags |= DFEntryMod;
	}
    }
    return tdc;
}
3601 | ||
3602 | /*! | |
3603 | * Make a shadow copy of a dir's dcache. It's used for disconnected | |
3604 | * operations like remove/create/rename to keep the original directory data. | |
3605 | * On reconnection, we can diff the original data with the server and get the | |
3606 | * server changes and with the local data to get the local changes. | |
3607 | * | |
3608 | * \param avc The dir vnode. | |
3609 | * \param adc The dir dcache. | |
3610 | * | |
3611 | * \return 0 for success. | |
3612 | * | |
3613 | * \note The vcache entry must be write locked. | |
3614 | * \note The dcache entry must be read locked. | |
3615 | */ | |
3616 | int | |
3617 | afs_MakeShadowDir(struct vcache *avc, struct dcache *adc) | |
3618 | { | |
3619 | int i, code, ret_code = 0, written, trans_size; | |
3620 | struct dcache *new_dc = NULL; | |
3621 | struct osi_file *tfile_src, *tfile_dst; | |
3622 | struct VenusFid shadow_fid; | |
3623 | char *data; | |
3624 | ||
3625 | /* Is this a dir? */ | |
3626 | if (vType(avc) != VDIR) | |
3627 | return ENOTDIR; | |
3628 | ||
3629 | if (avc->f.shadow.vnode || avc->f.shadow.unique) | |
3630 | return EEXIST; | |
3631 | ||
3632 | /* Generate a fid for the shadow dir. */ | |
3633 | shadow_fid.Cell = avc->f.fid.Cell; | |
3634 | shadow_fid.Fid.Volume = avc->f.fid.Fid.Volume; | |
3635 | afs_GenShadowFid(&shadow_fid); | |
3636 | ||
3637 | ObtainWriteLock(&afs_xdcache, 716); | |
3638 | ||
3639 | /* Get a fresh dcache. */ | |
3640 | new_dc = afs_AllocDCache(avc, 0, 0, &shadow_fid); | |
3641 | osi_Assert(new_dc); | |
3642 | ||
3643 | ObtainReadLock(&adc->mflock); | |
3644 | ||
3645 | /* Set up the new fid. */ | |
3646 | /* Copy interesting data from original dir dcache. */ | |
3647 | new_dc->mflags = adc->mflags; | |
3648 | new_dc->dflags = adc->dflags; | |
3649 | new_dc->f.modTime = adc->f.modTime; | |
3650 | new_dc->f.versionNo = adc->f.versionNo; | |
3651 | new_dc->f.states = adc->f.states; | |
3652 | new_dc->f.chunk= adc->f.chunk; | |
3653 | new_dc->f.chunkBytes = adc->f.chunkBytes; | |
3654 | ||
3655 | ReleaseReadLock(&adc->mflock); | |
3656 | ||
3657 | /* Now add to the two hash chains */ | |
3658 | i = DCHash(&shadow_fid, 0); | |
3659 | afs_dcnextTbl[new_dc->index] = afs_dchashTbl[i]; | |
3660 | afs_dchashTbl[i] = new_dc->index; | |
3661 | ||
3662 | i = DVHash(&shadow_fid); | |
3663 | afs_dvnextTbl[new_dc->index] = afs_dvhashTbl[i]; | |
3664 | afs_dvhashTbl[i] = new_dc->index; | |
3665 | ||
3666 | ReleaseWriteLock(&afs_xdcache); | |
3667 | ||
3668 | /* Alloc a 4k block. */ | |
3669 | data = afs_osi_Alloc(4096); | |
3670 | if (!data) { | |
3671 | afs_warn("afs_MakeShadowDir: could not alloc data\n"); | |
3672 | ret_code = ENOMEM; | |
3673 | goto done; | |
3674 | } | |
3675 | ||
3676 | /* Open the files. */ | |
3677 | tfile_src = afs_CFileOpen(&adc->f.inode); | |
3678 | tfile_dst = afs_CFileOpen(&new_dc->f.inode); | |
3679 | osi_Assert(tfile_src); | |
3680 | osi_Assert(tfile_dst); | |
3681 | ||
3682 | /* And now copy dir dcache data into this dcache, | |
3683 | * 4k at a time. | |
3684 | */ | |
3685 | written = 0; | |
3686 | while (written < adc->f.chunkBytes) { | |
3687 | trans_size = adc->f.chunkBytes - written; | |
3688 | if (trans_size > 4096) | |
3689 | trans_size = 4096; | |
3690 | ||
3691 | /* Read a chunk from the dcache. */ | |
3692 | code = afs_CFileRead(tfile_src, written, data, trans_size); | |
3693 | if (code < trans_size) { | |
3694 | ret_code = EIO; | |
3695 | break; | |
3696 | } | |
3697 | ||
3698 | /* Write it to the new dcache. */ | |
3699 | code = afs_CFileWrite(tfile_dst, written, data, trans_size); | |
3700 | if (code < trans_size) { | |
3701 | ret_code = EIO; | |
3702 | break; | |
3703 | } | |
3704 | ||
3705 | written+=trans_size; | |
3706 | } | |
3707 | ||
3708 | afs_CFileClose(tfile_dst); | |
3709 | afs_CFileClose(tfile_src); | |
3710 | ||
3711 | afs_osi_Free(data, 4096); | |
3712 | ||
3713 | ReleaseWriteLock(&new_dc->lock); | |
3714 | afs_PutDCache(new_dc); | |
3715 | ||
3716 | if (!ret_code) { | |
3717 | ObtainWriteLock(&afs_xvcache, 763); | |
3718 | ObtainWriteLock(&afs_disconDirtyLock, 765); | |
3719 | QAdd(&afs_disconShadow, &avc->shadowq); | |
3720 | osi_Assert((afs_RefVCache(avc) == 0)); | |
3721 | ReleaseWriteLock(&afs_disconDirtyLock); | |
3722 | ReleaseWriteLock(&afs_xvcache); | |
3723 | ||
3724 | avc->f.shadow.vnode = shadow_fid.Fid.Vnode; | |
3725 | avc->f.shadow.unique = shadow_fid.Fid.Unique; | |
3726 | } | |
3727 | ||
3728 | done: | |
3729 | return ret_code; | |
3730 | } | |
3731 | ||
3732 | /*! | |
3733 | * Delete the dcaches of a shadow dir. | |
3734 | * | |
3735 | * \param avc The vcache containing the shadow fid. | |
3736 | * | |
3737 | * \note avc must be write locked. | |
3738 | */ | |
3739 | void | |
3740 | afs_DeleteShadowDir(struct vcache *avc) | |
3741 | { | |
3742 | struct dcache *tdc; | |
3743 | struct VenusFid shadow_fid; | |
3744 | ||
3745 | shadow_fid.Cell = avc->f.fid.Cell; | |
3746 | shadow_fid.Fid.Volume = avc->f.fid.Fid.Volume; | |
3747 | shadow_fid.Fid.Vnode = avc->f.shadow.vnode; | |
3748 | shadow_fid.Fid.Unique = avc->f.shadow.unique; | |
3749 | ||
3750 | tdc = afs_FindDCacheByFid(&shadow_fid); | |
3751 | if (tdc) { | |
3752 | afs_HashOutDCache(tdc, 1); | |
3753 | afs_DiscardDCache(tdc); | |
3754 | afs_PutDCache(tdc); | |
3755 | } | |
3756 | avc->f.shadow.vnode = avc->f.shadow.unique = 0; | |
3757 | ObtainWriteLock(&afs_disconDirtyLock, 708); | |
3758 | QRemove(&avc->shadowq); | |
3759 | ReleaseWriteLock(&afs_disconDirtyLock); | |
3760 | afs_PutVCache(avc); /* Because we held it when we added to the queue */ | |
3761 | } | |
3762 | ||
3763 | /*! | |
3764 | * Populate a dcache with empty chunks up to a given file size, | |
3765 | * used before extending a file in order to avoid 'holes' which | |
3766 | * we can't access in disconnected mode. | |
3767 | * | |
3768 | * \param avc The vcache which is being extended (locked) | |
 * \param apos The new length of the file
 * \param areq Request record, passed through to afs_GetDCache
3770 | * | |
3771 | */ | |
3772 | void | |
3773 | afs_PopulateDCache(struct vcache *avc, afs_size_t apos, struct vrequest *areq) | |
3774 | { | |
3775 | struct dcache *tdc; | |
3776 | afs_size_t len, offset; | |
3777 | afs_int32 start, end; | |
3778 | ||
3779 | /* We're doing this to deal with the situation where we extend | |
3780 | * by writing after lseek()ing past the end of the file . If that | |
3781 | * extension skips chunks, then those chunks won't be created, and | |
3782 | * GetDCache will assume that they have to be fetched from the server. | |
3783 | * So, for each chunk between the current file position, and the new | |
3784 | * length we GetDCache for that chunk. | |
3785 | */ | |
3786 | ||
3787 | if (AFS_CHUNK(apos) == 0 || apos <= avc->f.m.Length) | |
3788 | return; | |
3789 | ||
3790 | if (avc->f.m.Length == 0) | |
3791 | start = 0; | |
3792 | else | |
3793 | start = AFS_CHUNK(avc->f.m.Length)+1; | |
3794 | ||
3795 | end = AFS_CHUNK(apos); | |
3796 | ||
3797 | while (start<end) { | |
3798 | len = AFS_CHUNKTOSIZE(start); | |
3799 | tdc = afs_GetDCache(avc, AFS_CHUNKTOBASE(start), areq, &offset, &len, 4); | |
3800 | if (tdc) | |
3801 | afs_PutDCache(tdc); | |
3802 | start++; | |
3803 | } | |
3804 | } |