src/afs/afs_bypasscache.c (OpenAFS, upstream 1.8.5)
/*
 * COPYRIGHT © 2000
 * THE REGENTS OF THE UNIVERSITY OF MICHIGAN
 * ALL RIGHTS RESERVED
 *
 * Permission is granted to use, copy, create derivative works
 * and redistribute this software and such derivative works
 * for any purpose, so long as the name of The University of
 * Michigan is not used in any advertising or publicity
 * pertaining to the use or distribution of this software
 * without specific, written prior authorization. If the
 * above copyright notice or any other identification of the
 * University of Michigan is included in any copy of any
 * portion of this software, then the disclaimer below must
 * also be included.
 *
 * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
 * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
 * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
 * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
 * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
 * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
 * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
 * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
 * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGES.
 */

/*
 * Portions Copyright (c) 2008
 * The Linux Box Corporation
 * ALL RIGHTS RESERVED
 *
 * Permission is granted to use, copy, create derivative works
 * and redistribute this software and such derivative works
 * for any purpose, so long as the name of the Linux Box
 * Corporation is not used in any advertising or publicity
 * pertaining to the use or distribution of this software
 * without specific, written prior authorization. If the
 * above copyright notice or any other identification of the
 * Linux Box Corporation is included in any copy of any
 * portion of this software, then the disclaimer below must
 * also be included.
 *
 * This software is provided as is, without representation
 * from the Linux Box Corporation as to its fitness for any
 * purpose, and without warranty by the Linux Box Corporation
 * of any kind, either express or implied, including
 * without limitation the implied warranties of
 * merchantability and fitness for a particular purpose. The
 * Linux Box Corporation shall not be liable for any damages,
 * including special, indirect, incidental, or consequential
 * damages, with respect to any claim arising out of or in
 * connection with the use of the software, even if it has been
 * or is hereafter advised of the possibility of such damages.
 */


#include <afsconfig.h>
#include "afs/param.h"
#if defined(AFS_CACHE_BYPASS) || defined(UKERNEL)
#include "afs/afs_bypasscache.h"

/*
 * afs_bypasscache.c
 *
 */
#include "afs/sysincludes.h"	/* Standard vendor system headers */
#include "afs/afsincludes.h"	/* Afs-based standard headers */
#include "afs/afs_stats.h"	/* statistics */
#include "afs/nfsclient.h"
#include "rx/rx_globals.h"

#ifndef afs_min
#define afs_min(A, B) (((A) < (B)) ? (A) : (B))
#endif
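/*
 * Note: like most function-style min macros, afs_min evaluates its
 * arguments more than once; avoid passing expressions with side effects.
 */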

/* conditional GLOCK macros */
#define COND_GLOCK(var) \
    do { \
        var = ISAFS_GLOCK(); \
        if (!var) \
            RX_AFS_GLOCK(); \
    } while (0)

#define COND_RE_GUNLOCK(var) \
    do { \
        if (var) \
            RX_AFS_GUNLOCK(); \
    } while (0)


/* conditional GUNLOCK macros */

#define COND_GUNLOCK(var) \
    do { \
        var = ISAFS_GLOCK(); \
        if (var) \
            RX_AFS_GUNLOCK(); \
    } while (0)

#define COND_RE_GLOCK(var) \
    do { \
        if (var) \
            RX_AFS_GLOCK(); \
    } while (0)

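/*
 * Typical usage, as in afs_NoCacheFetchProc() below: record whether the
 * global lock is held, drop it around a blocking Rx call, then restore
 * the caller's locking state:
 *
 *     int locked;
 *     COND_GUNLOCK(locked);
 *     code = rx_Read(acall, (char *)&length, sizeof(afs_int32));
 *     COND_RE_GLOCK(locked);
 */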

int cache_bypass_strategy = NEVER_BYPASS_CACHE;
afs_size_t cache_bypass_threshold = AFS_CACHE_BYPASS_DISABLED;	/* file size > threshold triggers bypass */
int cache_bypass_prefetch = 1;	/* Should we do prefetching? */

extern afs_rwlock_t afs_xcbhash;

/*
 * This is almost exactly like the PFlush() routine in afs_pioctl.c,
 * but that routine is static. We are about to change a file from
 * normal caching to bypassing the cache. Therefore, we want to
 * free up any cache space in use by the file, and throw out any
 * existing VM pages for the file. We keep track of the number of
 * times we go back and forth from caching to bypass.
 */
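/*
 * The aflags bits select the bookkeeping done alongside the transition:
 * TRANSChangeDesiredBit records the new desired state (FCSDesireBypass)
 * and TRANSSetManualBit marks the change as explicitly requested
 * (FCSManuallySet), e.g.:
 *
 *     afs_TransitionToBypass(avc, acred, TRANSChangeDesiredBit);
 */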
void
afs_TransitionToBypass(struct vcache *avc,
                       afs_ucred_t *acred, int aflags)
{
    afs_int32 code;
    int setDesire = 0;
    int setManual = 0;

    if (!avc)
        return;

    if (aflags & TRANSChangeDesiredBit)
        setDesire = 1;
    if (aflags & TRANSSetManualBit)
        setManual = 1;

    AFS_GLOCK();

    ObtainWriteLock(&avc->lock, 925);
    /*
     * Someone may have beaten us to doing the transition - we had no lock
     * when we checked the flag earlier. No cause to panic, just return.
     */
    if (avc->cachingStates & FCSBypass)
        goto done;

    /* If we never cached this, just change state */
    if (setDesire && (!(avc->cachingStates & FCSBypass))) {
        avc->cachingStates |= FCSBypass;
        goto done;
    }

    /* cg2v, try to store any chunks not written 20071204 */
    if (avc->execsOrWriters > 0) {
        struct vrequest *treq = NULL;

        code = afs_CreateReq(&treq, acred);
        if (!code) {
            code = afs_StoreAllSegments(avc, treq, AFS_SYNC | AFS_LASTSTORE);
            afs_DestroyReq(treq);
        }
    }

    /* also cg2v, don't dequeue the callback */
    /* next reference will re-stat */
    afs_StaleVCacheFlags(avc, AFS_STALEVC_NOCB, CDirty);
    /* now find the disk cache entries */
    afs_TryToSmush(avc, acred, 1);
    if (avc->linkData && !(avc->f.states & CCore)) {
        afs_osi_Free(avc->linkData, strlen(avc->linkData) + 1);
        avc->linkData = NULL;
    }

    avc->cachingStates |= FCSBypass;	/* Set the bypass flag */
    if (setDesire)
        avc->cachingStates |= FCSDesireBypass;
    if (setManual)
        avc->cachingStates |= FCSManuallySet;
    avc->cachingTransitions++;

 done:
    ReleaseWriteLock(&avc->lock);
    AFS_GUNLOCK();
}

/*
 * This is almost exactly like the PFlush() routine in afs_pioctl.c,
 * but that routine is static. We are about to change a file from
 * bypassing caching to normal caching. Therefore, we want to
 * throw out any existing VM pages for the file. We keep track of
 * the number of times we go back and forth from caching to bypass.
 */
void
afs_TransitionToCaching(struct vcache *avc,
                        afs_ucred_t *acred,
                        int aflags)
{
    int resetDesire = 0;
    int setManual = 0;

    if (!avc)
        return;

    if (aflags & TRANSChangeDesiredBit)
        resetDesire = 1;
    if (aflags & TRANSSetManualBit)
        setManual = 1;

    AFS_GLOCK();
    ObtainWriteLock(&avc->lock, 926);
    /*
     * Someone may have beaten us to doing the transition - we had no lock
     * when we checked the flag earlier. No cause to panic, just return.
     */
    if (!(avc->cachingStates & FCSBypass))
        goto done;

    /* Ok, we actually do need to flush */
    /* next reference will re-stat cache entry */
    afs_StaleVCacheFlags(avc, 0, CDirty);

    /* now find the disk cache entries */
    afs_TryToSmush(avc, acred, 1);
    if (avc->linkData && !(avc->f.states & CCore)) {
        afs_osi_Free(avc->linkData, strlen(avc->linkData) + 1);
        avc->linkData = NULL;
    }

    avc->cachingStates &= ~(FCSBypass);	/* Reset the bypass flag */
    if (resetDesire)
        avc->cachingStates &= ~(FCSDesireBypass);
    if (setManual)
        avc->cachingStates |= FCSManuallySet;
    avc->cachingTransitions++;

 done:
    ReleaseWriteLock(&avc->lock);
    AFS_GUNLOCK();
}
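/*
 * A sketch of a manual toggle back to normal caching, using the flag
 * bits handled above:
 *
 *     afs_TransitionToCaching(avc, acred,
 *                             TRANSChangeDesiredBit | TRANSSetManualBit);
 */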

/* In the case where there's an error in afs_NoCacheFetchProc or
 * afs_PrefetchNoCache, all of the pages they've been passed need
 * to be unlocked.
 */
#ifdef UKERNEL
typedef void * bypass_page_t;

#define unlock_and_release_pages(auio)
#define release_full_page(pp, pageoff)
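/* Under UKERNEL both macros are no-ops: the "pages" here are plain user
 * buffers supplied by the caller (see afs_PrefetchNoCache), so there is
 * nothing to unlock or put. */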

#else
typedef struct page * bypass_page_t;

#define unlock_and_release_pages(auio) \
    do { \
        struct iovec *ciov; \
        bypass_page_t pp; \
        afs_int32 iovmax; \
        afs_int32 iovno = 0; \
        ciov = auio->uio_iov; \
        iovmax = auio->uio_iovcnt - 1; \
        pp = (bypass_page_t) ciov->iov_base; \
        while (1) { \
            if (pp) { \
                if (PageLocked(pp)) \
                    unlock_page(pp); \
                put_page(pp); /* decrement refcount */ \
            } \
            iovno++; \
            if (iovno > iovmax) \
                break; \
            ciov = (auio->uio_iov + iovno); \
            pp = (bypass_page_t) ciov->iov_base; \
        } \
    } while (0)

#define release_full_page(pp, pageoff) \
    do { \
        /* this is appropriate when no caller intends to unlock \
         * and release the page */ \
        SetPageUptodate(pp); \
        if (PageLocked(pp)) \
            unlock_page(pp); \
        else \
            afs_warn("afs_NoCacheFetchProc: page not locked!\n"); \
        put_page(pp); /* decrement refcount */ \
    } while (0)
#endif

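/*
 * Copy one run of bytes from an Rx-filled iovec array into a target page
 * (or flat buffer under UKERNEL). With partial set, copy only what is
 * needed to fill the current destination iovec from pageoff onward;
 * otherwise copy the remainder of rxiov[iovno] starting at iovoff.
 */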
static void
afs_bypass_copy_page(bypass_page_t pp, int pageoff, struct iovec *rxiov,
                     int iovno, int iovoff, struct uio *auio, int curiov, int partial)
{
    char *address;
    int dolen;

    if (partial)
        dolen = auio->uio_iov[curiov].iov_len - pageoff;
    else
        dolen = rxiov[iovno].iov_len - iovoff;

#if !defined(UKERNEL)
# if defined(KMAP_ATOMIC_TAKES_NO_KM_TYPE)
    address = kmap_atomic(pp);
# else
    address = kmap_atomic(pp, KM_USER0);
# endif
#else
    address = pp;
#endif
    memcpy(address + pageoff, (char *)(rxiov[iovno].iov_base) + iovoff, dolen);
#if !defined(UKERNEL)
# if defined(KMAP_ATOMIC_TAKES_NO_KM_TYPE)
    kunmap_atomic(address);
# else
    kunmap_atomic(address, KM_USER0);
# endif
#endif
}

/* no-cache prefetch routine */
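/*
 * The reply stream parsed here is a sequence of length-prefixed blocks:
 * each block starts with a 32-bit network-order length, followed by that
 * many bytes of file data. For AFS/DFS translator servers (CForeign),
 * the high bit of the length flags that more blocks follow; for plain
 * AFS fileservers a single block is assumed:
 *
 *     [ length | moredata bit ][ data ... ]   repeated while moredata
 */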
static afs_int32
afs_NoCacheFetchProc(struct rx_call *acall,
                     struct vcache *avc,
                     struct uio *auio,
                     afs_int32 release_pages,
                     afs_int32 size)
{
    afs_int32 length;
    afs_int32 code;
    int moredata, iovno, iovoff, iovmax, result, locked;
    struct iovec *ciov;
    struct iovec *rxiov;
    int nio = 0;
    bypass_page_t pp;

    int curpage, bytes;
    int pageoff;

    rxiov = osi_AllocSmallSpace(sizeof(struct iovec) * RX_MAXIOVECS);
    ciov = auio->uio_iov;
    pp = (bypass_page_t) ciov->iov_base;
    iovmax = auio->uio_iovcnt - 1;
    iovno = iovoff = result = 0;

    do {
        COND_GUNLOCK(locked);
        code = rx_Read(acall, (char *)&length, sizeof(afs_int32));
        COND_RE_GLOCK(locked);
        if (code != sizeof(afs_int32)) {
            result = EIO;
            afs_warn("Preread error. code: %d instead of %d\n",
                     code, (int)sizeof(afs_int32));
            unlock_and_release_pages(auio);
            goto done;
        } else
            length = ntohl(length);

        if (length > size) {
            result = EIO;
            afs_warn("Preread error. Got length %d, which is greater than size %d\n",
                     length, size);
            unlock_and_release_pages(auio);
            goto done;
        }

        /* If we get a 0 length reply, time to clean up and return */
        if (length == 0) {
            unlock_and_release_pages(auio);
            result = 0;
            goto done;
        }

        /*
         * The fetch protocol is extended for the AFS/DFS translator
         * to allow multiple blocks of data, each with its own length,
         * to be returned. As long as the top bit is set, there are more
         * blocks expected.
         *
         * We do not do this for AFS file servers because they sometimes
         * return large negative numbers as the transfer size.
         */
        if (avc->f.states & CForeign) {
            moredata = length & 0x80000000;
            length &= ~0x80000000;
        } else {
            moredata = 0;
        }

        for (curpage = 0; curpage <= iovmax; curpage++) {
            pageoff = 0;
            /* properly, this should track uio_resid, not a fixed page size! */
            while (pageoff < auio->uio_iov[curpage].iov_len) {
                /* If no more iovs, issue new read. */
                if (iovno >= nio) {
                    COND_GUNLOCK(locked);
                    bytes = rx_Readv(acall, rxiov, &nio, RX_MAXIOVECS, length);
                    COND_RE_GLOCK(locked);
                    if (bytes < 0) {
                        afs_warn("afs_NoCacheFetchProc: rx_Readv error. Return code was %d\n", bytes);
                        result = bytes;
                        unlock_and_release_pages(auio);
                        goto done;
                    } else if (bytes == 0) {
                        /* we failed to read the full length */
                        result = EIO;
                        afs_warn("afs_NoCacheFetchProc: rx_Readv returned zero. Aborting.\n");
                        unlock_and_release_pages(auio);
                        goto done;
                    }
                    size -= bytes;
                    auio->uio_resid -= bytes;
                    iovno = 0;
                }
                pp = (bypass_page_t) auio->uio_iov[curpage].iov_base;
                if (pageoff + (rxiov[iovno].iov_len - iovoff) <= auio->uio_iov[curpage].iov_len) {
                    /* Copy entire (or rest of) current iovec into current page */
                    if (pp)
                        afs_bypass_copy_page(pp, pageoff, rxiov, iovno, iovoff, auio, curpage, 0);
                    length -= (rxiov[iovno].iov_len - iovoff);
                    pageoff += rxiov[iovno].iov_len - iovoff;
                    iovno++;
                    iovoff = 0;
                } else {
                    /* Copy only what's needed to fill current page */
                    if (pp)
                        afs_bypass_copy_page(pp, pageoff, rxiov, iovno, iovoff, auio, curpage, 1);
                    length -= (auio->uio_iov[curpage].iov_len - pageoff);
                    iovoff += auio->uio_iov[curpage].iov_len - pageoff;
                    pageoff = auio->uio_iov[curpage].iov_len;
                }

                /* we filled a page, or this is the last page. conditionally release it */
                if (pp && ((pageoff == auio->uio_iov[curpage].iov_len &&
                            release_pages) || (length == 0 && iovno >= nio)))
                    release_full_page(pp, pageoff);

                if (length == 0 && iovno >= nio)
                    goto done;
            }
        }
    } while (moredata);

 done:
    osi_FreeSmallSpace(rxiov);
    return result;
}


/* dispatch a no-cache read request */
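/*
 * Validate the vcache, then hand the read off to a background daemon by
 * queueing a BOP_FETCH_NOCACHE request. The queue attempt is retried
 * with increasing sleeps; if no slot frees up, fail with EBUSY and
 * release everything the daemon would otherwise have cleaned up.
 */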
afs_int32
afs_ReadNoCache(struct vcache *avc,
                struct nocache_read_request *bparms,
                afs_ucred_t *acred)
{
    afs_int32 code;
    afs_int32 bcnt;
    struct brequest *breq;
    struct vrequest *areq = NULL;

    if (avc->vc_error) {
        code = EIO;
        afs_warn("afs_ReadNoCache VCache Error!\n");
        goto cleanup;
    }

    AFS_GLOCK();
    /* the receiver will free areq */
    code = afs_CreateReq(&areq, acred);
    if (code) {
        afs_warn("afs_ReadNoCache afs_CreateReq error!\n");
    } else {
        code = afs_VerifyVCache(avc, areq);
        if (code) {
            afs_warn("afs_ReadNoCache Failed to verify VCache!\n");
        }
    }
    AFS_GUNLOCK();

    if (code) {
        code = afs_CheckCode(code, areq, 11);	/* failed to get it */
        goto cleanup;
    }

    bparms->areq = areq;

    /* and queue this one */
    bcnt = 1;
    AFS_GLOCK();
    while (bcnt < 20) {
        breq = afs_BQueue(BOP_FETCH_NOCACHE, avc, B_DONTWAIT, 0, acred, 1, 1,
                          bparms, (void *)0, (void *)0);
        if (breq != 0) {
            code = 0;
            break;
        }
        afs_osi_Wait(10 * bcnt, 0, 0);
        bcnt++;	/* back off a little longer each try; give up after 19 tries */
    }
    AFS_GUNLOCK();

    if (!breq) {
        code = EBUSY;
        goto cleanup;
    }

    return code;

 cleanup:
    /* If there's a problem before we queue the request, we need to
     * do everything that would normally happen when the request was
     * processed, like unlocking the pages and freeing memory.
     */
    unlock_and_release_pages(bparms->auio);
    AFS_GLOCK();
    afs_DestroyReq(areq);
    AFS_GUNLOCK();
    osi_Free(bparms->auio->uio_iov,
             bparms->auio->uio_iovcnt * sizeof(struct iovec));
    osi_Free(bparms->auio, sizeof(struct uio));
    osi_Free(bparms, sizeof(struct nocache_read_request));
    return code;
}


/* Cannot have static linkage--called from BPrefetch (afs_daemons) */
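/*
 * Fetch the requested range from the fileserver, bypassing the disk
 * cache. Tries RXAFS_FetchData64 first; if the server turns out not to
 * speak 64-bit offsets (RXGEN_OPCODE, or already marked via
 * afs_serverHasNo64Bit), falls back to 32-bit RXAFS_FetchData, which
 * limits the offset to 2^31 - 1 (EFBIG beyond that).
 */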
afs_int32
afs_PrefetchNoCache(struct vcache *avc,
                    afs_ucred_t *acred,
                    struct nocache_read_request *bparms)
{
    struct uio *auio;
#ifndef UKERNEL
    struct iovec *iovecp;
#endif
    struct vrequest *areq;
    afs_int32 code = 0;
    struct rx_connection *rxconn;
#ifdef AFS_64BIT_CLIENT
    afs_int32 length_hi, bytes, locked;
#endif

    struct afs_conn *tc;
    struct rx_call *tcall;
    struct tlocal1 {
        struct AFSVolSync tsync;
        struct AFSFetchStatus OutStatus;
        struct AFSCallBack CallBack;
    };
    struct tlocal1 *tcallspec;

    auio = bparms->auio;
    areq = bparms->areq;
#ifndef UKERNEL
    iovecp = auio->uio_iov;
#endif

    tcallspec = osi_Alloc(sizeof(struct tlocal1));
    do {
        tc = afs_Conn(&avc->f.fid, areq, SHARED_LOCK /* ignored */, &rxconn);
        if (tc) {
            avc->callback = tc->parent->srvr->server;
            tcall = rx_NewCall(rxconn);
#ifdef AFS_64BIT_CLIENT
            if (!afs_serverHasNo64Bit(tc)) {
                code = StartRXAFS_FetchData64(tcall,
                                              (struct AFSFid *) &avc->f.fid.Fid,
                                              auio->uio_offset,
                                              bparms->length);
                if (code == 0) {
                    COND_GUNLOCK(locked);
                    bytes = rx_Read(tcall, (char *)&length_hi,
                                    sizeof(afs_int32));
                    COND_RE_GLOCK(locked);

                    if (bytes != sizeof(afs_int32)) {
                        length_hi = 0;
                        COND_GUNLOCK(locked);
                        code = rx_EndCall(tcall, RX_PROTOCOL_ERROR);
                        COND_RE_GLOCK(locked);
                        tcall = NULL;
                    }
                }
            } /* afs_serverHasNo64Bit */
            if (code == RXGEN_OPCODE || afs_serverHasNo64Bit(tc)) {
                if (auio->uio_offset > 0x7FFFFFFF) {
                    code = EFBIG;
                } else {
                    afs_int32 pos;
                    pos = auio->uio_offset;
                    COND_GUNLOCK(locked);
                    if (!tcall)
                        tcall = rx_NewCall(rxconn);
                    code = StartRXAFS_FetchData(tcall,
                                                (struct AFSFid *) &avc->f.fid.Fid,
                                                pos, bparms->length);
                    COND_RE_GLOCK(locked);
                }
                afs_serverSetNo64Bit(tc);
            }
#else
            code = StartRXAFS_FetchData(tcall,
                                        (struct AFSFid *) &avc->f.fid.Fid,
                                        auio->uio_offset, bparms->length);
#endif
            if (code == 0) {
                code = afs_NoCacheFetchProc(tcall, avc, auio,
                                            1 /* release_pages */,
                                            bparms->length);
            } else {
                afs_warn("BYPASS: StartRXAFS_FetchData failed: %d\n", code);
                unlock_and_release_pages(auio);
                (void)afs_Analyze(tc, rxconn, code, &avc->f.fid, areq,
                                  AFS_STATS_FS_RPCIDX_FETCHDATA,
                                  SHARED_LOCK, NULL);
                goto done;
            }
            if (code == 0) {
                code = EndRXAFS_FetchData(tcall, &tcallspec->OutStatus,
                                          &tcallspec->CallBack,
                                          &tcallspec->tsync);
            } else {
                afs_warn("BYPASS: NoCacheFetchProc failed: %d\n", code);
            }
            code = rx_EndCall(tcall, code);
        } else {
            afs_warn("BYPASS: No connection.\n");
            code = -1;
            unlock_and_release_pages(auio);
            (void)afs_Analyze(tc, rxconn, code, &avc->f.fid, areq,
                              AFS_STATS_FS_RPCIDX_FETCHDATA,
                              SHARED_LOCK, NULL);
            goto done;
        }
    } while (afs_Analyze(tc, rxconn, code, &avc->f.fid, areq,
                         AFS_STATS_FS_RPCIDX_FETCHDATA,
                         SHARED_LOCK, 0));
 done:
    /*
     * Copy appropriate fields into vcache
     */

    if (!code)
        afs_ProcessFS(avc, &tcallspec->OutStatus, areq);

    afs_DestroyReq(areq);
    osi_Free(tcallspec, sizeof(struct tlocal1));
    osi_Free(bparms, sizeof(struct nocache_read_request));
#ifndef UKERNEL
    /* in UKERNEL, the "pages" are passed in */
    osi_Free(iovecp, auio->uio_iovcnt * sizeof(struct iovec));
    osi_Free(auio, sizeof(struct uio));
#endif
    return code;
}
#endif