Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / dir / buffer.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 #include <afsconfig.h>
11 #include <afs/param.h>
12
13 #include <roken.h>
14 #include <afs/opr.h>
15
16 #include <lock.h>
17
18 #include "dir.h"
19
/*
 * Number of bytes reserved at the head of each cache slot for the opaque
 * directory handle (see struct buffer.fid).  The reservation is larger
 * when AFS_64BIT_IOPS_ENV is defined.
 */
#ifdef AFS_64BIT_IOPS_ENV
#define BUFFER_FID_SIZE (9*sizeof(int) + 2*sizeof(char*))
#else
#define BUFFER_FID_SIZE (6*sizeof(int) + 2*sizeof(char*))
#endif

/*
 * One slot of the directory page cache.
 *
 * NOTE: fid must remain the first member.  bufferDir() hands out its
 * address as a dir_file_t, and FixupBucket() applies pHash() directly to
 * a struct buffer pointer, which reads the first afs_int32 of the slot.
 */
struct buffer {
    /* fid is used for Unique cache key + i/o addressing.
     * fid size is based on 4 + size of inode and size of pointer
     */
    char fid[BUFFER_FID_SIZE];
    afs_int32 page;		/* page number of the directory held in data */
    afs_int32 accesstime;	/* timecounter value at last use; newslot()
				 * reuses the unlocked slot with the smallest one */
    struct buffer *hashNext;	/* next slot on the same phTable chain */
    void *data;			/* this slot's BUFFER_PAGE_SIZE bytes inside BufferData */
    char lockers;		/* outstanding references (DRead/DNew take one,
				 * DRelease drops one); nonzero blocks reuse */
    char dirty;			/* nonzero: data must be written with ReallyWrite() */
    char hashIndex;		/* phTable bucket recorded by FixupBucket() */
    struct Lock lock;		/* per-slot lock */
};
40
41 static_inline dir_file_t
42 bufferDir(struct buffer *b)
43 {
44 return (dir_file_t) &b->fid;
45 }
46
/*
 * Global lock for the cache.  In this file it is write-held while DRead()
 * and DNew() search the hash table or reassign a slot, and read-held
 * while DZap() and the DFlush*() routines scan the slots.
 */
struct Lock afs_bufferLock;

/* page size, in bytes, of one cached directory page */
#define BUFFER_PAGE_SIZE 2048
/* log2 of the page size (2^11 == 2048) */
#define LOGPS 11
/* page hash table size; must be a power of two because the hash macros
 * below reduce with "& (PHSIZE-1)" */
#define PHSIZE 32
/* The hash table should be somewhat efficient even if there are only
 * a few partitions (less than 32). So the hash for the fileserver is now
 * based on the volume id. This means this macro is dependent upon the
 * layout of DirHandle in viced/viced.h, vol/salvage.h and volser/salvage.h.
 *
 * pHash() hashes on the first afs_int32 found at the handle's address;
 * vHash() hashes a volume id directly.  DFlushVolume() relies on both
 * selecting the same bucket for handles of a given volume.
 */
#define pHash(fid) (((afs_int32 *)fid)[0] & (PHSIZE-1))
#define vHash(vid) (vid & (PHSIZE-1))

/* admittedly system dependent, this is the maximum signed 32-bit value;
 * newslot() uses it as the initial "older than anything" access time */
#define BUFFER_LONG_MAX 2147483647
#ifndef NULL
#define NULL 0
#endif
68
/* Array of nbuffers pointers, one per cache slot (filled in by DInit). */
static struct buffer **Buffers;

/* Backing store for all cached pages: nbuffers * BUFFER_PAGE_SIZE bytes. */
char *BufferData;

static struct buffer *phTable[PHSIZE];	/* page hash table */
static struct buffer *LastBuffer;	/* set by DInit to the start of the slot storage */
int nbuffers;				/* number of cache slots */
int timecounter;			/* incremented on every buffer use; source of accesstime */
static int calls = 0, ios = 0;		/* DRead calls / DRead cache misses, reported by DStat */

/* Defined further down; declared here because DRead uses it first. */
struct buffer *newslot(dir_file_t dir, afs_int32 apage,
		       struct buffer *lp);

/* XXX - These declarations are weaker than they should be.  The correct
 * prototypes for these functions are ...
 *
 * extern void FidZero(DirHandle *);
 * extern int FidEq(DirHandle *a, DirHandle *b);
 * extern int ReallyRead(DirHandle *a, int block, char *data);
 *
 * They are not defined in this file.
 */

extern void FidZero(dir_file_t);
extern int FidEq(dir_file_t, dir_file_t);
extern int ReallyRead(dir_file_t, int block, char *data);
extern int ReallyWrite(dir_file_t, int block, char *data);
extern void FidZap(dir_file_t);
extern int FidVolEq(dir_file_t, afs_int32 vid);
extern void FidCpy(dir_file_t, dir_file_t fromfile);
96
/**
 * report cache statistics.
 *
 * @param[out] abuffers  receives the number of cache slots (nbuffers)
 * @param[out] acalls    receives the number of DRead calls so far
 * @param[out] aios      receives the number of DRead cache misses so far
 *
 * @return always 0
 *
 * The counters are read without taking afs_bufferLock.
 */
int
DStat(int *abuffers, int *acalls, int *aios)
{
    *abuffers = nbuffers;
    *acalls = calls;
    *aios = ios;
    return 0;
}
105
106 /**
107 * initialize the directory package.
108 *
109 * @param[in] abuffers size of directory buffer cache
110 *
111 * @return operation status
112 * @retval 0 success
113 */
114 void
115 DInit(int abuffers)
116 {
117 /* Initialize the venus buffer system. */
118 int i, tsize;
119 struct buffer *tb;
120 char *tp;
121
122 Lock_Init(&afs_bufferLock);
123 /* Align each element of Buffers on a doubleword boundary */
124 tsize = (sizeof(struct buffer) + 7) & ~7;
125 tp = malloc(abuffers * tsize);
126 Buffers = malloc(abuffers * sizeof(struct buffer *));
127 BufferData = malloc(abuffers * BUFFER_PAGE_SIZE);
128 timecounter = 0;
129 LastBuffer = (struct buffer *)tp;
130 nbuffers = abuffers;
131 for (i = 0; i < PHSIZE; i++)
132 phTable[i] = 0;
133 for (i = 0; i < abuffers; i++) {
134 /* Fill in each buffer with an empty indication. */
135 tb = (struct buffer *)tp;
136 Buffers[i] = tb;
137 tp += tsize;
138 FidZero(bufferDir(tb));
139 tb->accesstime = tb->lockers = 0;
140 tb->data = &BufferData[BUFFER_PAGE_SIZE * i];
141 tb->hashIndex = 0;
142 tb->dirty = 0;
143 Lock_Init(&tb->lock);
144 }
145 return;
146 }
147
148 /**
149 * read a page out of a directory object.
150 *
151 * @param[in] fid directory object fid
152 * @param[in] page page in hash table to be read
153 *
154 * @return pointer to requested page in directory cache
155 * @retval NULL read failed
156 */
157 int
158 DRead(dir_file_t fid, int page, struct DirBuffer *entry)
159 {
160 /* Read a page from the disk. */
161 struct buffer *tb, *tb2, **bufhead;
162
163 memset(entry, 0, sizeof(struct DirBuffer));
164
165 ObtainWriteLock(&afs_bufferLock);
166 calls++;
167
168 #define bufmatch(tb,fid) (tb->page == page && FidEq(bufferDir(tb), fid))
169 #define buf_Front(head,parent,p) {(parent)->hashNext = (p)->hashNext; (p)->hashNext= *(head);*(head)=(p);}
170
171 /* this apparently-complicated-looking code is simply an example of
172 * a little bit of loop unrolling, and is a standard linked-list
173 * traversal trick. It saves a few assignments at the the expense
174 * of larger code size. This could be simplified by better use of
175 * macros. With the use of these LRU queues, the old one-cache is
176 * probably obsolete.
177 */
178 if ((tb = phTable[pHash(fid)])) { /* ASSMT HERE */
179 if (bufmatch(tb, fid)) {
180 ObtainWriteLock(&tb->lock);
181 tb->lockers++;
182 ReleaseWriteLock(&afs_bufferLock);
183 tb->accesstime = ++timecounter;
184 ReleaseWriteLock(&tb->lock);
185 entry->buffer = tb;
186 entry->data = tb->data;
187 return 0;
188 } else {
189 bufhead = &(phTable[pHash(fid)]);
190 while ((tb2 = tb->hashNext)) {
191 if (bufmatch(tb2, fid)) {
192 buf_Front(bufhead, tb, tb2);
193 ObtainWriteLock(&tb2->lock);
194 tb2->lockers++;
195 ReleaseWriteLock(&afs_bufferLock);
196 tb2->accesstime = ++timecounter;
197 ReleaseWriteLock(&tb2->lock);
198 entry->buffer = tb2;
199 entry->data = tb2->data;
200 return 0;
201 }
202 if ((tb = tb2->hashNext)) { /* ASSIGNMENT HERE! */
203 if (bufmatch(tb, fid)) {
204 buf_Front(bufhead, tb2, tb);
205 ObtainWriteLock(&tb->lock);
206 tb->lockers++;
207 ReleaseWriteLock(&afs_bufferLock);
208 tb->accesstime = ++timecounter;
209 ReleaseWriteLock(&tb->lock);
210 entry->buffer = tb;
211 entry->data = tb->data;
212 return 0;
213 }
214 } else
215 break;
216 }
217 }
218 } else
219 tb2 = NULL;
220
221 /* can't find it */
222 /* The last thing we looked at was either tb or tb2 (or nothing). That
223 * is at least the oldest buffer on one particular hash chain, so it's
224 * a pretty good place to start looking for the truly oldest buffer.
225 */
226 tb = newslot(fid, page, (tb ? tb : tb2));
227 ios++;
228 ObtainWriteLock(&tb->lock);
229 tb->lockers++;
230 ReleaseWriteLock(&afs_bufferLock);
231 if (ReallyRead(bufferDir(tb), tb->page, tb->data)) {
232 tb->lockers--;
233 FidZap(bufferDir(tb)); /* disaster */
234 ReleaseWriteLock(&tb->lock);
235 return EIO;
236 }
237 /* Note that findslot sets the page field in the buffer equal to
238 * what it is searching for.
239 */
240 ReleaseWriteLock(&tb->lock);
241 entry->buffer = tb;
242 entry->data = tb->data;
243 return 0;
244 }
245
246
247 static int
248 FixupBucket(struct buffer *ap)
249 {
250 struct buffer **lp, *tp;
251 int i;
252
253 /* first try to get it out of its current hash bucket, in which it might not be */
254 i = ap->hashIndex;
255 lp = &phTable[i];
256 for (tp = *lp; tp; tp = tp->hashNext) {
257 if (tp == ap) {
258 *lp = tp->hashNext;
259 break;
260 }
261 lp = &tp->hashNext;
262 }
263 /* now figure the new hash bucket */
264 i = pHash(ap);
265 ap->hashIndex = i; /* remember where we are for deletion */
266 ap->hashNext = phTable[i]; /* add us to the list */
267 phTable[i] = ap; /* at the front, since it's LRU */
268 return 0;
269 }
270
271 struct buffer *
272 newslot(dir_file_t dir, afs_int32 apage, struct buffer *lp)
273 {
274 /* Find a usable buffer slot */
275 afs_int32 i;
276 afs_int32 lt;
277 struct buffer **tbp;
278
279 if (lp && (lp->lockers == 0)) {
280 lt = lp->accesstime;
281 } else {
282 lp = 0;
283 lt = BUFFER_LONG_MAX;
284 }
285
286 tbp = Buffers;
287 for (i = 0; i < nbuffers; i++, tbp++) {
288 if ((*tbp)->lockers == 0) {
289 if ((*tbp)->accesstime < lt) {
290 lp = (*tbp);
291 lt = (*tbp)->accesstime;
292 }
293 }
294 }
295
296 /* There are no unlocked buffers */
297 if (lp == 0) {
298 if (lt < 0)
299 Die("accesstime counter wrapped");
300 else
301 Die("all buffers locked");
302 }
303
304 /* We do not need to lock the buffer here because it has no lockers
305 * and the afs_bufferLock prevents other threads from zapping this
306 * buffer while we are writing it out */
307 if (lp->dirty) {
308 if (ReallyWrite(bufferDir(lp), lp->page, lp->data))
309 Die("writing bogus buffer");
310 lp->dirty = 0;
311 }
312
313 /* Now fill in the header. */
314 FidZap(bufferDir(lp));
315 FidCpy(bufferDir(lp), dir); /* set this */
316 memset(lp->data, 0, BUFFER_PAGE_SIZE); /* Don't leak stale data. */
317 lp->page = apage;
318 lp->accesstime = ++timecounter;
319
320 FixupBucket(lp); /* move to the right hash bucket */
321
322 return lp;
323 }
324
325 /* Release a buffer, specifying whether or not the buffer has been modified
326 * by the locker. */
327 void
328 DRelease(struct DirBuffer *entry, int flag)
329 {
330 struct buffer *bp;
331
332 bp = (struct buffer *) entry->buffer;
333 if (bp == NULL)
334 return;
335 ObtainWriteLock(&bp->lock);
336 bp->lockers--;
337 if (flag)
338 bp->dirty = 1;
339 ReleaseWriteLock(&bp->lock);
340 }
341
342 /* Return the byte within a file represented by a buffer pointer. */
343 int
344 DVOffset(struct DirBuffer *entry)
345 {
346 struct buffer *bp;
347
348 bp = entry->buffer;
349 return BUFFER_PAGE_SIZE * bp->page + (char *)entry->data - (char *)bp->data;
350 }
351
352 void
353 DZap(dir_file_t dir)
354 {
355 /* Destroy all buffers pertaining to a particular fid. */
356 struct buffer *tb;
357 ObtainReadLock(&afs_bufferLock);
358 for (tb = phTable[pHash(dir)]; tb; tb = tb->hashNext)
359 if (FidEq(bufferDir(tb), dir)) {
360 ObtainWriteLock(&tb->lock);
361 FidZap(bufferDir(tb));
362 tb->dirty = 0;
363 ReleaseWriteLock(&tb->lock);
364 }
365 ReleaseReadLock(&afs_bufferLock);
366 }
367
368 int
369 DFlushVolume(afs_int32 vid)
370 {
371 /* Flush all data and release all inode handles for a particular volume */
372 struct buffer *tb;
373 int code, rcode = 0;
374 ObtainReadLock(&afs_bufferLock);
375 for (tb = phTable[vHash(vid)]; tb; tb = tb->hashNext)
376 if (FidVolEq(bufferDir(tb), vid)) {
377 ObtainWriteLock(&tb->lock);
378 if (tb->dirty) {
379 code = ReallyWrite(bufferDir(tb), tb->page, tb->data);
380 if (code && !rcode)
381 rcode = code;
382 tb->dirty = 0;
383 }
384 FidZap(bufferDir(tb));
385 ReleaseWriteLock(&tb->lock);
386 }
387 ReleaseReadLock(&afs_bufferLock);
388 return rcode;
389 }
390
391 int
392 DFlushEntry(dir_file_t fid)
393 {
394 /* Flush pages modified by one entry. */
395 struct buffer *tb;
396 int code;
397
398 ObtainReadLock(&afs_bufferLock);
399 for (tb = phTable[pHash(fid)]; tb; tb = tb->hashNext)
400 if (FidEq(bufferDir(tb), fid) && tb->dirty) {
401 ObtainWriteLock(&tb->lock);
402 if (tb->dirty) {
403 code = ReallyWrite(bufferDir(tb), tb->page, tb->data);
404 if (code) {
405 ReleaseWriteLock(&tb->lock);
406 ReleaseReadLock(&afs_bufferLock);
407 return code;
408 }
409 tb->dirty = 0;
410 }
411 ReleaseWriteLock(&tb->lock);
412 }
413 ReleaseReadLock(&afs_bufferLock);
414 return 0;
415 }
416
417 int
418 DFlush(void)
419 {
420 /* Flush all the modified buffers. */
421 int i;
422 struct buffer **tbp;
423 afs_int32 code, rcode;
424
425 rcode = 0;
426 tbp = Buffers;
427 ObtainReadLock(&afs_bufferLock);
428 for (i = 0; i < nbuffers; i++, tbp++) {
429 if ((*tbp)->dirty) {
430 ObtainWriteLock(&(*tbp)->lock);
431 (*tbp)->lockers++;
432 ReleaseReadLock(&afs_bufferLock);
433 if ((*tbp)->dirty) {
434 code = ReallyWrite(bufferDir(*tbp), (*tbp)->page, (*tbp)->data);
435 if (!code)
436 (*tbp)->dirty = 0; /* Clear the dirty flag */
437 if (code && !rcode) {
438 rcode = code;
439 }
440 }
441 (*tbp)->lockers--;
442 ReleaseWriteLock(&(*tbp)->lock);
443 ObtainReadLock(&afs_bufferLock);
444 }
445 }
446 ReleaseReadLock(&afs_bufferLock);
447 return rcode;
448 }
449
450 /* Same as read, only do *not* even try to read the page,
451 * since it probably doesn't exist.
452 */
453 int
454 DNew(dir_file_t dir, int page, struct DirBuffer *entry)
455 {
456 struct buffer *tb;
457
458 memset(entry,0, sizeof(struct DirBuffer));
459
460 ObtainWriteLock(&afs_bufferLock);
461 if ((tb = newslot(dir, page, 0)) == 0) {
462 ReleaseWriteLock(&afs_bufferLock);
463 return EIO;
464 }
465 ObtainWriteLock(&tb->lock);
466 tb->lockers++;
467 ReleaseWriteLock(&afs_bufferLock);
468 ReleaseWriteLock(&tb->lock);
469
470 entry->buffer = tb;
471 entry->data = tb->data;
472
473 return 0;
474 }