Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / vol / vg_scan.c
1 /*
2 * Copyright 2009-2010, Sine Nomine Associates and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 /*
11 * demand attach fs
12 * volume group membership cache
13 * asynchronous partition scanner
14 */
15
16 #include <afsconfig.h>
17 #include <afs/param.h>
18
19 #include <roken.h>
20
21 #ifdef HAVE_SYS_FILE_H
22 #include <sys/file.h>
23 #endif
24
25 #ifdef AFS_DEMAND_ATTACH_FS
26
27 #include <afs/opr.h>
28 #include <rx/rx_queue.h>
29 #include <opr/lock.h>
30 #include <lock.h>
31 #include <afs/afsutil.h>
32 #include "nfs.h"
33 #include <afs/afsint.h>
34 #include "ihandle.h"
35 #include "vnode.h"
36 #include "volume.h"
37 #include "viceinode.h"
38 #include "voldefs.h"
39 #include "partition.h"
40 #include <afs/errors.h>
41
42 #define __VOL_VG_CACHE_IMPL 1
43
44 #include "vg_cache.h"
45 #include "vg_cache_impl.h"
46
47 static int _VVGC_scan_table_init(VVGCache_scan_table_t * tbl);
48 static int _VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
49 struct DiskPartition64 * dp,
50 VolumeId volid,
51 VolumeId parent);
52 static int _VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
53 struct DiskPartition64 * dp);
54 static void * _VVGC_scanner_thread(void *);
55 static int _VVGC_scan_partition(struct DiskPartition64 * part);
56 static VVGCache_dlist_entry_t * _VVGC_dlist_lookup_r(struct DiskPartition64 *dp,
57 VolumeId parent,
58 VolumeId child);
59 static void _VVGC_flush_dlist(struct DiskPartition64 *dp);
60
61 /**
62 * init a thread-local scan table.
63 *
64 * @param[in] tbl scan table
65 *
66 * @return operation status
67 * @retval 0 success
68 *
69 * @internal
70 */
71 static int
72 _VVGC_scan_table_init(VVGCache_scan_table_t * tbl)
73 {
74 memset(tbl, 0, sizeof(*tbl));
75
76 return 0;
77 }
78
79 /**
80 * add an entry to the thread-local scan table.
81 *
82 * @param[in] tbl scan table
83 * @param[in] dp disk partition object
84 * @param[in] volid volume id
85 * @param[in] parent parent volume id
86 *
87 * @pre VOL_LOCK is NOT held
88 *
89 * @note if the table is full, this routine will acquire
90 * VOL_LOCK and flush the table to the global one.
91 *
92 * @return operation status
93 * @retval 0 success
94 * @retval nonzero a VVGCache_entry_add_r operation failed during a
95 * flush of the thread-local table
96 *
97 * @internal
98 */
99 static int
100 _VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
101 struct DiskPartition64 * dp,
102 VolumeId volid,
103 VolumeId parent)
104 {
105 int code = 0;
106
107 if (tbl->idx == VVGC_SCAN_TBL_LEN) {
108 code = _VVGC_scan_table_flush(tbl, dp);
109 }
110
111 tbl->entries[tbl->idx].volid = volid;
112 tbl->entries[tbl->idx].parent = parent;
113 tbl->idx++;
114
115 return code;
116 }
117
118 /**
119 * flush thread-local scan table to the global VG cache.
120 *
121 * @param[in] tbl scan table
122 * @param[in] dp disk partition object
123 *
124 * @pre VOL_LOCK is NOT held
125 *
126 * @return operation status
127 * @retval 0 success
128 * @retval nonzero a VVGCache_entry_add_r operation failed during a
129 * flush of the thread-local table
130 *
131 * @internal
132 */
133 static int
134 _VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
135 struct DiskPartition64 * dp)
136 {
137 int code = 0, res, i;
138 afs_int32 newvg = 0;
139 unsigned long newvols, newvgs;
140
141 newvols = tbl->newvols;
142 newvgs = tbl->newvgs;
143
144 VOL_LOCK;
145
146 for (i = 0; i < tbl->idx; i++) {
147 /*
148 * We need to check the 'to-delete' list and prevent adding any entries
149 * that are on it. The volser could potentially create a volume in one
150 * VG, then delete it and put it on another VG. If we are doing a scan
151 * when that happens, tbl->entries could have the entries for trying to
152 * put the vol on both VGs, though at least one of them will also be on
153 * the dlist. If we put everything in tbl->entries on the VGC then try
154 * to delete afterwards, putting one entry on the VGC cause an error,
155 * and we'll fail to add it. So instead, avoid adding any new VGC
156 * entries if it is on the dlist.
157 */
158 if (_VVGC_dlist_lookup_r(dp, tbl->entries[i].parent,
159 tbl->entries[i].volid)) {
160 continue;
161 }
162 res = VVGCache_entry_add_r(dp,
163 tbl->entries[i].parent,
164 tbl->entries[i].volid,
165 &newvg);
166 if (res) {
167 code = res;
168 } else {
169 newvols++;
170 newvgs += newvg;
171 }
172 }
173
174 /* flush the to-delete list while we're here. We don't need to preserve
175 * the list across the entire scan, and flushing it each time we flush
176 * a scan table will keep the size of the dlist down */
177 _VVGC_flush_dlist(dp);
178
179 VOL_UNLOCK;
180
181 ViceLog(125, ("VVGC_scan_table_flush: flushed %d entries from "
182 "scan table to global VG cache\n", tbl->idx));
183 ViceLog(125, ("VVGC_scan_table_flush: %s total: %lu vols, %lu groups\n",
184 VPartitionPath(dp), newvols, newvgs));
185
186 res = _VVGC_scan_table_init(tbl);
187 if (res) {
188 code = res;
189 }
190
191 tbl->newvols = newvols;
192 tbl->newvgs = newvgs;
193
194 return code;
195 }
196
197 /**
198 * record a volume header found by VWalkVolumeHeaders in a VGC scan table.
199 *
200 * @param[in] dp the disk partition
201 * @param[in] name full path to the .vol header (unused)
202 * @param[in] hdr the header data
203 * @param[in] last whether this is the last try or not (unused)
204 * @param[in] rock actually a VVGCache_scan_table_t* to add the volume to
205 *
206 * @return operation status
207 * @retval 0 success
208 * @retval -1 fatal error adding vol to the scan table
209 */
210 static int
211 _VVGC_RecordHeader(struct DiskPartition64 *dp, const char *name,
212 struct VolumeDiskHeader *hdr, int last, void *rock)
213 {
214 int code;
215 VVGCache_scan_table_t *tbl;
216 tbl = (VVGCache_scan_table_t *)rock;
217
218 code = _VVGC_scan_table_add(tbl, dp, hdr->id, hdr->parent);
219 if (code) {
220 ViceLog(0, ("VVGC_scan_partition: error %d adding volume %s to scan table\n",
221 code, name));
222 return -1;
223 }
224 return 0;
225 }
226
/**
 * VWalkVolumeHeaders callback: remove a header file that failed validation.
 *
 * The removal is logged first; if unlink() then fails, a second message
 * with the errno value is logged.  No error is reported to the caller.
 *
 * @param[in] dp    the disk partition (unused)
 * @param[in] name  the full path to the .vol header
 * @param[in] hdr   the header data (unused)
 * @param[in] rock  unused
 */
static void
_VVGC_UnlinkHeader(struct DiskPartition64 *dp, const char *name,
                   struct VolumeDiskHeader *hdr, void *rock)
{
    ViceLog(0, ("%s is not a legitimate volume header file; deleted\n", name));

    if (unlink(name) == 0) {
        return;
    }

    ViceLog(0, ("Unable to unlink %s (errno = %d)\n",
                name, errno));
}
245
246 /**
247 * scan a disk partition for .vol files
248 *
249 * @param[in] part disk partition object
250 *
251 * @pre VOL_LOCK is NOT held
252 *
253 * @return operation status
254 * @retval 0 success
255 * @retval -1 invalid disk partition object
256 * @retval -2 failed to flush stale entries for this partition
257 *
258 * @internal
259 */
260 static int
261 _VVGC_scan_partition(struct DiskPartition64 * part)
262 {
263 int code, res;
264 DIR *dirp = NULL;
265 VVGCache_scan_table_t tbl;
266 char *part_path = NULL;
267
268 code = _VVGC_scan_table_init(&tbl);
269 if (code) {
270 ViceLog(0, ("VVGC_scan_partition: could not init scan table; error = %d\n",
271 code));
272 goto done;
273 }
274 part_path = VPartitionPath(part);
275 if (part_path == NULL) {
276 ViceLog(0, ("VVGC_scan_partition: invalid partition object given; aborting scan\n"));
277 code = -1;
278 goto done;
279 }
280
281 VOL_LOCK;
282 res = _VVGC_flush_part_r(part);
283 if (res) {
284 ViceLog(0, ("VVGC_scan_partition: error flushing partition %s; error = %d\n",
285 VPartitionPath(part), res));
286 code = -2;
287 }
288 VOL_UNLOCK;
289 if (code) {
290 goto done;
291 }
292
293 dirp = opendir(part_path);
294 if (dirp == NULL) {
295 ViceLog(0, ("VVGC_scan_partition: could not open %s, aborting scan; error = %d\n",
296 part_path, errno));
297 code = -1;
298 goto done;
299 }
300
301 ViceLog(5, ("VVGC_scan_partition: scanning partition %s for VG cache\n",
302 part_path));
303
304 code = VWalkVolumeHeaders(part, part_path, _VVGC_RecordHeader,
305 _VVGC_UnlinkHeader, &tbl);
306 if (code < 0) {
307 goto done;
308 }
309
310 _VVGC_scan_table_flush(&tbl, part);
311
312 done:
313 if (dirp) {
314 closedir(dirp);
315 dirp = NULL;
316 }
317 if (code) {
318 ViceLog(0, ("VVGC_scan_partition: error %d while scanning %s\n",
319 code, part_path));
320 } else {
321 ViceLog(0, ("VVGC_scan_partition: finished scanning %s: %lu volumes in %lu groups\n",
322 part_path, tbl.newvols, tbl.newvgs));
323 }
324
325 VOL_LOCK;
326
327 _VVGC_flush_dlist(part);
328 free(VVGCache.part[part->index].dlist_hash_buckets);
329 VVGCache.part[part->index].dlist_hash_buckets = NULL;
330
331 if (code) {
332 _VVGC_state_change(part, VVGC_PART_STATE_INVALID);
333 } else {
334 _VVGC_state_change(part, VVGC_PART_STATE_VALID);
335 }
336
337 VOL_UNLOCK;
338
339 return code;
340 }
341
342 /**
343 * scanner thread.
344 */
345 static void *
346 _VVGC_scanner_thread(void * args)
347 {
348 struct DiskPartition64 *part = args;
349 int code;
350
351 code = _VVGC_scan_partition(part);
352 if (code) {
353 ViceLog(0, ("Error: _VVGC_scan_partition failed with code %d for partition %s\n",
354 code, VPartitionPath(part)));
355 }
356
357 return NULL;
358 }
359
360 /**
361 * start a background scan.
362 *
363 * @param[in] dp disk partition object
364 *
365 * @return operation status
366 * @retval 0 success
367 * @retval -1 internal error
368 * @retval -3 racing against another thread
369 *
370 * @internal
371 */
372 int
373 _VVGC_scan_start(struct DiskPartition64 * dp)
374 {
375 int code = 0;
376 pthread_t tid;
377 pthread_attr_t attrs;
378 int i;
379
380 if (_VVGC_state_change(dp,
381 VVGC_PART_STATE_UPDATING)
382 == VVGC_PART_STATE_UPDATING) {
383 /* race */
384 ViceLog(0, ("VVGC_scan_partition: race detected; aborting scanning partition %s\n",
385 VPartitionPath(dp)));
386 code = -3;
387 goto error;
388 }
389
390 /* initialize partition's to-delete list */
391 VVGCache.part[dp->index].dlist_hash_buckets =
392 malloc(VolumeHashTable.Size * sizeof(struct rx_queue));
393 if (!VVGCache.part[dp->index].dlist_hash_buckets) {
394 code = -1;
395 goto error;
396 }
397 for (i = 0; i < VolumeHashTable.Size; i++) {
398 queue_Init(&VVGCache.part[dp->index].dlist_hash_buckets[i]);
399 }
400
401 code = pthread_attr_init(&attrs);
402 if (code) {
403 goto error;
404 }
405
406 code = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
407 if (code) {
408 goto error;
409 }
410
411 code = pthread_create(&tid, &attrs, &_VVGC_scanner_thread, dp);
412
413 if (code) {
414 VVGCache_part_state_t old_state;
415
416 ViceLog(0, ("_VVGC_scan_start: pthread_create failed with %d\n", code));
417
418 old_state = _VVGC_state_change(dp, VVGC_PART_STATE_INVALID);
419 opr_Assert(old_state == VVGC_PART_STATE_UPDATING);
420 }
421
422 error:
423 if (code) {
424 ViceLog(0, ("_VVGC_scan_start failed with code %d for partition %s\n",
425 code, VPartitionPath(dp)));
426 if (VVGCache.part[dp->index].dlist_hash_buckets) {
427 free(VVGCache.part[dp->index].dlist_hash_buckets);
428 VVGCache.part[dp->index].dlist_hash_buckets = NULL;
429 }
430 }
431
432 return code;
433 }
434
435 /**
436 * looks up an entry on the to-delete list, if it exists.
437 *
438 * @param[in] dp the partition whose dlist we are looking at
439 * @param[in] parent the parent volume ID we're looking for
440 * @param[in] child the child volume ID we're looking for
441 *
442 * @return a pointer to the entry in the dlist for that entry
443 * @retval NULL the requested entry does not exist in the dlist
444 */
445 static VVGCache_dlist_entry_t *
446 _VVGC_dlist_lookup_r(struct DiskPartition64 *dp, VolumeId parent,
447 VolumeId child)
448 {
449 int bucket = VVGC_HASH(child);
450 VVGCache_dlist_entry_t *ent, *nent;
451
452 for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
453 ent, nent,
454 VVGCache_dlist_entry)) {
455
456 if (ent->child == child && ent->parent == parent) {
457 return ent;
458 }
459 }
460
461 return NULL;
462 }
463
464 /**
465 * delete all of the entries in the dlist from the VGC.
466 *
467 * Traverses the to-delete list for the specified partition, and deletes
468 * the specified entries from the global VGC. Also deletes the entries from
469 * the dlist itself as it goes along.
470 *
471 * @param[in] dp the partition whose dlist we are flushing
472 */
473 static void
474 _VVGC_flush_dlist(struct DiskPartition64 *dp)
475 {
476 int i;
477 VVGCache_dlist_entry_t *ent, *nent;
478
479 for (i = 0; i < VolumeHashTable.Size; i++) {
480 for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[i],
481 ent, nent,
482 VVGCache_dlist_entry)) {
483
484 _VVGC_entry_purge_r(dp, ent->parent, ent->child);
485 queue_Remove(ent);
486 free(ent);
487 }
488 }
489 }
490
491 /**
492 * add a VGC entry to the partition's to-delete list.
493 *
494 * This adds a VGC entry (a parent/child pair) to a list of VGC entries to
495 * be deleted from the VGC at the end of a VGC scan. This is necessary,
496 * while a VGC scan is ocurring, volumes may be deleted. Since a VGC scan
497 * scans a partition in VVGC_SCAN_TBL_LEN chunks, a VGC delete operation
498 * may delete a volume, only for it to be added again when the VGC scan's
499 * table adds it to the VGC. So when a VGC entry is deleted and a VGC scan
500 * is running, this function must be called to ensure it does not come
501 * back onto the VGC.
502 *
503 * @param[in] dp the partition to whose dlist we are adding
504 * @param[in] parent the parent volumeID of the VGC entry
505 * @param[in] child the child volumeID of the VGC entry
506 *
507 * @return operation status
508 * @retval 0 success
509 * @retval ENOMEM memory allocation error
510 *
511 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
512 *
513 * @internal VGC use only
514 */
515 int
516 _VVGC_dlist_add_r(struct DiskPartition64 *dp, VolumeId parent,
517 VolumeId child)
518 {
519 int bucket = VVGC_HASH(child);
520 VVGCache_dlist_entry_t *entry;
521
522 entry = malloc(sizeof(*entry));
523 if (!entry) {
524 return ENOMEM;
525 }
526
527 entry->child = child;
528 entry->parent = parent;
529
530 queue_Append(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
531 entry);
532 return 0;
533 }
534
535 /**
536 * delete a VGC entry from the partition's to-delete list.
537 *
538 * When a VGC scan is ocurring, and a volume is removed, but then created
539 * again, we need to ensure that it does not get deleted from being on the
540 * dlist. Call this function whenever adding a new entry to the VGC during
541 * a VGC scan to ensure it doesn't get deleted later.
542 *
543 * @param[in] dp the partition from whose dlist we are deleting
544 * @param[in] parent the parent volumeID of the VGC entry
545 * @param[in] child the child volumeID of the VGC entry
546 *
547 * @return operation status
548 * @retval 0 success
549 * @retval ENOENT the specified VGC entry is not on the dlist
550 *
551 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
552 *
553 * @internal VGC use only
554 *
555 * @see _VVGC_dlist_add_r
556 */
557 int
558 _VVGC_dlist_del_r(struct DiskPartition64 *dp, VolumeId parent,
559 VolumeId child)
560 {
561 VVGCache_dlist_entry_t *ent;
562
563 ent = _VVGC_dlist_lookup_r(dp, parent, child);
564 if (!ent) {
565 return ENOENT;
566 }
567
568 queue_Remove(ent);
569 free(ent);
570
571 return 0;
572 }
573
574 #endif /* AFS_DEMAND_ATTACH_FS */