Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / volser / vol_split.c
1 /*
2 * Copyright (c) 2007, Hartmut Reuter,
3 * RZG, Max-Planck-Institut f. Plasmaphysik.
4 * All Rights Reserved.
5 *
6 */
7
8 #include <afsconfig.h>
9 #include <afs/param.h>
10
11 #include <roken.h>
12
13 #ifdef HAVE_SYS_FILE_H
14 #include <sys/file.h>
15 #endif
16
17 #if defined(AFS_NAMEI_ENV) && !defined(AFS_NT40_ENV)
18 #include <afs/dir.h>
19 #include <rx/xdr.h>
20 #include <rx/rx_queue.h>
21 #include <afs/afsint.h>
22 #include <afs/nfs.h>
23 #include <lwp.h>
24 #include <lock.h>
25 #include <afs/afssyscalls.h>
26 #include <afs/ihandle.h>
27 #include <afs/vnode.h>
28 #include <afs/volume.h>
29 #include <afs/partition.h>
30 #include <afs/viceinode.h>
31
32 #include "vol.h"
33 #include "volint.h"
34 #include "volser.h"
35 #include "physio.h"
36 #include "volser_internal.h"
37 #ifdef AFS_RXOSD_SUPPORT
38 #include "rxosd.h"
39 #include "vol_osd.h"
40 #include "../vol/vol_osd_prototypes.h"
41 #endif
42
/*
 * Bits stored in VnodeExtract.flag while the split is being planned.
 * They are combined with '|' / '+', so each one is a distinct bit.
 */
#define NEEDED        0x1   /* vnode goes into the new volume */
#define PARENT        0x2   /* directory in which the mount point is created */
#define CHANGEPARENT  0x4   /* direct child of the split directory; its parent
                             * is rewritten to 1 in the new volume */

/*
 * Masks and shifts applied to an Inode value.  In this file only
 * NAMEI_VNODEMASK and NAMEI_UNIQSHIFT are used, to print the vnode number
 * and uniquifier parts of ih_ino in diagnostics; the remaining three are
 * not referenced here.
 */
#define NAMEI_VNODEMASK  0x03ffffff
#define NAMEI_TAGMASK    0x7
#define NAMEI_TAGSHIFT   26
#define NAMEI_UNIQMASK   0xffffffff
#define NAMEI_UNIQSHIFT  32
52
53 struct VnodeExtract {
54 afs_uint32 vN;
55 afs_uint32 parent;
56 afs_uint32 flag;
57 };
58
/*
 * Progress/diagnostic channel back to the caller: messages are formatted
 * into 'line' and then sent over 'call' with rx_Write().
 */
struct Msg {
    struct rx_call *call;   /* Rx call the text is written to */
    int verbose;            /* non-zero: also emit progress messages */
    char line[1024];        /* scratch buffer for one formatted message */
};
64
65 static afs_int32
66 ExtractVnodes(struct Msg *m, Volume *vol, afs_int32 class,
67 struct VnodeExtract **list,
68 afs_uint32 *length, afs_uint32 where,
69 struct VnodeDiskObject *vd,
70 afs_uint32 *parent, struct VnodeDiskObject *parentvd)
71 {
72 afs_int32 code = 0;
73 char buf[SIZEOF_LARGEDISKVNODE];
74 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)&buf;
75 FdHandle_t *fdP = 0;
76 StreamHandle_t *stream = 0;
77 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
78 struct VnodeExtract *e;
79 afs_sfsize_t size;
80 afs_foff_t offset;
81
82 *length = 0;
83 if (parent)
84 *parent = 0;
85
86 fdP = IH_OPEN(vol->vnodeIndex[class].handle);
87 if (!fdP) {
88 sprintf(m->line, "Couldn't open %s Index of volume %" AFS_VOLID_FMT "\n",
89 class ? "small":"large", afs_printable_VolumeId_lu(V_id(vol)));
90 rx_Write(m->call, m->line, strlen(m->line));
91 code = EIO;
92 goto Bad_Extract;
93 }
94 size = FDH_SIZE(fdP);
95 *list = calloc(size / vcp->diskSize, sizeof(struct VnodeExtract));
96 if (!(*list)) {
97 code = ENOMEM;
98 goto Bad_Extract;
99 }
100 stream = FDH_FDOPEN(fdP, "r");
101 if (!stream) {
102 sprintf(m->line, "Couldn't stream open %s Index of volume %" AFS_VOLID_FMT "\n",
103 class ? "small":"large", afs_printable_VolumeId_lu(V_id(vol)));
104 rx_Write(m->call, m->line, strlen(m->line));
105 code = EIO;
106 goto Bad_Extract;
107 }
108 code = STREAM_ASEEK(stream, vcp->diskSize);
109 if (code)
110 goto Bad_Extract;
111
112 offset = vcp->diskSize;
113 e = *list;
114 while (!STREAM_EOF(stream)) {
115 afs_int32 vN = (offset >> (vcp->logSize -1)) - 1 + class;
116 if (STREAM_READ(vnode, vcp->diskSize, 1, stream) == 1) {
117 if (vnode->type != vNull) {
118 e->vN = vN;
119 e->parent = vnode->parent;
120 if (vN == where && class == vLarge) {
121 memcpy(vd, vnode, vcp->diskSize);
122 *parent = vnode->parent;
123 }
124 e++;
125 }
126 offset += vcp->diskSize;
127 }
128 }
129 *length = (e - *list);
130 if (class == vLarge) {
131 if (*parent) {
132 offset = (*parent + 1 - class) << (vcp->logSize -1);
133 code = STREAM_ASEEK(stream, offset);
134 if (STREAM_READ(vnode, vcp->diskSize, 1, stream) == 1)
135 memcpy(parentvd, vnode, vcp->diskSize);
136 else
137 code = EIO;
138 } else {
139 sprintf(m->line, "SplitVolume: extract didn't see directory %u\n", where);
140 rx_Write(m->call, m->line, strlen(m->line));
141 code = ENOENT;
142 }
143 }
144 if (m->verbose) {
145 sprintf(m->line, "Volume %" AFS_VOLID_FMT " has %u %s vnodes in volume %" AFS_VOLID_FMT "\n",
146 afs_printable_VolumeId_lu(V_parentId(vol)), *length,
147 class? "small":"large", afs_printable_VolumeId_lu(V_id(vol)));
148 rx_Write(m->call, m->line, strlen(m->line));
149 }
150
151 Bad_Extract:
152 if (stream)
153 STREAM_CLOSE(stream);
154 if (fdP)
155 FDH_CLOSE(fdP);
156 if (code) {
157 free(*list);
158 *list = 0;
159 }
160 return code;
161 }
162
163 static afs_int32
164 FindVnodes(struct Msg *m, afs_uint32 where,
165 struct VnodeExtract *list, afs_int32 length,
166 struct VnodeExtract *dlist, afs_int32 dlength,
167 afs_uint32 *needed, afs_int32 class)
168 {
169 afs_int32 i, j, found = 0;
170 afs_int32 parent = 0;
171
172 *needed = 0;
173 for (i=0; i<length; i++) {
174 if (list[i].vN == where) { /* dir to be replaced by mount point */
175 list[i].flag |= NEEDED;
176 parent = list[i].parent;
177 found = 1;
178 (*needed)++;
179 }
180 if (list[i].parent == where) { /* all 1st generation children */
181 list[i].flag |= (NEEDED + CHANGEPARENT);
182 (*needed)++;
183 }
184 }
185 if (list[0].vN & 1) { /* only for directories */
186 if (!found) {
187 sprintf(m->line,
188 "SplitVolume: directory %u where to start new volume not found\n",
189 where);
190 rx_Write(m->call, m->line, strlen(m->line));
191 return ENOENT;
192 }
193 found = 0;
194 for (i=0; i<length; i++) {
195 if (list[i].vN == parent) { /* dir where to create mount point */
196 list[i].flag |= PARENT;
197 found = 1;
198 break;
199 }
200 }
201 if (!found) {
202 sprintf(m->line, "SplitVolume: parent directory %u not found\n",
203 parent);
204 rx_Write(m->call, m->line, strlen(m->line));
205 return ENOENT;
206 }
207 }
208 found = 1;
209 while (found) {
210 found = 0;
211 for (i=0; i<dlength; i++) {
212 if (!(dlist[i].flag & NEEDED)) /* dirs to remain in old volume */
213 continue;
214 for (j=0; j<length; j++) {
215 if (list[j].parent == dlist[i].vN && !(list[j].flag & NEEDED)) {
216 list[j].flag |= NEEDED;
217 (*needed)++;
218 found = 1;
219 }
220 }
221 }
222 }
223 if (m->verbose) {
224 sprintf(m->line, "%u %s vnodes will go into the new volume\n",
225 *needed, class ? "small" : "large");
226 rx_Write(m->call, m->line, strlen(m->line));
227 }
228 return 0;
229 }
230
231 static afs_int32
232 copyDir(struct Msg *m, IHandle_t *inh, IHandle_t *outh)
233 {
234 FdHandle_t *infdP, *outfdP;
235 char *tbuf;
236 afs_sfsize_t size;
237 afs_foff_t offset;
238
239 infdP = IH_OPEN(inh);
240 if (!infdP) {
241 sprintf(m->line, "Couldn't open input directory %" AFS_VOLID_FMT ".%u.%u\n",
242 afs_printable_VolumeId_lu(inh->ih_vid),
243 (afs_uint32)(inh->ih_ino & NAMEI_VNODEMASK),
244 (afs_uint32)(inh->ih_ino >> NAMEI_UNIQSHIFT));
245 rx_Write(m->call, m->line, strlen(m->line));
246 return EIO;
247 }
248 outfdP = IH_OPEN(outh);
249 /*
250 * In case that a file with the same (NAMEI) name existed before and is still
251 * open outfdP may point to the wrong (unlinked) file. To make sure we write
252 * into the correct file it's safer to 1st FDH_REALLYCLOSE it and then to
253 * re-open it.
254 */
255 if (outfdP)
256 FDH_REALLYCLOSE(outfdP);
257 outfdP = IH_OPEN(outh);
258 if (!outfdP) {
259 sprintf(m->line, "Couldn't open output directory %" AFS_VOLID_FMT ".%u.%u\n",
260 afs_printable_VolumeId_lu(outh->ih_vid),
261 (afs_uint32)(outh->ih_ino & NAMEI_VNODEMASK),
262 (afs_uint32)(outh->ih_ino >> NAMEI_UNIQSHIFT));
263 rx_Write(m->call, m->line, strlen(m->line));
264 FDH_REALLYCLOSE(infdP);
265 return EIO;
266 }
267 tbuf = malloc(2048);
268 offset = 0;
269 size = FDH_SIZE(infdP);
270 while (size) {
271 size_t tlen;
272 tlen = size > 2048 ? 2048 : size;
273 if (FDH_PREAD(infdP, tbuf, tlen, offset) != tlen) {
274 sprintf(m->line, "Couldn't read directory %" AFS_VOLID_FMT ".%u.%u\n",
275 afs_printable_VolumeId_lu(infdP->fd_ih->ih_vid),
276 (afs_uint32)(infdP->fd_ih->ih_ino & NAMEI_VNODEMASK),
277 (afs_uint32)(infdP->fd_ih->ih_ino >> NAMEI_UNIQSHIFT));
278 rx_Write(m->call, m->line, strlen(m->line));
279 FDH_REALLYCLOSE(infdP);
280 FDH_REALLYCLOSE(outfdP);
281 free(tbuf);
282 return EIO;
283 }
284 if (FDH_PWRITE(outfdP, tbuf, tlen, offset) != tlen) {
285 sprintf(m->line, "Couldn't write directory %" AFS_VOLID_FMT ".%u.%u\n",
286 afs_printable_VolumeId_lu(outfdP->fd_ih->ih_vid),
287 (afs_uint32)(outfdP->fd_ih->ih_ino & NAMEI_VNODEMASK),
288 (afs_uint32)(outfdP->fd_ih->ih_ino >> NAMEI_UNIQSHIFT));
289 rx_Write(m->call, m->line, strlen(m->line));
290 FDH_REALLYCLOSE(infdP);
291 FDH_REALLYCLOSE(outfdP);
292 free(tbuf);
293 return EIO;
294 }
295 size -= tlen;
296 offset += tlen;
297 }
298 free(tbuf);
299 FDH_CLOSE(outfdP);
300 FDH_REALLYCLOSE(infdP);
301 return 0;
302 }
303
304 afs_int32 copyVnodes(struct Msg *m, Volume *vol, Volume *newvol,
305 afs_int32 class,
306 struct VnodeExtract *list, afs_int32 length,
307 afs_int32 where, afs_uint64 *blocks,
308 struct VnodeDiskObject *parVnode)
309 {
310 afs_int32 i, code = 0;
311 char buf[SIZEOF_LARGEDISKVNODE];
312 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)&buf;
313 FdHandle_t *fdP = 0;
314 FdHandle_t *newfdP = 0;
315 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
316 struct VnodeExtract *e;
317 afs_uint64 offset;
318 Inode ino, newino;
319
320 fdP = IH_OPEN(vol->vnodeIndex[class].handle);
321 if (!fdP) {
322 Log("Couldn't open %s Index of volume %" AFS_VOLID_FMT "\n",
323 class ? "small":"large", afs_printable_VolumeId_lu(V_id(vol)));
324 code = EIO;
325 goto Bad_Copy;
326 }
327 newfdP = IH_OPEN(newvol->vnodeIndex[class].handle);
328 if (!newfdP) {
329 Log("Couldn't open %s Index of volume %" AFS_VOLID_FMT "\n",
330 class ? "small":"large", afs_printable_VolumeId_lu(V_id(newvol)));
331 code = EIO;
332 goto Bad_Copy;
333 }
334
335 for (i=0; i<length; i++) {
336 e = &list[i];
337 if (e->flag) {
338 afs_uint64 size;
339 offset = (e->vN + 1 - class) << (vcp->logSize -1);
340 if (FDH_PREAD(fdP, vnode, vcp->diskSize, offset) != vcp->diskSize) {
341 Log("Couldn't read in %s Index of volume %" AFS_VOLID_FMT " at offset %llu\n",
342 class ? "small":"large",
343 afs_printable_VolumeId_lu(V_id(vol)), offset);
344 code = EIO;
345 goto Bad_Copy;
346 }
347 if (e->flag & PARENT) {
348 /*
349 * do a preventive copy on write for later update
350 */
351 IHandle_t *newh = 0;
352 IHandle_t *h = 0;
353 #if defined(NEARINODE_HINT) && !defined(AFS_NAMEI_ENV)
354 Inode nearInode;
355 V_pref(vol,nearInode)
356 #endif
357
358 newino = IH_CREATE(V_linkHandle(vol), V_device(vol),
359 VPartitionPath(V_partition(vol)),
360 nearInode, V_parentId(vol),
361 e->vN, vnode->uniquifier,
362 vnode->dataVersion);
363 IH_INIT(newh, V_device(vol), V_parentId(vol), newino);
364 ino = VNDISK_GET_INO(vnode);
365 IH_INIT(h, V_device(vol), V_parentId(vol), ino);
366 code = copyDir(m, h, newh);
367 if (code)
368 goto Bad_Copy;
369 /* Now update the vnode and write it back to disk */
370 VNDISK_SET_INO(vnode, newino);
371 vnode->cloned = 0;
372 if (FDH_PWRITE(fdP, vnode, vcp->diskSize, offset) != vcp->diskSize) {
373 Log("Couldn't write in %s Index of volume %" AFS_VOLID_FMT " at offset %llu\n",
374 class ? "small":"large", afs_printable_VolumeId_lu(V_id(vol)), offset);
375 code = EIO;
376 goto Bad_Copy;
377 }
378 if (parVnode != NULL)
379 memcpy(parVnode, vnode, sizeof(struct VnodeDiskObject));
380 }
381 if (e->flag & NEEDED && e->vN != where) {
382 VNDISK_GET_LEN(size, vnode);
383 *blocks += (size + 0x3ff) >> 10;
384 ino = VNDISK_GET_INO(vnode);
385 if (ino) {
386 IHandle_t *h, *newh;
387 Inode AFS_UNUSED nearInode;
388 #if defined(NEARINODE_HINT) && !defined(AFS_NAMEI_ENV)
389 V_pref(vol,nearInode)
390 #endif
391 IH_INIT(h, vol->device, V_parentId(vol), ino);
392 if (e->parent == where)
393 vnode->parent = 1;
394 newino = IH_CREATE(V_linkHandle(newvol), V_device(newvol),
395 VPartitionPath(V_partition(newvol)),
396 nearInode, V_parentId(newvol),
397 e->vN, vnode->uniquifier,
398 vnode->dataVersion);
399 if (!VALID_INO(newino)) {
400 Log("IH_CREATE failed for %" AFS_VOLID_FMT ".%u.%u\n",
401 afs_printable_VolumeId_lu(V_id(newvol)), e->vN, vnode->uniquifier);
402 code = EIO;
403 goto Bad_Copy;
404 }
405 nearInode = newino;
406 IH_INIT(newh, newvol->device, V_parentId(newvol), newino);
407 code = namei_replace_file_by_hardlink(newh, h);
408 VNDISK_SET_INO(vnode, newino);
409 #ifdef AFS_RXOSD_SUPPORT
410 } else {
411 code = osd_split_objects(vol, newvol, vnode, e->vN);
412 #endif /* AFS_RXOSD_SUPPORT */
413 }
414 if (code)
415 goto Bad_Copy;
416 if (e->flag & CHANGEPARENT)
417 vnode->parent = 1; /* in new root-directory */
418 vnode->cloned = 0;
419 if (FDH_PWRITE(newfdP, vnode, vcp->diskSize, offset) != vcp->diskSize) {
420 Log("Couldn't write in %s Index of volume %" AFS_VOLID_FMT " to offset %llu\n",
421 class ? "small":"large", afs_printable_VolumeId_lu(V_id(newvol)), offset);
422 code = EIO;
423 goto Bad_Copy;
424 }
425 }
426 }
427 }
428 /*
429 * Now copy the root directory from old to new volume
430 */
431 if (class == vLarge) {
432 IHandle_t *h, *newh;
433 char buf2[SIZEOF_LARGEDISKVNODE];
434 struct VnodeDiskObject *vnode2 = (struct VnodeDiskObject *)&buf2;
435 afs_uint64 newoffset, size;
436
437 newoffset = vcp->diskSize;
438 if (FDH_PREAD(newfdP, vnode2, vcp->diskSize, newoffset) != vcp->diskSize) {
439 Log("splitvolume: couldn't read in large Index of new volume %" AFS_VOLID_FMT " at offset %u\n",
440 afs_printable_VolumeId_lu(V_id(newvol)), vcp->diskSize);
441 code = EIO;
442 goto Bad_Copy;
443 }
444 offset = (where + 1 - class) << (vcp->logSize -1);
445 if (FDH_PREAD(fdP, vnode, vcp->diskSize, offset) != vcp->diskSize) {
446 Log("Couldn't read in large Index of old volume %" AFS_VOLID_FMT " at offset %llu\n",
447 afs_printable_VolumeId_lu(V_id(vol)), offset);
448 code = EIO;
449 goto Bad_Copy;
450 }
451 VNDISK_GET_LEN(size, vnode);
452 *blocks += (size + 0x3ff) >> 10;
453 ino = VNDISK_GET_INO(vnode);
454 IH_INIT(h, vol->device, V_parentId(vol), ino);
455 newino = VNDISK_GET_INO(vnode2);
456 IH_INIT(newh, newvol->device, V_parentId(newvol), newino);
457 code = copyDir(m, h, newh);
458 if (code) {
459 Log("splitvolume: copyDir failed for new root from "
460 "%" AFS_VOLID_FMT ".%u.%u to %" AFS_VOLID_FMT ".1.1\n",
461 afs_printable_VolumeId_lu(V_id(vol)), where, vnode->uniquifier,
462 afs_printable_VolumeId_lu(V_id(newvol)));
463 code = EIO;
464 goto Bad_Copy;
465 }
466 VNDISK_SET_INO(vnode, newino);
467 vnode->uniquifier = 1;
468 vnode->cloned = 0;
469 vnode->parent = vnode2->parent;
470 vnode->serverModifyTime = vnode2->serverModifyTime;
471 if (FDH_PWRITE(newfdP, vnode, vcp->diskSize, newoffset) != vcp->diskSize) {
472 Log("splitvolume: couldn't write in large Index of %" AFS_VOLID_FMT " at offset %u\n",
473 afs_printable_VolumeId_lu(V_id(newvol)), vcp->diskSize);
474 code = EIO;
475 }
476 }
477 Bad_Copy:
478 if (fdP)
479 FDH_CLOSE(fdP);
480 if (newfdP)
481 FDH_CLOSE(newfdP);
482 return code;
483 }
484
485 static afs_int32
486 findName(Volume *vol, struct VnodeDiskObject *vd, afs_uint32 vN,
487 afs_uint32 un, char *name,afs_int32 length)
488 {
489 afs_int32 code;
490 Inode ino;
491 DirHandle dir;
492
493 ino = VNDISK_GET_INO(vd);
494 SetSalvageDirHandle(&dir, V_id(vol), V_device(vol), ino);
495
496 code = afs_dir_InverseLookup(&dir, vN, un, name, length);
497 FidZap(&dir);
498 return code;
499 }
500
501 static afs_int32
502 createMountpoint(Volume *vol, Volume *newvol, struct VnodeDiskObject *parent,
503 afs_uint32 vN, struct VnodeDiskObject *vd, char *name)
504 {
505 afs_int32 code;
506 Inode ino, newino;
507 DirHandle dir;
508 IHandle_t *h, *hp;
509 struct VnodeDiskObject vnode;
510 FdHandle_t *fdP, *fdP2;
511 afs_uint64 size;
512 afs_foff_t offset;
513 afs_int32 class = vSmall;
514 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
515 #if defined(NEARINODE_HINT) && !defined(AFS_NAMEI_ENV)
516 Inode nearInode = 0;
517 #endif
518 AFSFid fid;
519 struct timeval now;
520 afs_uint32 newvN;
521 char symlink[VNAMESIZE + 1];
522 ssize_t rc;
523
524 FT_GetTimeOfDay(&now, 0);
525 fdP = IH_OPEN(vol->vnodeIndex[vSmall].handle);
526 if (!fdP) {
527 Log("split volume: error opening small vnode index of %" AFS_VOLID_FMT "\n", afs_printable_VolumeId_lu(V_id(vol)));
528 return EIO;
529 }
530 offset = vcp->diskSize;
531 while (1) {
532 rc = FDH_PREAD(fdP, &vnode, vcp->diskSize, offset);
533 if (rc != vcp->diskSize) {
534 if (rc < 0) {
535 Log("split volume: error reading small vnode index of %" AFS_VOLID_FMT "\n", afs_printable_VolumeId_lu(V_id(vol)));
536 return EIO;
537 }
538 if (rc == 0)
539 break;
540 if (rc < vcp->diskSize)
541 break;
542 }
543 if (vnode.type == vNull)
544 break;
545 offset += vcp->diskSize;
546 }
547 memset(&vnode, 0, sizeof(vnode));
548 vnode.type = vSymlink;
549 V_nextVnodeUnique(vol)++;
550 vnode.uniquifier = V_nextVnodeUnique(vol);
551 vnode.author = vd->author;
552 vnode.owner = vd->owner;
553 vnode.group = vd->group;
554 vnode.modeBits = 0644;
555 vnode.unixModifyTime = now.tv_sec;
556 vnode.serverModifyTime = now.tv_sec;
557 vnode.dataVersion = 1;
558 vnode.linkCount = 1;
559 vnode.parent = vN;
560
561 newvN = (offset >> (VnodeClassInfo[class].logSize - 1)) - 1 + class;
562 #if defined(NEARINODE_HINT) && !defined(AFS_NAMEI_ENV)
563 V_pref(vol,nearInode)
564 #endif
565 newino = IH_CREATE(V_linkHandle(vol), V_device(vol),
566 VPartitionPath(V_partition(vol)), nearInode,
567 V_parentId(vol), newvN, vnode.uniquifier, 1);
568
569 IH_INIT(h, V_device(vol), V_parentId(vol), newino);
570 fdP2 = IH_OPEN(h);
571 if (!fdP2) {
572 Log("split volume: couldn't open inode for mountpoint %" AFS_VOLID_FMT ".%u.%u\n",
573 afs_printable_VolumeId_lu(V_id(vol)), newvN, vnode.uniquifier);
574 return EIO;
575 }
576 sprintf(symlink, "#%s", V_name(newvol));
577 size = strlen(symlink) + 1;
578 if (FDH_PWRITE(fdP2, symlink, size, 0) != size) {
579 Log("split volume: couldn't write mountpoint %" AFS_VOLID_FMT ".%u.%u\n",
580 afs_printable_VolumeId_lu(V_id(vol)), newvN, vnode.uniquifier);
581 return EIO;
582 }
583 FDH_REALLYCLOSE(fdP2);
584 IH_RELEASE(h);
585 VNDISK_SET_INO(&vnode, newino);
586 VNDISK_SET_LEN(&vnode, size);
587 #ifndef AFS_RXOSD_SUPPORT
588 vnode.vnodeMagic = SMALLVNODEMAGIC;
589 #endif
590 if (FDH_PWRITE(fdP, &vnode, vcp->diskSize, offset) != vcp->diskSize) {
591 Log("split volume: couldn't write vnode for mountpoint %" AFS_VOLID_FMT ".%u.%u\n",
592 afs_printable_VolumeId_lu(V_id(vol)), newvN, vnode.uniquifier);
593 return EIO;
594 }
595 FDH_REALLYCLOSE(fdP);
596
597 fid.Volume = V_id(vol);
598 fid.Vnode = newvN;
599 fid.Unique = vnode.uniquifier;
600
601 /*
602 * Now update the parent directory.
603 */
604
605 ino = VNDISK_GET_INO(parent);
606 SetSalvageDirHandle(&dir, V_id(vol), V_device(vol), ino);
607
608 code = afs_dir_Delete(&dir, name);
609 if (code) {
610 Log("splitvolume: couldn't delete directory entry for %s in %" AFS_VOLID_FMT ".%u.%u, code = %d\n",
611 name, afs_printable_VolumeId_lu(V_id(vol)), vN, parent->uniquifier, code);
612 return code;
613 }
614 code = afs_dir_Create(&dir, name, &fid);
615 FidZap(&dir);
616
617 /* Make sure the directory file doesn't remain open */
618 IH_INIT(hp, V_device(vol), V_parentId(vol), ino);
619 fdP = IH_OPEN(hp);
620 if (fdP)
621 FDH_REALLYCLOSE(fdP);
622 IH_RELEASE(hp);
623
624 class = vLarge;
625 vcp = &VnodeClassInfo[class];
626 fdP = IH_OPEN(vol->vnodeIndex[class].handle);
627 offset = (vN + 1 - class) << (vcp->logSize -1);
628 parent->dataVersion++;
629 if (FDH_PWRITE(fdP, parent, vcp->diskSize, offset) != vcp->diskSize) {
630 Log("split volume: couldn't write vnode for parent directory %" AFS_VOLID_FMT ".%u.%u\n",
631 afs_printable_VolumeId_lu(V_id(vol)), vN, parent->uniquifier);
632 return EIO;
633 }
634 FDH_REALLYCLOSE(fdP);
635 return code;
636 }
637
638 static afs_int32
639 deleteVnodes(Volume *vol, afs_int32 class,
640 struct VnodeExtract *list, afs_int32 length,
641 afs_uint64 *blocks)
642 {
643 afs_int32 i, code = 0;
644 char buf[SIZEOF_LARGEDISKVNODE];
645 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)&buf;
646 FdHandle_t *fdP = 0;
647 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
648 struct VnodeExtract *e;
649 afs_sfsize_t size;
650 afs_uint64 offset;
651 Inode ino;
652
653 fdP = IH_OPEN(vol->vnodeIndex[class].handle);
654 if (!fdP) {
655 Log("Couldn't open %s Index of volume %" AFS_VOLID_FMT "\n",
656 class ? "small":"large", afs_printable_VolumeId_lu(V_id(vol)));
657 code = EIO;
658 goto Bad_Delete;
659 }
660
661 for (i=0; i<length; i++) {
662 e = &list[i];
663 if (e->flag & NEEDED) {
664 offset = (e->vN + 1 - class) << (vcp->logSize -1);
665 if (FDH_PREAD(fdP, vnode, vcp->diskSize, offset) != vcp->diskSize) {
666 Log("Couldn't read in %s Index of volume %" AFS_VOLID_FMT " at offset %llu\n",
667 class ? "small":"large", afs_printable_VolumeId_lu(V_id(vol)), offset);
668 code = EIO;
669 goto Bad_Delete;
670 }
671 VNDISK_GET_LEN(size, vnode);
672 *blocks += (size + 0x3ff) >> 10;
673 ino = VNDISK_GET_INO(vnode);
674 if (ino) {
675 IHandle_t *h;
676 IH_INIT(h, vol->device, V_parentId(vol), ino);
677 IH_DEC(h, ino, V_parentId(vol));
678 #ifdef AFS_RXOSD_SUPPORT
679 } else {
680 code = osdRemove(vol, vnode, e->vN);
681 #endif /* AFS_RXOSD_SUPPORT */
682 }
683 memset(vnode, 0, vcp->diskSize);
684 vnode->type = vNull;
685 if (FDH_PWRITE(fdP, vnode, vcp->diskSize, offset) != vcp->diskSize) {
686 Log("Couldn't write in %s Index of volume %" AFS_VOLID_FMT " to offset %llu\n",
687 class ? "small":"large", afs_printable_VolumeId_lu(V_id(vol)), offset);
688 }
689 }
690 }
691 Bad_Delete:
692 if (fdP)
693 FDH_CLOSE(fdP);
694 return code;
695 }
696
697 afs_int32
698 split_volume(struct rx_call *call, Volume *vol, Volume *newvol,
699 afs_uint32 where, afs_int32 verbose)
700 {
701 Error code = 0;
702 struct VnodeExtract *dirList = 0;
703 struct VnodeExtract *fileList = 0;
704 afs_uint64 blocks = 0;
705 afs_uint32 filesNeeded, dirsNeeded;
706 afs_uint32 dl, fl;
707 char buf[SIZEOF_LARGEDISKVNODE];
708 char buf2[SIZEOF_LARGEDISKVNODE];
709 struct VnodeDiskObject *rootVnode = (struct VnodeDiskObject *)&buf;
710 struct VnodeDiskObject *parVnode = (struct VnodeDiskObject *)&buf2;
711 char name[256];
712 afs_uint32 parent;
713 struct Msg *m;
714
715 m = calloc(1, sizeof(struct Msg));
716 m->call = call;
717 m->verbose = verbose;
718
719 /*
720 * First step: planning
721 *
722 * Find out which directories will belong to the new volume
723 *
724 */
725 if (verbose) {
726 sprintf(m->line,
727 "1st step: extract vnode essence from large vnode file\n");
728 rx_Write(m->call, m->line, strlen(m->line));
729 }
730
731 code = ExtractVnodes(m, vol, vLarge, &dirList, &dl, where, rootVnode,
732 &parent, parVnode);
733 if (code) {
734 sprintf(m->line,
735 "ExtractVnodes failed for %" AFS_VOLID_FMT " for directories with code %d\n",
736 afs_printable_VolumeId_lu(V_id(vol)), code);
737 rx_Write(m->call, m->line, strlen(m->line));
738 return code;
739 }
740
741 if (verbose) {
742 sprintf(m->line, "2nd step: look for name of vnode %u in directory %" AFS_VOLID_FMT ".%u.%u\n",
743 where, afs_printable_VolumeId_lu(V_id(vol)), parent, parVnode->uniquifier);
744 rx_Write(m->call, m->line, strlen(m->line));
745 }
746 code = findName(vol, parVnode, where, rootVnode->uniquifier,
747 name, sizeof(name));
748 if (code) {
749 sprintf(m->line,
750 "splitvolume: could'nt find name of %u in directory %" AFS_VOLID_FMT ".%u.%u.\n",
751 where, afs_printable_VolumeId_lu(V_id(vol)), parent, parVnode->uniquifier);
752 rx_Write(m->call, m->line, strlen(m->line));
753 return code;
754 }
755 if (verbose) {
756 sprintf(m->line, "name of %u is %s\n", where, name);
757 rx_Write(m->call, m->line, strlen(m->line));
758 }
759
760 if (verbose) {
761 sprintf(m->line, "3rd step: find all directory vnodes belonging to the subtree under %u \"%s\"\n",
762 where, name);
763 rx_Write(m->call, m->line, strlen(m->line));
764 }
765 code = FindVnodes(m, where, dirList, dl, dirList, dl, &dirsNeeded, 1);
766 if (code) {
767 sprintf(m->line,
768 "FindVnodes for directories failed with code %d\n", code);
769 rx_Write(m->call, m->line, strlen(m->line));
770 return code;
771 }
772
773 if (verbose) {
774 sprintf(m->line, "4th step extract vnode essence from small vnode file\n");
775 rx_Write(m->call, m->line, strlen(m->line));
776 }
777 code = ExtractVnodes(m, vol, vSmall, &fileList, &fl, where, 0, 0, 0);
778 if (code) {
779 sprintf(m->line,
780 "ExtractVnodes failed for %" AFS_VOLID_FMT " for files with code %d\n",
781 afs_printable_VolumeId_lu(V_id(vol)), code);
782 rx_Write(m->call, m->line, strlen(m->line));
783 return code;
784 }
785 if (verbose) {
786 sprintf(m->line, "5th step: find all small vnodes belonging to the subtree under %u \"%s\"\n",
787 where, name);
788 rx_Write(m->call, m->line, strlen(m->line));
789 }
790 FindVnodes(m, where, fileList, fl, dirList, dl, &filesNeeded, 0);
791
792 /*
793 * Third step: create hard links for all files needed
794 *
795 */
796
797 V_destroyMe(newvol) = DESTROY_ME;
798 V_inService(newvol) = 0;
799 if (verbose) {
800 sprintf(m->line, "6th step: create hard links in the AFSIDat tree between files of the old and new volume\n");
801 rx_Write(m->call, m->line, strlen(m->line));
802 }
803 code = copyVnodes(m, vol, newvol, 1, fileList, fl, where, &blocks, 0);
804 if (code) {
805 sprintf(m->line, "copyVnodes for files failed with code %d\n", code);
806 rx_Write(m->call, m->line, strlen(m->line));
807 return code;
808 }
809
810 /*
811 * Forth step: create hard links for all directories and copy
812 * split directory to new root directory
813 */
814
815 if (verbose) {
816 sprintf(m->line, "7th step: create hard links in the AFSIDat tree between directories of the old and new volume and make dir %u to new volume's root directory.\n",
817 where);
818 rx_Write(m->call, m->line, strlen(m->line));
819 }
820 code = copyVnodes(m, vol, newvol, 0, dirList, dl, where, &blocks, parVnode);
821 if (code) {
822 sprintf(m->line, "copyVnodes for directories failed with code %d\n", code);
823 rx_Write(m->call, m->line, strlen(m->line));
824 return code;
825 }
826
827 /*
828 * Finalize new volume
829 *
830 */
831 if (verbose) {
832 sprintf(m->line, "8th step: write new volume's metadata to disk\n");
833 rx_Write(m->call, m->line, strlen(m->line));
834 }
835
836 V_diskused(newvol) = blocks;
837 #ifdef AFS_RXOSD_SUPPORT
838 V_osdFlag(newvol) = V_osdFlag(vol);
839 #endif
840 V_filecount(newvol) = filesNeeded + dirsNeeded;
841 V_destroyMe(newvol) = 0;
842 V_maxquota(newvol) = V_maxquota(vol);
843 V_uniquifier(newvol) = V_uniquifier(vol);
844 V_inService(newvol) = 1;
845 VUpdateVolume(&code, newvol);
846
847 /*
848 * Sixth step: change directory entry in old volume:
849 * rename old tree and create mount point for new volume.
850 */
851 if (verbose) {
852 sprintf(m->line, "9th step: create mountpoint \"%s\" for new volume in old volume's directory %u.\n", name, parent);
853 rx_Write(m->call, m->line, strlen(m->line));
854 }
855
856 code = createMountpoint(vol, newvol, parVnode, parent, rootVnode, name);
857 if (code) {
858 sprintf(m->line, "createMountpoint failed with code %d\n", code);
859 rx_Write(m->call, m->line, strlen(m->line));
860 return code;
861 }
862 /*
863 * Now both volumes should be ready and consistent, but the old volume
864 * contains still the vnodes and data we transferred into the new one.
865 * Delete orphaned vnodes and data.
866 */
867
868 blocks = 0;
869 if (verbose) {
870 sprintf(m->line, "10th step: delete large vnodes belonging to subtree in the old volume.\n");
871 rx_Write(m->call, m->line, strlen(m->line));
872 }
873 deleteVnodes(vol, vLarge, dirList, dl, &blocks);
874 if (verbose) {
875 sprintf(m->line, "11th step: delete small vnodes belonging to subtree in the old volume.\n");
876 rx_Write(m->call, m->line, strlen(m->line));
877 }
878 deleteVnodes(vol, vSmall, fileList, fl, &blocks);
879 V_diskused(vol) -= blocks;
880 V_filecount(vol) -= (filesNeeded + dirsNeeded + 1);
881 VUpdateVolume(&code, vol);
882
883 sprintf(m->line, "Finished!\n");
884 rx_Write(m->call, m->line, strlen(m->line));
885 m->line[0] = 0;
886 m->line[1] = 0;
887 m->line[2] = 0;
888 m->line[3] = 0;
889 rx_Write(m->call, m->line, 4);
890 free(m);
891 return code;
892 }
893 #endif