Commit | Line | Data |
---|---|---|
805e021f CE |
1 | /* |
2 | * Copyright 2000, International Business Machines Corporation and others. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * This software has been released under the terms of the IBM Public | |
6 | * License. For details, see the LICENSE file in the top-level source | |
7 | * directory or online at http://www.openafs.org/dl/license10.html | |
8 | */ | |
9 | ||
10 | /* | |
11 | * Implements: | |
12 | * afs_lookup | |
13 | * EvalMountPoint | |
14 | * afs_DoBulkStat | |
15 | */ | |
16 | ||
17 | #include <afsconfig.h> | |
18 | #include "afs/param.h" | |
19 | ||
20 | #include "afs/sysincludes.h" /* Standard vendor system headers */ | |
21 | #include "afsincludes.h" /* Afs-based standard headers */ | |
22 | #include "afs/afs_stats.h" /* statistics */ | |
23 | #include "afs/afs_cbqueue.h" | |
24 | #include "afs/nfsclient.h" | |
25 | #include "afs/exporter.h" | |
26 | #include "afs/afs_osidnlc.h" | |
27 | #include "afs/afs_dynroot.h" | |
28 | ||
29 | extern struct vcache *afs_globalVp; | |
30 | ||
31 | afs_int32 afs_bkvolpref = 0; | |
32 | afs_int32 afs_bulkStatsDone; | |
33 | static int bulkStatCounter = 0; /* counter for bulk stat seq. numbers */ | |
34 | int afs_fakestat_enable = 0; /* 1: fakestat-all, 2: fakestat-crosscell */ | |
35 | ||
36 | ||
/*
 * True when name is exactly the four-character string "@sys".
 *
 * The comparison is done one character at a time; comparing a single 32-bit
 * word would be faster, but would be dependent on byte order and alignment.
 * The tests short-circuit from left to right, so no character past the
 * first mismatch (or past the terminating NUL) is ever read.  Note that the
 * argument is expanded several times.
 */
#define AFS_EQ_ATSYS(name) \
    (((name)[0] == '@') && \
     ((name)[1] == 's') && \
     ((name)[2] == 'y') && \
     ((name)[3] == 's') && \
     (!(name)[4]))
41 | ||
42 | /* EvalMountPoint: call under write lock; evaluates the mvid.target_root field from a mt pt. | |
43 | * avc is the vnode of the mount point object; must be write-locked. | |
44 | * advc is the vnode of the containing directory (optional; if NULL and | |
45 | * EvalMountPoint succeeds, caller must initialize (*avolpp)->dotdot) | |
46 | * avolpp is where we return a pointer to the volume named by the mount pt, on success | |
47 | * areq is the identity of the caller. | |
48 | * | |
49 | * NOTE: this function returns a held volume structure in *avolpp if it returns 0! | |
50 | */ | |
51 | static int | |
52 | EvalMountData(char type, char *data, afs_uint32 states, afs_uint32 cellnum, | |
53 | struct volume **avolpp, struct vrequest *areq, | |
54 | afs_uint32 *acellidxp, afs_uint32 *avolnump, | |
55 | afs_uint32 *avnoidp, afs_uint32 *auniqp) | |
56 | { | |
57 | struct volume *tvp = 0; | |
58 | struct VenusFid tfid; | |
59 | struct cell *tcell; | |
60 | char *cpos, *volnamep = NULL; | |
61 | char *buf, *endptr; | |
62 | afs_int32 prefetch; /* 1=>None 2=>RO 3=>BK */ | |
63 | afs_int32 mtptCell, assocCell = 0, hac = 0; | |
64 | afs_int32 samecell, roname, len; | |
65 | afs_uint32 volid = 0, cellidx, vnoid = 0, uniq = 0; | |
66 | ||
67 | /* Start by figuring out and finding the cell */ | |
68 | cpos = afs_strchr(data, ':'); /* if cell name present */ | |
69 | if (cpos) { | |
70 | afs_uint32 mtptCellnum; | |
71 | volnamep = cpos + 1; | |
72 | *cpos = 0; | |
73 | if ((afs_strtoi_r(data, &endptr, &mtptCellnum) == 0) && | |
74 | (endptr == cpos)) { | |
75 | tcell = afs_GetCell(mtptCellnum, READ_LOCK); | |
76 | } else { | |
77 | tcell = afs_GetCellByName(data, READ_LOCK); | |
78 | } | |
79 | *cpos = ':'; | |
80 | } else if (cellnum) { | |
81 | volnamep = data; | |
82 | tcell = afs_GetCell(cellnum, READ_LOCK); | |
83 | } else { | |
84 | /* No cellname or cellnum; return ENODEV */ | |
85 | return ENODEV; | |
86 | } | |
87 | if (!tcell) { | |
88 | /* no cell found; return ENODEV */ | |
89 | return ENODEV; | |
90 | } | |
91 | ||
92 | cellidx = tcell->cellIndex; | |
93 | mtptCell = tcell->cellNum; /* The cell for the mountpoint */ | |
94 | if (tcell->lcellp) { | |
95 | hac = 1; /* has associated cell */ | |
96 | assocCell = tcell->lcellp->cellNum; /* The associated cell */ | |
97 | } | |
98 | afs_PutCell(tcell, READ_LOCK); | |
99 | ||
100 | /* If there's nothing to look up, we can't proceed */ | |
101 | if (!*volnamep) | |
102 | return ENODEV; | |
103 | ||
104 | /* cell found. figure out volume */ | |
105 | cpos = afs_strchr(volnamep, ':'); | |
106 | if (cpos) | |
107 | *cpos = 0; | |
108 | ||
109 | /* Look for an all-numeric volume ID */ | |
110 | if ((afs_strtoi_r(volnamep, &endptr, &volid) == 0) && | |
111 | ((endptr == cpos) || (!*endptr))) | |
112 | { | |
113 | /* Ok. Is there a vnode and uniq? */ | |
114 | if (cpos) { | |
115 | char *vnodep = (char *)(cpos + 1); | |
116 | char *uniqp = NULL; | |
117 | if ((!*vnodep) /* no vnode after colon */ | |
118 | || !(uniqp = afs_strchr(vnodep, ':')) /* no colon for uniq */ | |
119 | || (!*(++uniqp)) /* no uniq after colon */ | |
120 | || (afs_strtoi_r(vnodep, &endptr, &vnoid) != 0) /* bad vno */ | |
121 | || (*endptr != ':') /* bad vnode field */ | |
122 | || (afs_strtoi_r(uniqp, &endptr, &uniq) != 0) /* bad uniq */ | |
123 | || (*endptr)) /* anything after uniq */ | |
124 | { | |
125 | *cpos = ':'; | |
126 | /* sorry. vnode and uniq, or nothing */ | |
127 | return ENODEV; | |
128 | } | |
129 | } | |
130 | } else | |
131 | volid = 0; | |
132 | ||
133 | /* | |
134 | * If the volume ID was all-numeric, and they didn't ask for a | |
135 | * pointer to the volume structure, then just return the number | |
136 | * as-is. This is currently only used for handling name lookups | |
137 | * in the dynamic mount directory. | |
138 | */ | |
139 | if (volid && !avolpp) { | |
140 | if (cpos) | |
141 | *cpos = ':'; | |
142 | goto done; | |
143 | } | |
144 | ||
145 | /* | |
146 | * If the volume ID was all-numeric, and the type was '%', then | |
147 | * assume whoever made the mount point knew what they were doing, | |
148 | * and don't second-guess them by forcing use of a RW volume when | |
149 | * they gave the ID of something else. | |
150 | */ | |
151 | if (volid && type == '%') { | |
152 | tfid.Fid.Volume = volid; /* remember BK volume */ | |
153 | tfid.Cell = mtptCell; | |
154 | tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK); /* get the new one */ | |
155 | if (cpos) /* one way or another we're done */ | |
156 | *cpos = ':'; | |
157 | if (!tvp) | |
158 | return ENODEV; /* afs_GetVolume failed; return ENODEV */ | |
159 | goto done; | |
160 | } | |
161 | ||
162 | /* Is volume name a "<n>.backup" or "<n>.readonly" name */ | |
163 | len = strlen(volnamep); | |
164 | roname = ((len > 9) && (strcmp(&volnamep[len - 9], ".readonly") == 0)) | |
165 | || ((len > 7) && (strcmp(&volnamep[len - 7], ".backup") == 0)); | |
166 | ||
167 | /* When we cross mountpoint, do we stay in the same cell */ | |
168 | samecell = (cellnum == mtptCell) || (hac && (cellnum == assocCell)); | |
169 | ||
170 | /* Decide whether to prefetch the BK, or RO. Also means we want the BK or | |
171 | * RO. | |
172 | * If this is a regular mountpoint with a RW volume name | |
173 | * - If BK preference is enabled AND we remain within the same cell AND | |
174 | * start from a BK volume, then we will want to prefetch the BK volume. | |
175 | * - If we cross a cell boundary OR start from a RO volume, then we will | |
176 | * want to prefetch the RO volume. | |
177 | */ | |
178 | if ((type == '#') && !roname) { | |
179 | if (afs_bkvolpref && samecell && (states & CBackup)) | |
180 | prefetch = 3; /* Prefetch the BK */ | |
181 | else if (!samecell || (states & CRO)) | |
182 | prefetch = 2; /* Prefetch the RO */ | |
183 | else | |
184 | prefetch = 1; /* Do not prefetch */ | |
185 | } else { | |
186 | prefetch = 1; /* Do not prefetch */ | |
187 | } | |
188 | ||
189 | /* Get the volume struct. Unless this volume name has ".readonly" or | |
190 | * ".backup" in it, this will get the volume struct for the RW volume. | |
191 | * The RO volume will be prefetched if requested (but not returned). | |
192 | * Set up to use volname first. | |
193 | */ | |
194 | tvp = afs_GetVolumeByName(volnamep, mtptCell, prefetch, areq, WRITE_LOCK); | |
195 | ||
196 | /* If no volume was found in this cell, try the associated linked cell */ | |
197 | if (!tvp && hac && areq->volumeError) { | |
198 | tvp = | |
199 | afs_GetVolumeByName(volnamep, assocCell, prefetch, areq, | |
200 | WRITE_LOCK); | |
201 | } | |
202 | ||
203 | /* Still not found. If we are looking for the RO, then perhaps the RW | |
204 | * doesn't exist? Try adding ".readonly" to volname and look for that. | |
205 | * Don't know why we do this. Would have still found it in above call - jpm. | |
206 | */ | |
207 | if (!tvp && (prefetch == 2) && len < AFS_SMALLOCSIZ - 10) { | |
208 | buf = osi_AllocSmallSpace(len + 10); | |
209 | ||
210 | strcpy(buf, volnamep); | |
211 | afs_strcat(buf, ".readonly"); | |
212 | ||
213 | tvp = afs_GetVolumeByName(buf, mtptCell, 1, areq, WRITE_LOCK); | |
214 | ||
215 | /* Try the associated linked cell if failed */ | |
216 | if (!tvp && hac && areq->volumeError) { | |
217 | tvp = afs_GetVolumeByName(buf, assocCell, 1, areq, WRITE_LOCK); | |
218 | } | |
219 | osi_FreeSmallSpace(buf); | |
220 | } | |
221 | /* done with volname */ | |
222 | if (cpos) | |
223 | *cpos = ':'; | |
224 | if (!tvp) | |
225 | return ENODEV; /* Couldn't find the volume */ | |
226 | else | |
227 | volid = tvp->volume; | |
228 | ||
229 | /* Don't cross mountpoint from a BK to a BK volume */ | |
230 | if ((states & CBackup) && (tvp->states & VBackup)) { | |
231 | afs_PutVolume(tvp, WRITE_LOCK); | |
232 | return ENODEV; | |
233 | } | |
234 | ||
235 | /* If we want (prefetched) the BK and it exists, then drop the RW volume | |
236 | * and get the BK. | |
237 | * Otherwise, if we want (prefetched0 the RO and it exists, then drop the | |
238 | * RW volume and get the RO. | |
239 | * Otherwise, go with the RW. | |
240 | */ | |
241 | if ((prefetch == 3) && tvp->backVol) { | |
242 | tfid.Fid.Volume = tvp->backVol; /* remember BK volume */ | |
243 | tfid.Cell = tvp->cell; | |
244 | afs_PutVolume(tvp, WRITE_LOCK); /* release old volume */ | |
245 | tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK); /* get the new one */ | |
246 | if (!tvp) | |
247 | return ENODEV; /* oops, can't do it */ | |
248 | } else if ((prefetch >= 2) && tvp->roVol) { | |
249 | tfid.Fid.Volume = tvp->roVol; /* remember RO volume */ | |
250 | tfid.Cell = tvp->cell; | |
251 | afs_PutVolume(tvp, WRITE_LOCK); /* release old volume */ | |
252 | tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK); /* get the new one */ | |
253 | if (!tvp) | |
254 | return ENODEV; /* oops, can't do it */ | |
255 | } | |
256 | ||
257 | done: | |
258 | if (acellidxp) | |
259 | *acellidxp = cellidx; | |
260 | if (avolnump) | |
261 | *avolnump = volid; | |
262 | if (avnoidp) | |
263 | *avnoidp = vnoid; | |
264 | if (auniqp) | |
265 | *auniqp = uniq; | |
266 | if (avolpp) | |
267 | *avolpp = tvp; | |
268 | else if (tvp) | |
269 | afs_PutVolume(tvp, WRITE_LOCK); | |
270 | return 0; | |
271 | } | |
272 | ||
273 | int | |
274 | EvalMountPoint(struct vcache *avc, struct vcache *advc, | |
275 | struct volume **avolpp, struct vrequest *areq) | |
276 | { | |
277 | afs_int32 code; | |
278 | afs_uint32 avnoid, auniq; | |
279 | ||
280 | AFS_STATCNT(EvalMountPoint); | |
281 | #ifdef notdef | |
282 | if (avc->mvid.target_root && (avc->f.states & CMValid)) | |
283 | return 0; /* done while racing */ | |
284 | #endif | |
285 | *avolpp = NULL; | |
286 | code = afs_HandleLink(avc, areq); | |
287 | if (code) | |
288 | return code; | |
289 | ||
290 | /* Determine which cell and volume the mointpoint goes to */ | |
291 | code = EvalMountData(avc->linkData[0], avc->linkData + 1, | |
292 | avc->f.states, avc->f.fid.Cell, avolpp, areq, 0, 0, | |
293 | &avnoid, &auniq); | |
294 | if (code) return code; | |
295 | ||
296 | if (!avnoid) | |
297 | avnoid = 1; | |
298 | ||
299 | if (!auniq) | |
300 | auniq = 1; | |
301 | ||
302 | if (avc->mvid.target_root == NULL) | |
303 | avc->mvid.target_root = osi_AllocSmallSpace(sizeof(struct VenusFid)); | |
304 | avc->mvid.target_root->Cell = (*avolpp)->cell; | |
305 | avc->mvid.target_root->Fid.Volume = (*avolpp)->volume; | |
306 | avc->mvid.target_root->Fid.Vnode = avnoid; | |
307 | avc->mvid.target_root->Fid.Unique = auniq; | |
308 | avc->f.states |= CMValid; | |
309 | ||
310 | /* Used to: if the mount point is stored within a backup volume, | |
311 | * then we should only update the parent pointer information if | |
312 | * there's none already set, so as to avoid updating a volume's .. | |
313 | * info with something in an OldFiles directory. | |
314 | * | |
315 | * Next two lines used to be under this if: | |
316 | * | |
317 | * if (!(avc->f.states & CBackup) || tvp->dotdot.Fid.Volume == 0) | |
318 | * | |
319 | * Now: update mount point back pointer on every call, so that we handle | |
320 | * multiple mount points better. This way, when du tries to go back | |
321 | * via chddir(".."), it will end up exactly where it started, yet | |
322 | * cd'ing via a new path to a volume will reset the ".." pointer | |
323 | * to the new path. | |
324 | */ | |
325 | (*avolpp)->mtpoint = avc->f.fid; /* setup back pointer to mtpoint */ | |
326 | ||
327 | if (advc) | |
328 | (*avolpp)->dotdot = advc->f.fid; | |
329 | ||
330 | return 0; | |
331 | } | |
332 | ||
333 | /* | |
334 | * afs_InitFakeStat | |
335 | * | |
336 | * Must be called on an afs_fakestat_state object before calling | |
337 | * afs_EvalFakeStat or afs_PutFakeStat. Calling afs_PutFakeStat | |
338 | * without calling afs_EvalFakeStat is legal, as long as this | |
339 | * function is called. | |
340 | */ | |
341 | void | |
342 | afs_InitFakeStat(struct afs_fakestat_state *state) | |
343 | { | |
344 | if (!afs_fakestat_enable) | |
345 | return; | |
346 | ||
347 | state->valid = 1; | |
348 | state->did_eval = 0; | |
349 | state->need_release = 0; | |
350 | } | |
351 | ||
352 | /* | |
353 | * afs_EvalFakeStat_int | |
354 | * | |
355 | * The actual implementation of afs_EvalFakeStat and afs_TryEvalFakeStat, | |
356 | * which is called by those wrapper functions. | |
357 | * | |
358 | * Only issues RPCs if canblock is non-zero. | |
359 | */ | |
360 | static int | |
361 | afs_EvalFakeStat_int(struct vcache **avcp, struct afs_fakestat_state *state, | |
362 | struct vrequest *areq, int canblock) | |
363 | { | |
364 | struct vcache *tvc, *root_vp; | |
365 | struct volume *tvolp = NULL; | |
366 | int code = 0; | |
367 | ||
368 | if (!afs_fakestat_enable) | |
369 | return 0; | |
370 | ||
371 | osi_Assert(state->valid == 1); | |
372 | osi_Assert(state->did_eval == 0); | |
373 | state->did_eval = 1; | |
374 | ||
375 | tvc = *avcp; | |
376 | if (tvc->mvstat != AFS_MVSTAT_MTPT) | |
377 | return 0; | |
378 | ||
379 | if (canblock) { | |
380 | /* Is the call to VerifyVCache really necessary? */ | |
381 | code = afs_VerifyVCache(tvc, areq); | |
382 | if (code) | |
383 | goto done; | |
384 | ||
385 | ObtainWriteLock(&tvc->lock, 599); | |
386 | code = EvalMountPoint(tvc, NULL, &tvolp, areq); | |
387 | ReleaseWriteLock(&tvc->lock); | |
388 | if (code) | |
389 | goto done; | |
390 | if (tvolp) { | |
391 | tvolp->dotdot = tvc->f.fid; | |
392 | tvolp->dotdot.Fid.Vnode = tvc->f.parent.vnode; | |
393 | tvolp->dotdot.Fid.Unique = tvc->f.parent.unique; | |
394 | } | |
395 | } | |
396 | if (tvc->mvid.target_root && (tvc->f.states & CMValid)) { | |
397 | if (!canblock) { | |
398 | afs_int32 retry; | |
399 | ||
400 | do { | |
401 | retry = 0; | |
402 | ObtainWriteLock(&afs_xvcache, 597); | |
403 | root_vp = afs_FindVCache(tvc->mvid.target_root, &retry, IS_WLOCK); | |
404 | if (root_vp && retry) { | |
405 | ReleaseWriteLock(&afs_xvcache); | |
406 | afs_PutVCache(root_vp); | |
407 | } | |
408 | } while (root_vp && retry); | |
409 | ReleaseWriteLock(&afs_xvcache); | |
410 | } else { | |
411 | root_vp = afs_GetVCache(tvc->mvid.target_root, areq, NULL, NULL); | |
412 | } | |
413 | if (!root_vp) { | |
414 | code = canblock ? EIO : 0; | |
415 | goto done; | |
416 | } | |
417 | #ifdef AFS_DARWIN80_ENV | |
418 | root_vp->f.m.Type = VDIR; | |
419 | AFS_GUNLOCK(); | |
420 | code = afs_darwin_finalizevnode(root_vp, NULL, NULL, 0, 0); | |
421 | AFS_GLOCK(); | |
422 | if (code) goto done; | |
423 | vnode_ref(AFSTOV(root_vp)); | |
424 | #endif | |
425 | if (tvolp && !afs_InReadDir(root_vp)) { | |
426 | /* Is this always kosher? Perhaps we should instead use | |
427 | * NBObtainWriteLock to avoid potential deadlock. | |
428 | */ | |
429 | ObtainWriteLock(&root_vp->lock, 598); | |
430 | if (!root_vp->mvid.parent) | |
431 | root_vp->mvid.parent = osi_AllocSmallSpace(sizeof(struct VenusFid)); | |
432 | *root_vp->mvid.parent = tvolp->dotdot; | |
433 | ReleaseWriteLock(&root_vp->lock); | |
434 | } | |
435 | state->need_release = 1; | |
436 | state->root_vp = root_vp; | |
437 | *avcp = root_vp; | |
438 | code = 0; | |
439 | } else { | |
440 | code = canblock ? EIO : 0; | |
441 | } | |
442 | ||
443 | done: | |
444 | if (tvolp) | |
445 | afs_PutVolume(tvolp, WRITE_LOCK); | |
446 | return code; | |
447 | } | |
448 | ||
/*
 * afs_EvalFakeStat
 *
 * For a vcache entry that is a mount point, performs the equivalent of
 * EvalMountPoint and switches *avcp to the root of the mounted volume.
 * Enough is recorded in state for afs_PutFakeStat() to release that root
 * vcache afterwards.
 *
 * The state variable must have been set up with afs_InitFakeStat() first.
 *
 * Returns 0 on success, with *avcp pointing at the vcache entry the real
 * vnode operation should use.  A non-zero return is an error code to pass
 * back to the user.
 */
int
afs_EvalFakeStat(struct vcache **avcp, struct afs_fakestat_state *state,
                 struct vrequest *areq)
{
    int code;

    /* canblock = 1: RPCs are permitted. */
    code = afs_EvalFakeStat_int(avcp, state, areq, 1);
    return code;
}
468 | ||
/*
 * afs_TryEvalFakeStat
 *
 * Variant of afs_EvalFakeStat that tries to stay away from remote servers:
 * the mount point is only evaluated when everything needed is already in
 * the local caches.
 *
 * Returns 0 when everything succeeds; *avcp then points to a valid vcache
 * entry, which may or may not have been evaluated.
 */
int
afs_TryEvalFakeStat(struct vcache **avcp, struct afs_fakestat_state *state,
                    struct vrequest *areq)
{
    int code;

    /* canblock = 0: no RPCs. */
    code = afs_EvalFakeStat_int(avcp, state, areq, 0);
    return code;
}
485 | ||
486 | /* | |
487 | * afs_PutFakeStat | |
488 | * | |
489 | * Perform any necessary cleanup at the end of a vnode op, given that | |
490 | * afs_InitFakeStat was previously called with this state. | |
491 | */ | |
492 | void | |
493 | afs_PutFakeStat(struct afs_fakestat_state *state) | |
494 | { | |
495 | if (!afs_fakestat_enable) | |
496 | return; | |
497 | ||
498 | osi_Assert(state->valid == 1); | |
499 | if (state->need_release) | |
500 | afs_PutVCache(state->root_vp); | |
501 | state->valid = 0; | |
502 | } | |
503 | ||
504 | int | |
505 | afs_ENameOK(char *aname) | |
506 | { | |
507 | int tlen; | |
508 | ||
509 | AFS_STATCNT(ENameOK); | |
510 | tlen = strlen(aname); | |
511 | if (tlen >= 4 && strcmp(aname + tlen - 4, "@sys") == 0) | |
512 | return 0; | |
513 | return 1; | |
514 | } | |
515 | ||
516 | static int | |
517 | afs_getsysname(struct vrequest *areq, struct vcache *adp, | |
518 | char *bufp, int *num, char **sysnamelist[]) | |
519 | { | |
520 | struct unixuser *au; | |
521 | afs_int32 error; | |
522 | ||
523 | AFS_STATCNT(getsysname); | |
524 | ||
525 | *sysnamelist = afs_sysnamelist; | |
526 | ||
527 | if (!afs_nfsexporter) | |
528 | strcpy(bufp, (*sysnamelist)[0]); | |
529 | else { | |
530 | au = afs_GetUser(areq->uid, adp->f.fid.Cell, READ_LOCK); | |
531 | if (au->exporter) { | |
532 | error = EXP_SYSNAME(au->exporter, (char *)0, sysnamelist, num, 0); | |
533 | if (error) { | |
534 | strcpy(bufp, "@sys"); | |
535 | afs_PutUser(au, READ_LOCK); | |
536 | return -1; | |
537 | } else { | |
538 | strcpy(bufp, (*sysnamelist)[0]); | |
539 | } | |
540 | } else | |
541 | strcpy(bufp, afs_sysname); | |
542 | afs_PutUser(au, READ_LOCK); | |
543 | } | |
544 | return 0; | |
545 | } | |
546 | ||
547 | void | |
548 | Check_AtSys(struct vcache *avc, const char *aname, | |
549 | struct sysname_info *state, struct vrequest *areq) | |
550 | { | |
551 | int num = 0; | |
552 | char **sysnamelist[MAXNUMSYSNAMES]; | |
553 | ||
554 | if (AFS_EQ_ATSYS(aname)) { | |
555 | state->offset = 0; | |
556 | state->name = osi_AllocLargeSpace(MAXSYSNAME); | |
557 | state->allocked = 1; | |
558 | state->index = | |
559 | afs_getsysname(areq, avc, state->name, &num, sysnamelist); | |
560 | } else { | |
561 | state->offset = -1; | |
562 | state->allocked = 0; | |
563 | state->index = 0; | |
564 | state->name = (char *)aname; | |
565 | } | |
566 | } | |
567 | ||
568 | int | |
569 | Next_AtSys(struct vcache *avc, struct vrequest *areq, | |
570 | struct sysname_info *state) | |
571 | { | |
572 | int num = afs_sysnamecount; | |
573 | char **sysnamelist[MAXNUMSYSNAMES]; | |
574 | ||
575 | if (state->index == -1) | |
576 | return 0; /* No list */ | |
577 | ||
578 | /* Check for the initial state of aname != "@sys" in Check_AtSys */ | |
579 | if (state->offset == -1 && state->allocked == 0) { | |
580 | char *tname; | |
581 | ||
582 | /* Check for .*@sys */ | |
583 | for (tname = state->name; *tname; tname++) | |
584 | /*Move to the end of the string */ ; | |
585 | ||
586 | if ((tname > state->name + 4) && (AFS_EQ_ATSYS(tname - 4))) { | |
587 | state->offset = (tname - 4) - state->name; | |
588 | tname = osi_AllocLargeSpace(AFS_LRALLOCSIZ); | |
589 | strncpy(tname, state->name, state->offset); | |
590 | state->name = tname; | |
591 | state->allocked = 1; | |
592 | num = 0; | |
593 | state->index = | |
594 | afs_getsysname(areq, avc, state->name + state->offset, &num, | |
595 | sysnamelist); | |
596 | return 1; | |
597 | } else | |
598 | return 0; /* .*@sys doesn't match either */ | |
599 | } else { | |
600 | struct unixuser *au; | |
601 | afs_int32 error; | |
602 | ||
603 | *sysnamelist = afs_sysnamelist; | |
604 | ||
605 | if (afs_nfsexporter) { | |
606 | au = afs_GetUser(areq->uid, avc->f.fid.Cell, READ_LOCK); | |
607 | if (au->exporter) { | |
608 | error = | |
609 | EXP_SYSNAME(au->exporter, (char *)0, sysnamelist, &num, 0); | |
610 | if (error) { | |
611 | afs_PutUser(au, READ_LOCK); | |
612 | return 0; | |
613 | } | |
614 | } | |
615 | afs_PutUser(au, READ_LOCK); | |
616 | } | |
617 | if (++(state->index) >= num || !(*sysnamelist)[(unsigned int)state->index]) | |
618 | return 0; /* end of list */ | |
619 | } | |
620 | strcpy(state->name + state->offset, (*sysnamelist)[(unsigned int)state->index]); | |
621 | return 1; | |
622 | } | |
623 | ||
624 | static int | |
625 | afs_CheckBulkStatus(struct afs_conn *tc, int nFids, AFSBulkStats *statParm, | |
626 | AFSCBs *cbParm) | |
627 | { | |
628 | int i; | |
629 | int code; | |
630 | ||
631 | if (statParm->AFSBulkStats_len != nFids || cbParm->AFSCBs_len != nFids) { | |
632 | afs_warn("afs: BulkFetchStatus length %u/%u, expected %u\n", | |
633 | (unsigned)statParm->AFSBulkStats_len, | |
634 | (unsigned)cbParm->AFSCBs_len, nFids); | |
635 | afs_BadFetchStatus(tc); | |
636 | return VBUSY; | |
637 | } | |
638 | for (i = 0; i < nFids; i++) { | |
639 | if (statParm->AFSBulkStats_val[i].errorCode) { | |
640 | continue; | |
641 | } | |
642 | code = afs_CheckFetchStatus(tc, &statParm->AFSBulkStats_val[i]); | |
643 | if (code) { | |
644 | return code; | |
645 | } | |
646 | } | |
647 | ||
648 | return 0; | |
649 | } | |
650 | ||
651 | extern int BlobScan(struct dcache * afile, afs_int32 ablob, afs_int32 *ablobOut); | |
652 | ||
653 | /* called with an unlocked directory and directory cookie. Areqp | |
654 | * describes who is making the call. | |
655 | * Scans the next N (about 30, typically) directory entries, and does | |
656 | * a bulk stat call to stat them all. | |
657 | * | |
658 | * Must be very careful when merging in RPC responses, since we dont | |
659 | * want to overwrite newer info that was added by a file system mutating | |
660 | * call that ran concurrently with our bulk stat call. | |
661 | * | |
662 | * We do that, as described below, by not merging in our info (always | |
663 | * safe to skip the merge) if the status info is valid in the vcache entry. | |
664 | * | |
665 | * If adapt ever implements the bulk stat RPC, then this code will need to | |
666 | * ensure that vcaches created for failed RPC's to older servers have the | |
667 | * CForeign bit set. | |
668 | */ | |
669 | static struct vcache *BStvc = NULL; | |
670 | ||
671 | int | |
672 | afs_DoBulkStat(struct vcache *adp, long dirCookie, struct vrequest *areqp) | |
673 | { | |
674 | int nentries; /* # of entries to prefetch */ | |
675 | int nskip; /* # of slots in the LRU queue to skip */ | |
676 | #ifdef AFS_DARWIN80_ENV | |
677 | int npasses = 0; | |
678 | struct vnode *lruvp; | |
679 | #endif | |
680 | struct vcache *lruvcp; /* vcache ptr of our goal pos in LRU queue */ | |
681 | struct dcache *dcp; /* chunk containing the dir block */ | |
682 | afs_size_t temp; /* temp for holding chunk length, &c. */ | |
683 | struct AFSFid *fidsp; /* file IDs were collecting */ | |
684 | struct AFSCallBack *cbsp; /* call back pointers */ | |
685 | struct AFSCallBack *tcbp; /* temp callback ptr */ | |
686 | struct AFSFetchStatus *statsp; /* file status info */ | |
687 | struct AFSVolSync volSync; /* vol sync return info */ | |
688 | struct vcache *tvcp; /* temp vcp */ | |
689 | struct afs_q *tq; /* temp queue variable */ | |
690 | AFSCBFids fidParm; /* file ID parm for bulk stat */ | |
691 | AFSBulkStats statParm; /* stat info parm for bulk stat */ | |
692 | int fidIndex = 0; /* which file were stating */ | |
693 | struct afs_conn *tcp = 0; /* conn for call */ | |
694 | AFSCBs cbParm; /* callback parm for bulk stat */ | |
695 | struct server *hostp = 0; /* host we got callback from */ | |
696 | long startTime; /* time we started the call, | |
697 | * for callback expiration base | |
698 | */ | |
699 | #if defined(AFS_DARWIN_ENV) | |
700 | int ftype[4] = {VNON, VREG, VDIR, VLNK}; /* verify type is as expected */ | |
701 | #endif | |
702 | afs_size_t statSeqNo = 0; /* Valued of file size to detect races */ | |
703 | int code; /* error code */ | |
704 | afs_int32 newIndex; /* new index in the dir */ | |
705 | struct DirBuffer entry; /* Buffer for dir manipulation */ | |
706 | struct DirEntry *dirEntryp; /* dir entry we are examining */ | |
707 | int i; | |
708 | struct VenusFid afid; /* file ID we are using now */ | |
709 | struct VenusFid tfid; /* another temp. file ID */ | |
710 | afs_int32 retry; /* handle low-level SGI MP race conditions */ | |
711 | long volStates; /* flags from vol structure */ | |
712 | struct volume *volp = 0; /* volume ptr */ | |
713 | struct VenusFid dotdot = {0, {0, 0, 0}}; | |
714 | int flagIndex = 0; /* First file with bulk fetch flag set */ | |
715 | struct rx_connection *rxconn; | |
716 | XSTATS_DECLS; | |
717 | dotdot.Cell = 0; | |
718 | dotdot.Fid.Unique = 0; | |
719 | dotdot.Fid.Vnode = 0; | |
720 | ||
721 | /* first compute some basic parameters. We dont want to prefetch more | |
722 | * than a fraction of the cache in any given call, and we want to preserve | |
723 | * a portion of the LRU queue in any event, so as to avoid thrashing | |
724 | * the entire stat cache (we will at least leave some of it alone). | |
725 | * presently dont stat more than 1/8 the cache in any one call. */ | |
726 | nentries = afs_cacheStats / 8; | |
727 | ||
728 | /* dont bother prefetching more than one calls worth of info */ | |
729 | if (nentries > AFSCBMAX) | |
730 | nentries = AFSCBMAX; | |
731 | ||
732 | /* heuristic to make sure that things fit in 4K. This means that | |
733 | * we shouldnt make it any bigger than 47 entries. I am typically | |
734 | * going to keep it a little lower, since we don't want to load | |
735 | * too much of the stat cache. | |
736 | */ | |
737 | if (nentries > 30) | |
738 | nentries = 30; | |
739 | ||
740 | /* now, to reduce the stack size, well allocate two 4K blocks, | |
741 | * one for fids and callbacks, and one for stat info. Well set | |
742 | * up our pointers to the memory from there, too. | |
743 | */ | |
744 | statsp = osi_Alloc(AFSCBMAX * sizeof(AFSFetchStatus)); | |
745 | fidsp = osi_AllocLargeSpace(nentries * sizeof(AFSFid)); | |
746 | cbsp = osi_Alloc(AFSCBMAX * sizeof(AFSCallBack)); | |
747 | ||
748 | /* next, we must iterate over the directory, starting from the specified | |
749 | * cookie offset (dirCookie), and counting out nentries file entries. | |
750 | * We skip files that already have stat cache entries, since we | |
751 | * dont want to bulk stat files that are already in the cache. | |
752 | */ | |
753 | tagain: | |
754 | code = afs_VerifyVCache(adp, areqp); | |
755 | if (code) | |
756 | goto done2; | |
757 | ||
758 | dcp = afs_GetDCache(adp, (afs_size_t) 0, areqp, &temp, &temp, 1); | |
759 | if (!dcp) { | |
760 | code = EIO; | |
761 | goto done2; | |
762 | } | |
763 | ||
764 | /* lock the directory cache entry */ | |
765 | ObtainReadLock(&adp->lock); | |
766 | ObtainReadLock(&dcp->lock); | |
767 | ||
768 | /* | |
769 | * Make sure that the data in the cache is current. There are two | |
770 | * cases we need to worry about: | |
771 | * 1. The cache data is being fetched by another process. | |
772 | * 2. The cache data is no longer valid | |
773 | */ | |
774 | while ((adp->f.states & CStatd) | |
775 | && (dcp->dflags & DFFetching) | |
776 | && hsame(adp->f.m.DataVersion, dcp->f.versionNo)) { | |
777 | afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, | |
778 | __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, dcp, | |
779 | ICL_TYPE_INT32, dcp->dflags); | |
780 | ReleaseReadLock(&dcp->lock); | |
781 | ReleaseReadLock(&adp->lock); | |
782 | afs_osi_Sleep(&dcp->validPos); | |
783 | ObtainReadLock(&adp->lock); | |
784 | ObtainReadLock(&dcp->lock); | |
785 | } | |
786 | if (!(adp->f.states & CStatd) | |
787 | || !hsame(adp->f.m.DataVersion, dcp->f.versionNo)) { | |
788 | ReleaseReadLock(&dcp->lock); | |
789 | ReleaseReadLock(&adp->lock); | |
790 | afs_PutDCache(dcp); | |
791 | goto tagain; | |
792 | } | |
793 | ||
794 | /* Generate a sequence number so we can tell whether we should | |
795 | * store the attributes when processing the response. This number is | |
796 | * stored in the file size when we set the CBulkFetching bit. If the | |
797 | * CBulkFetching is still set and this value hasn't changed, then | |
798 | * we know we were the last to set CBulkFetching bit for this file, | |
799 | * and it is safe to set the status information for this file. | |
800 | */ | |
801 | statSeqNo = bulkStatCounter++; | |
802 | /* ensure against wrapping */ | |
803 | if (statSeqNo == 0) | |
804 | statSeqNo = bulkStatCounter++; | |
805 | ||
806 | /* now we have dir data in the cache, so scan the dir page */ | |
807 | fidIndex = 0; | |
808 | flagIndex = 0; | |
809 | while (1) { /* Should probably have some constant bound */ | |
810 | /* look for first safe entry to examine in the directory. BlobScan | |
811 | * looks for a the 1st allocated dir after the dirCookie slot. | |
812 | */ | |
813 | code = BlobScan(dcp, (dirCookie >> 5), &newIndex); | |
814 | if (code || newIndex == 0) | |
815 | break; | |
816 | ||
817 | /* remember the updated directory cookie */ | |
818 | dirCookie = newIndex << 5; | |
819 | ||
820 | /* get a ptr to the dir entry */ | |
821 | code = afs_dir_GetBlob(dcp, newIndex, &entry); | |
822 | if (code) | |
823 | break; | |
824 | dirEntryp = (struct DirEntry *)entry.data; | |
825 | ||
826 | /* dont copy more than we have room for */ | |
827 | if (fidIndex >= nentries) { | |
828 | DRelease(&entry, 0); | |
829 | break; | |
830 | } | |
831 | ||
832 | /* now, if the dir entry looks good, copy it out to our list. Vnode | |
833 | * 0 means deleted, although it should also be free were it deleted. | |
834 | */ | |
835 | if (dirEntryp->fid.vnode != 0) { | |
836 | /* dont copy entries we have in our cache. This check will | |
837 | * also make us skip "." and probably "..", unless it has | |
838 | * disappeared from the cache since we did our namei call. | |
839 | */ | |
840 | tfid.Cell = adp->f.fid.Cell; | |
841 | tfid.Fid.Volume = adp->f.fid.Fid.Volume; | |
842 | tfid.Fid.Vnode = ntohl(dirEntryp->fid.vnode); | |
843 | tfid.Fid.Unique = ntohl(dirEntryp->fid.vunique); | |
844 | do { | |
845 | retry = 0; | |
846 | ObtainWriteLock(&afs_xvcache, 130); | |
847 | tvcp = afs_FindVCache(&tfid, &retry, IS_WLOCK /* no stats | LRU */ ); | |
848 | if (tvcp && retry) { | |
849 | ReleaseWriteLock(&afs_xvcache); | |
850 | afs_PutVCache(tvcp); | |
851 | } | |
852 | } while (tvcp && retry); | |
853 | if (!tvcp) { /* otherwise, create manually */ | |
854 | tvcp = afs_NewBulkVCache(&tfid, hostp, statSeqNo); | |
855 | if (tvcp) | |
856 | { | |
857 | ObtainWriteLock(&tvcp->lock, 505); | |
858 | #ifdef AFS_DARWIN80_ENV | |
859 | /* use even/odd hack to guess file versus dir. | |
860 | let links be reaped. oh well. */ | |
861 | if (dirEntryp->fid.vnode & 1) | |
862 | tvcp->f.m.Type = VDIR; | |
863 | else | |
864 | tvcp->f.m.Type = VREG; | |
865 | /* finalize to a best guess */ | |
866 | afs_darwin_finalizevnode(tvcp, AFSTOV(adp), NULL, 0, 1); | |
867 | /* re-acquire usecount that finalizevnode disposed of */ | |
868 | vnode_ref(AFSTOV(tvcp)); | |
869 | #endif | |
870 | ReleaseWriteLock(&afs_xvcache); | |
871 | afs_RemoveVCB(&tfid); | |
872 | ReleaseWriteLock(&tvcp->lock); | |
873 | } else { | |
874 | ReleaseWriteLock(&afs_xvcache); | |
875 | } | |
876 | } else { | |
877 | ReleaseWriteLock(&afs_xvcache); | |
878 | } | |
879 | if (!tvcp) | |
880 | { | |
881 | DRelease(&entry, 0); | |
882 | ReleaseReadLock(&dcp->lock); | |
883 | ReleaseReadLock(&adp->lock); | |
884 | afs_PutDCache(dcp); | |
885 | goto done; /* can happen if afs_NewVCache fails */ | |
886 | } | |
887 | ||
888 | /* WARNING: afs_DoBulkStat uses the Length field to store a | |
889 | * sequence number for each bulk status request. Under no | |
890 | * circumstances should afs_DoBulkStat store a sequence number | |
891 | * if the new length will be ignored when afs_ProcessFS is | |
892 | * called with new stats. */ | |
893 | #ifdef AFS_SGI_ENV | |
894 | if (!(tvcp->f.states & CStatd) | |
895 | && (!((tvcp->f.states & CBulkFetching) && | |
896 | (tvcp->f.m.Length != statSeqNo))) | |
897 | && (tvcp->execsOrWriters <= 0) | |
898 | && !afs_DirtyPages(tvcp) | |
899 | && !AFS_VN_MAPPED((vnode_t *) tvcp)) | |
900 | #else | |
901 | if (!(tvcp->f.states & CStatd) | |
902 | && (!((tvcp->f.states & CBulkFetching) && | |
903 | (tvcp->f.m.Length != statSeqNo))) | |
904 | && (tvcp->execsOrWriters <= 0) | |
905 | && !afs_DirtyPages(tvcp)) | |
906 | #endif | |
907 | ||
908 | { | |
909 | /* this entry doesnt exist in the cache, and is not | |
910 | * already being fetched by someone else, so add it to the | |
911 | * list of file IDs to obtain. | |
912 | * | |
913 | * We detect a callback breaking race condition by checking the | |
914 | * CBulkFetching state bit and the value in the file size. | |
915 | * It is safe to set the status only if the CBulkFetching | |
916 | * flag is still set and the value in the file size does | |
917 | * not change. NewBulkVCache sets us up for the new ones. | |
918 | * Set up the rest here. | |
919 | * | |
920 | * Don't fetch status for dirty files. We need to | |
921 | * preserve the value of the file size. We could | |
922 | * flush the pages, but it wouldn't be worthwhile. | |
923 | */ | |
924 | if (!(tvcp->f.states & CBulkFetching)) { | |
925 | tvcp->f.states |= CBulkFetching; | |
926 | tvcp->f.m.Length = statSeqNo; | |
927 | } | |
928 | memcpy((char *)(fidsp + fidIndex), (char *)&tfid.Fid, | |
929 | sizeof(*fidsp)); | |
930 | fidIndex++; | |
931 | } | |
932 | afs_PutVCache(tvcp); | |
933 | } | |
934 | ||
935 | /* if dir vnode has non-zero entry */ | |
936 | /* move to the next dir entry by adding in the # of entries | |
937 | * used by this dir entry. | |
938 | */ | |
939 | temp = afs_dir_NameBlobs(dirEntryp->name) << 5; | |
940 | DRelease(&entry, 0); | |
941 | if (temp <= 0) | |
942 | break; | |
943 | dirCookie += temp; | |
944 | } /* while loop over all dir entries */ | |
945 | ||
946 | /* now release the dir lock and prepare to make the bulk RPC */ | |
947 | ReleaseReadLock(&dcp->lock); | |
948 | ReleaseReadLock(&adp->lock); | |
949 | ||
950 | /* release the chunk */ | |
951 | afs_PutDCache(dcp); | |
952 | ||
953 | /* dont make a null call */ | |
954 | if (fidIndex == 0) | |
955 | goto done; | |
956 | ||
957 | do { | |
958 | /* setup the RPC parm structures */ | |
959 | fidParm.AFSCBFids_len = fidIndex; | |
960 | fidParm.AFSCBFids_val = fidsp; | |
961 | statParm.AFSBulkStats_len = fidIndex; | |
962 | statParm.AFSBulkStats_val = statsp; | |
963 | cbParm.AFSCBs_len = fidIndex; | |
964 | cbParm.AFSCBs_val = cbsp; | |
965 | ||
966 | /* start the timer; callback expirations are relative to this */ | |
967 | startTime = osi_Time(); | |
968 | ||
969 | tcp = afs_Conn(&adp->f.fid, areqp, SHARED_LOCK, &rxconn); | |
970 | if (tcp) { | |
971 | hostp = tcp->parent->srvr->server; | |
972 | ||
973 | for (i = 0; i < fidIndex; i++) { | |
974 | /* we must set tvcp->callback before the BulkStatus call, so | |
975 | * we can detect concurrent InitCallBackState's */ | |
976 | ||
977 | afid.Cell = adp->f.fid.Cell; | |
978 | afid.Fid.Volume = adp->f.fid.Fid.Volume; | |
979 | afid.Fid.Vnode = fidsp[i].Vnode; | |
980 | afid.Fid.Unique = fidsp[i].Unique; | |
981 | ||
982 | do { | |
983 | retry = 0; | |
984 | ObtainReadLock(&afs_xvcache); | |
985 | tvcp = afs_FindVCache(&afid, &retry, 0 /* !stats&!lru */); | |
986 | ReleaseReadLock(&afs_xvcache); | |
987 | } while (tvcp && retry); | |
988 | ||
989 | if (!tvcp) { | |
990 | continue; | |
991 | } | |
992 | ||
993 | if ((tvcp->f.states & CBulkFetching) && | |
994 | (tvcp->f.m.Length == statSeqNo)) { | |
995 | tvcp->callback = hostp; | |
996 | } | |
997 | ||
998 | afs_PutVCache(tvcp); | |
999 | tvcp = NULL; | |
1000 | } | |
1001 | ||
1002 | XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_BULKSTATUS); | |
1003 | ||
1004 | if (!(tcp->parent->srvr->server->flags & SNO_INLINEBULK)) { | |
1005 | RX_AFS_GUNLOCK(); | |
1006 | code = | |
1007 | RXAFS_InlineBulkStatus(rxconn, &fidParm, &statParm, | |
1008 | &cbParm, &volSync); | |
1009 | RX_AFS_GLOCK(); | |
1010 | if (code == RXGEN_OPCODE) { | |
1011 | tcp->parent->srvr->server->flags |= SNO_INLINEBULK; | |
1012 | RX_AFS_GUNLOCK(); | |
1013 | code = | |
1014 | RXAFS_BulkStatus(rxconn, &fidParm, &statParm, | |
1015 | &cbParm, &volSync); | |
1016 | RX_AFS_GLOCK(); | |
1017 | } | |
1018 | } else { | |
1019 | RX_AFS_GUNLOCK(); | |
1020 | code = | |
1021 | RXAFS_BulkStatus(rxconn, &fidParm, &statParm, &cbParm, | |
1022 | &volSync); | |
1023 | RX_AFS_GLOCK(); | |
1024 | } | |
1025 | XSTATS_END_TIME; | |
1026 | ||
1027 | if (code == 0) { | |
1028 | code = afs_CheckBulkStatus(tcp, fidIndex, &statParm, &cbParm); | |
1029 | } | |
1030 | } else | |
1031 | code = -1; | |
1032 | /* make sure we give afs_Analyze a chance to retry, | |
1033 | * but if the RPC succeeded we may have entries to merge. | |
1034 | * if we wipe code with one entry's status we get bogus failures. | |
1035 | */ | |
1036 | } while (afs_Analyze | |
1037 | (tcp, rxconn, code ? code : (&statsp[0])->errorCode, | |
1038 | &adp->f.fid, areqp, AFS_STATS_FS_RPCIDX_BULKSTATUS, | |
1039 | SHARED_LOCK, NULL)); | |
1040 | ||
1041 | /* now, if we didnt get the info, bail out. */ | |
1042 | if (code) | |
1043 | goto done; | |
1044 | ||
1045 | /* we need vol flags to create the entries properly */ | |
1046 | dotdot.Fid.Volume = 0; | |
1047 | volp = afs_GetVolume(&adp->f.fid, areqp, READ_LOCK); | |
1048 | if (volp) { | |
1049 | volStates = volp->states; | |
1050 | if (volp->dotdot.Fid.Volume != 0) | |
1051 | dotdot = volp->dotdot; | |
1052 | } else | |
1053 | volStates = 0; | |
1054 | ||
1055 | /* find the place to merge the info into. We do this by skipping | |
1056 | * nskip entries in the LRU queue. The more we skip, the more | |
1057 | * we preserve, since the head of the VLRU queue is the most recently | |
1058 | * referenced file. | |
1059 | */ | |
1060 | reskip: | |
1061 | nskip = afs_cacheStats / 2; /* preserved fraction of the cache */ | |
1062 | ObtainReadLock(&afs_xvcache); | |
1063 | #ifdef AFS_DARWIN80_ENV | |
1064 | reskip2: | |
1065 | #endif | |
1066 | if (QEmpty(&VLRU)) { | |
1067 | /* actually a serious error, probably should panic. Probably will | |
1068 | * panic soon, oh well. */ | |
1069 | ReleaseReadLock(&afs_xvcache); | |
1070 | afs_warnuser("afs_DoBulkStat: VLRU empty!"); | |
1071 | goto done; | |
1072 | } | |
1073 | if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) { | |
1074 | refpanic("Bulkstat VLRU inconsistent"); | |
1075 | } | |
1076 | for (tq = VLRU.next; tq != &VLRU; tq = QNext(tq)) { | |
1077 | if (--nskip <= 0) { | |
1078 | #ifdef AFS_DARWIN80_ENV | |
1079 | if ((!(QTOV(tq)->f.states & CDeadVnode)&&!(QTOV(tq)->f.states & CVInit))) | |
1080 | #endif | |
1081 | break; | |
1082 | } | |
1083 | if (QNext(QPrev(tq)) != tq) { | |
1084 | BStvc = QTOV(tq); | |
1085 | refpanic("BulkStat VLRU inconsistent"); | |
1086 | } | |
1087 | } | |
1088 | if (tq != &VLRU) | |
1089 | lruvcp = QTOV(tq); | |
1090 | else | |
1091 | lruvcp = QTOV(VLRU.next); | |
1092 | ||
1093 | /* now we have to hold this entry, so that it does not get moved | |
1094 | * into the free list while we're running. It could still get | |
1095 | * moved within the lru queue, but hopefully that will be rare; it | |
1096 | * doesn't hurt nearly as much. | |
1097 | */ | |
1098 | retry = 0; | |
1099 | #ifdef AFS_DARWIN80_ENV | |
1100 | if (((lruvcp->f.states & CDeadVnode)||(lruvcp->f.states & CVInit))) { | |
1101 | if (npasses == 0) { | |
1102 | nskip = 1; | |
1103 | npasses++; | |
1104 | goto reskip2; | |
1105 | } else | |
1106 | panic("Can't find non-dead vnode in VLRU\n"); | |
1107 | } | |
1108 | lruvp = AFSTOV(lruvcp); | |
1109 | if (vnode_get(lruvp)) /* this bumps ref count */ | |
1110 | retry = 1; | |
1111 | else if (vnode_ref(lruvp)) { | |
1112 | AFS_GUNLOCK(); | |
1113 | /* AFSTOV(lruvcp) may be NULL */ | |
1114 | vnode_put(lruvp); | |
1115 | AFS_GLOCK(); | |
1116 | retry = 1; | |
1117 | } | |
1118 | #else | |
1119 | osi_vnhold(lruvcp, &retry); | |
1120 | #endif | |
1121 | ReleaseReadLock(&afs_xvcache); /* could be read lock */ | |
1122 | if (retry) | |
1123 | goto reskip; | |
1124 | ||
1125 | /* otherwise, merge in the info. We have to be quite careful here, | |
1126 | * since we need to ensure that we don't merge old info over newer | |
1127 | * stuff in a stat cache entry. We're very conservative here: we don't | |
1128 | * do the merge at all unless we ourselves create the stat cache | |
1129 | * entry. That's pretty safe, and should work pretty well, since we | |
1130 | * typically expect to do the stat cache creation ourselves. | |
1131 | * | |
1132 | * We also have to take into account racing token revocations. | |
1133 | */ | |
1134 | for (i = 0; i < fidIndex; i++) { | |
1135 | if ((&statsp[i])->errorCode) | |
1136 | continue; | |
1137 | afid.Cell = adp->f.fid.Cell; | |
1138 | afid.Fid.Volume = adp->f.fid.Fid.Volume; | |
1139 | afid.Fid.Vnode = fidsp[i].Vnode; | |
1140 | afid.Fid.Unique = fidsp[i].Unique; | |
1141 | do { | |
1142 | retry = 0; | |
1143 | ObtainReadLock(&afs_xvcache); | |
1144 | tvcp = afs_FindVCache(&afid, &retry, 0/* !stats&!lru */); | |
1145 | ReleaseReadLock(&afs_xvcache); | |
1146 | } while (tvcp && retry); | |
1147 | ||
1148 | /* The entry may no longer exist */ | |
1149 | if (tvcp == NULL) { | |
1150 | continue; | |
1151 | } | |
1152 | ||
1153 | /* now we have the entry held, but we need to fill it in */ | |
1154 | ObtainWriteLock(&tvcp->lock, 131); | |
1155 | ||
1156 | /* if CBulkFetching is not set, or if the file size no longer | |
1157 | * matches the value we placed there when we set the CBulkFetching | |
1158 | * flag, then someone else has done something with this node, | |
1159 | * and we may not have the latest status information for this | |
1160 | * file. Leave the entry alone. There's also a file type | |
1161 | * change here, for OSX bulkstat support. | |
1162 | */ | |
1163 | if (!(tvcp->f.states & CBulkFetching) | |
1164 | || (tvcp->f.m.Length != statSeqNo) | |
1165 | #if defined(AFS_DARWIN_ENV) | |
1166 | || (ftype[(&statsp[i])->FileType] != vType(tvcp)) | |
1167 | #endif | |
1168 | ) { | |
1169 | flagIndex++; | |
1170 | ReleaseWriteLock(&tvcp->lock); | |
1171 | afs_PutVCache(tvcp); | |
1172 | continue; | |
1173 | } | |
1174 | ||
1175 | /* now copy ".." entry back out of volume structure, if necessary */ | |
1176 | if (tvcp->mvstat == AFS_MVSTAT_ROOT && (dotdot.Fid.Volume != 0)) { | |
1177 | if (!tvcp->mvid.parent) | |
1178 | tvcp->mvid.parent = osi_AllocSmallSpace(sizeof(struct VenusFid)); | |
1179 | *tvcp->mvid.parent = dotdot; | |
1180 | } | |
1181 | ||
1182 | #ifdef AFS_DARWIN80_ENV | |
1183 | if (((lruvcp->f.states & CDeadVnode)||(lruvcp->f.states & CVInit))) | |
1184 | panic("vlru control point went dead\n"); | |
1185 | #endif | |
1186 | ||
1187 | ObtainWriteLock(&afs_xvcache, 132); | |
1188 | if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) { | |
1189 | refpanic("Bulkstat VLRU inconsistent2"); | |
1190 | } | |
1191 | if ((QNext(QPrev(&tvcp->vlruq)) != &tvcp->vlruq) | |
1192 | || (QPrev(QNext(&tvcp->vlruq)) != &tvcp->vlruq)) { | |
1193 | refpanic("Bulkstat VLRU inconsistent4"); | |
1194 | } | |
1195 | if ((QNext(QPrev(&lruvcp->vlruq)) != &lruvcp->vlruq) | |
1196 | || (QPrev(QNext(&lruvcp->vlruq)) != &lruvcp->vlruq)) { | |
1197 | refpanic("Bulkstat VLRU inconsistent5"); | |
1198 | } | |
1199 | ||
1200 | if (tvcp != lruvcp) { /* if they are == don't move it, don't corrupt vlru */ | |
1201 | QRemove(&tvcp->vlruq); | |
1202 | QAdd(&lruvcp->vlruq, &tvcp->vlruq); | |
1203 | } | |
1204 | ||
1205 | if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) { | |
1206 | refpanic("Bulkstat VLRU inconsistent3"); | |
1207 | } | |
1208 | if ((QNext(QPrev(&tvcp->vlruq)) != &tvcp->vlruq) | |
1209 | || (QPrev(QNext(&tvcp->vlruq)) != &tvcp->vlruq)) { | |
1210 | refpanic("Bulkstat VLRU inconsistent5"); | |
1211 | } | |
1212 | if ((QNext(QPrev(&lruvcp->vlruq)) != &lruvcp->vlruq) | |
1213 | || (QPrev(QNext(&lruvcp->vlruq)) != &lruvcp->vlruq)) { | |
1214 | refpanic("Bulkstat VLRU inconsistent6"); | |
1215 | } | |
1216 | ReleaseWriteLock(&afs_xvcache); | |
1217 | ||
1218 | ObtainWriteLock(&afs_xcbhash, 494); | |
1219 | ||
1220 | /* We need to check the flags again. We may have missed | |
1221 | * something while we were waiting for a lock. | |
1222 | */ | |
1223 | if (!(tvcp->f.states & CBulkFetching) || (tvcp->f.m.Length != statSeqNo)) { | |
1224 | flagIndex++; | |
1225 | ReleaseWriteLock(&tvcp->lock); | |
1226 | ReleaseWriteLock(&afs_xcbhash); | |
1227 | afs_PutVCache(tvcp); | |
1228 | continue; | |
1229 | } | |
1230 | ||
1231 | /* now merge in the resulting status back into the vnode. | |
1232 | * We only do this if the entry looks clear. | |
1233 | */ | |
1234 | afs_ProcessFS(tvcp, &statsp[i], areqp); | |
1235 | #if defined(AFS_LINUX22_ENV) | |
1236 | afs_fill_inode(AFSTOV(tvcp), NULL); /* reset inode operations */ | |
1237 | #endif | |
1238 | ||
1239 | /* do some accounting for bulk stats: mark this entry as | |
1240 | * loaded, so we can tell if we use it before it gets | |
1241 | * recycled. | |
1242 | */ | |
1243 | tvcp->f.states |= CBulkStat; | |
1244 | tvcp->f.states &= ~CBulkFetching; | |
1245 | flagIndex++; | |
1246 | afs_bulkStatsDone++; | |
1247 | ||
1248 | /* merge in vol info */ | |
1249 | if (volStates & VRO) | |
1250 | tvcp->f.states |= CRO; | |
1251 | if (volStates & VBackup) | |
1252 | tvcp->f.states |= CBackup; | |
1253 | if (volStates & VForeign) | |
1254 | tvcp->f.states |= CForeign; | |
1255 | ||
1256 | /* merge in the callback info */ | |
1257 | tvcp->f.states |= CTruth; | |
1258 | ||
1259 | /* get ptr to the callback we are interested in */ | |
1260 | tcbp = cbsp + i; | |
1261 | ||
1262 | if (tcbp->ExpirationTime != 0) { | |
1263 | tvcp->cbExpires = tcbp->ExpirationTime + startTime; | |
1264 | tvcp->callback = hostp; | |
1265 | tvcp->f.states |= CStatd; | |
1266 | afs_QueueCallback(tvcp, CBHash(tcbp->ExpirationTime), volp); | |
1267 | } else if (tvcp->f.states & CRO) { | |
1268 | /* ordinary callback on a read-only volume -- AFS 3.2 style */ | |
1269 | tvcp->cbExpires = 3600 + startTime; | |
1270 | tvcp->callback = hostp; | |
1271 | tvcp->f.states |= CStatd; | |
1272 | afs_QueueCallback(tvcp, CBHash(3600), volp); | |
1273 | } else { | |
1274 | afs_StaleVCacheFlags(tvcp, | |
1275 | AFS_STALEVC_CBLOCKED | AFS_STALEVC_CLEARCB, | |
1276 | CUnique); | |
1277 | } | |
1278 | #ifdef AFS_DARWIN80_ENV | |
1279 | /* reclaim->FlushVCache will need xcbhash */ | |
1280 | if (((tvcp->f.states & CDeadVnode)||(tvcp->f.states & CVInit))) { | |
1281 | ReleaseWriteLock(&afs_xcbhash); | |
1282 | /* passing in a parent hangs getting the vnode lock */ | |
1283 | code = afs_darwin_finalizevnode(tvcp, NULL, NULL, 0, 1); | |
1284 | if (code) { | |
1285 | /* It's gonna get recycled - shouldn't happen */ | |
1286 | afs_StaleVCacheFlags(tvcp, | |
1287 | AFS_STALEVC_CBLOCKED | AFS_STALEVC_CLEARCB, | |
1288 | CUnique); | |
1289 | } else | |
1290 | /* re-acquire the usecount that finalizevnode disposed of */ | |
1291 | vnode_ref(AFSTOV(tvcp)); | |
1292 | } else | |
1293 | #endif | |
1294 | ReleaseWriteLock(&afs_xcbhash); | |
1295 | ||
1296 | ReleaseWriteLock(&tvcp->lock); | |
1297 | /* finally, we're done with the entry */ | |
1298 | afs_PutVCache(tvcp); | |
1299 | } /* for all files we got back */ | |
1300 | ||
1301 | /* finally return the pointer into the LRU queue */ | |
1302 | #ifdef AFS_DARWIN80_ENV | |
1303 | if (((lruvcp->f.states & CDeadVnode)||(lruvcp->f.states & CVInit))) | |
1304 | panic("vlru control point went dead before put\n"); | |
1305 | AFS_GUNLOCK(); | |
1306 | vnode_put(lruvp); | |
1307 | vnode_rele(lruvp); | |
1308 | AFS_GLOCK(); | |
1309 | #else | |
1310 | afs_PutVCache(lruvcp); | |
1311 | #endif | |
1312 | ||
1313 | done: | |
1314 | /* Be sure to turn off the CBulkFetching flags */ | |
1315 | for (i = flagIndex; i < fidIndex; i++) { | |
1316 | afid.Cell = adp->f.fid.Cell; | |
1317 | afid.Fid.Volume = adp->f.fid.Fid.Volume; | |
1318 | afid.Fid.Vnode = fidsp[i].Vnode; | |
1319 | afid.Fid.Unique = fidsp[i].Unique; | |
1320 | do { | |
1321 | retry = 0; | |
1322 | ObtainReadLock(&afs_xvcache); | |
1323 | tvcp = afs_FindVCache(&afid, &retry, 0 /* !stats&!lru */); | |
1324 | ReleaseReadLock(&afs_xvcache); | |
1325 | } while (tvcp && retry); | |
1326 | if (tvcp != NULL) { | |
1327 | if ((tvcp->f.states & CBulkFetching) | |
1328 | && (tvcp->f.m.Length == statSeqNo)) { | |
1329 | tvcp->f.states &= ~CBulkFetching; | |
1330 | } | |
1331 | afs_PutVCache(tvcp); | |
1332 | } | |
1333 | } | |
1334 | if (volp) | |
1335 | afs_PutVolume(volp, READ_LOCK); | |
1336 | ||
1337 | done2: | |
1338 | osi_FreeLargeSpace((char *)fidsp); | |
1339 | osi_Free((char *)statsp, AFSCBMAX * sizeof(AFSFetchStatus)); | |
1340 | osi_Free((char *)cbsp, AFSCBMAX * sizeof(AFSCallBack)); | |
1341 | return code; | |
1342 | } | |
1343 | ||
/* was: (AFS_DEC_ENV) || defined(AFS_OSF30_ENV) || defined(AFS_NCR_ENV) */
/*
 * AFSDOBULK: starts out as 1 with file-local linkage on every build except
 * AFS_DARWIN80_ENV, where it starts out as 0 and has external linkage.
 * NOTE(review): presumably gates the bulk-stat path used during lookup --
 * confirm against the users of this flag.
 */
#ifndef AFS_DARWIN80_ENV
static int AFSDOBULK = 1;
#else
int AFSDOBULK = 0;
#endif
1350 | ||
/*
 * Report whether a path component refers to the directory being searched.
 *
 * aname: NUL-terminated path component; must not be NULL.
 *
 * Returns 1 when aname is exactly ".", and 0 otherwise. On AFS_SUN5_ENV
 * builds the empty string "" is also treated as "." and yields 1.
 */
static_inline int
osi_lookup_isdot(const char *aname)
{
    /* The && short-circuits, so aname[1] is never read for an empty name. */
    int isdot = (aname[0] == '.' && aname[1] == '\0');

#ifdef AFS_SUN5_ENV
    /* in Solaris, we can get passed "" as a path component if we are the
     * root directory, e.g. after a call to chroot. It is equivalent to
     * looking up "." */
    if (aname[0] == '\0') {
	isdot = 1;
    }
#endif /* AFS_SUN5_ENV */

    return isdot;
}
1367 | ||
1368 | int | |
1369 | #if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) | |
1370 | afs_lookup(OSI_VC_DECL(adp), char *aname, struct vcache **avcp, struct pathname *pnp, int flags, struct vnode *rdir, afs_ucred_t *acred) | |
1371 | #elif defined(UKERNEL) | |
1372 | afs_lookup(OSI_VC_DECL(adp), char *aname, struct vcache **avcp, afs_ucred_t *acred, int flags) | |
1373 | #else | |
1374 | afs_lookup(OSI_VC_DECL(adp), char *aname, struct vcache **avcp, afs_ucred_t *acred) | |
1375 | #endif | |
1376 | { | |
1377 | struct vrequest *treq = NULL; | |
1378 | char *tname = NULL; | |
1379 | struct vcache *tvc = 0; | |
1380 | afs_int32 code; | |
1381 | afs_int32 bulkcode = 0; | |
1382 | int pass = 0, hit = 0; | |
1383 | int force_eval = afs_fakestat_enable ? 0 : 1; | |
1384 | long dirCookie; | |
1385 | afs_hyper_t versionNo; | |
1386 | int no_read_access = 0; | |
1387 | struct sysname_info sysState; /* used only for @sys checking */ | |
1388 | int dynrootRetry = 1; | |
1389 | struct afs_fakestat_state fakestate; | |
1390 | int tryEvalOnly = 0; | |
1391 | ||
1392 | /* Don't allow ENOENT errors, except for a specific code path where | |
1393 | * 'enoent_prohibited' is cleared below. */ | |
1394 | int enoent_prohibited = 1; | |
1395 | ||
1396 | OSI_VC_CONVERT(adp); | |
1397 | ||
1398 | AFS_STATCNT(afs_lookup); | |
1399 | afs_InitFakeStat(&fakestate); | |
1400 | ||
1401 | AFS_DISCON_LOCK(); | |
1402 | ||
1403 | if ((code = afs_CreateReq(&treq, acred))) | |
1404 | goto done; | |
1405 | ||
1406 | if (afs_fakestat_enable && adp->mvstat == AFS_MVSTAT_MTPT) { | |
1407 | if (strcmp(aname, ".directory") == 0) | |
1408 | tryEvalOnly = 1; | |
1409 | } | |
1410 | ||
1411 | #if defined(AFS_DARWIN_ENV) | |
1412 | /* Workaround for MacOSX Finder, which tries to look for | |
1413 | * .DS_Store and Contents under every directory. | |
1414 | */ | |
1415 | if (afs_fakestat_enable && adp->mvstat == AFS_MVSTAT_MTPT) { | |
1416 | if (strcmp(aname, ".DS_Store") == 0) | |
1417 | tryEvalOnly = 1; | |
1418 | if (strcmp(aname, "Contents") == 0) | |
1419 | tryEvalOnly = 1; | |
1420 | } | |
1421 | if (afs_fakestat_enable && adp->mvstat == AFS_MVSTAT_ROOT) { | |
1422 | if (strncmp(aname, "._", 2) == 0) | |
1423 | tryEvalOnly = 1; | |
1424 | } | |
1425 | #endif | |
1426 | ||
1427 | if (tryEvalOnly) | |
1428 | code = afs_TryEvalFakeStat(&adp, &fakestate, treq); | |
1429 | else | |
1430 | code = afs_EvalFakeStat(&adp, &fakestate, treq); | |
1431 | ||
1432 | /*printf("Code is %d\n", code);*/ | |
1433 | ||
1434 | if (tryEvalOnly && adp->mvstat == AFS_MVSTAT_MTPT) | |
1435 | code = ENODEV; | |
1436 | if (code) | |
1437 | goto done; | |
1438 | ||
1439 | /* come back to here if we encounter a non-existent object in a read-only | |
1440 | * volume's directory */ | |
1441 | redo: | |
1442 | *avcp = NULL; /* Since some callers don't initialize it */ | |
1443 | bulkcode = 0; | |
1444 | ||
1445 | if (!(adp->f.states & CStatd) && !afs_InReadDir(adp)) { | |
1446 | if ((code = afs_VerifyVCache2(adp, treq))) { | |
1447 | goto done; | |
1448 | } | |
1449 | } else | |
1450 | code = 0; | |
1451 | ||
1452 | /* watch for ".." in a volume root */ | |
1453 | if (adp->mvstat == AFS_MVSTAT_ROOT && aname[0] == '.' && aname[1] == '.' && !aname[2]) { | |
1454 | /* looking up ".." in root via special hacks */ | |
1455 | if (adp->mvid.parent == (struct VenusFid *)0 || adp->mvid.parent->Fid.Volume == 0) { | |
1456 | code = ENODEV; | |
1457 | goto done; | |
1458 | } | |
1459 | /* otherwise we have the fid here, so we use it */ | |
1460 | /*printf("Getting vcache\n");*/ | |
1461 | tvc = afs_GetVCache(adp->mvid.parent, treq, NULL, NULL); | |
1462 | afs_Trace3(afs_iclSetp, CM_TRACE_GETVCDOTDOT, ICL_TYPE_FID, adp->mvid.parent, | |
1463 | ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32, code); | |
1464 | *avcp = tvc; | |
1465 | code = (tvc ? 0 : EIO); | |
1466 | hit = 1; | |
1467 | if (tvc && !VREFCOUNT_GT(tvc, 0)) { | |
1468 | osi_Panic("TT1"); | |
1469 | } | |
1470 | if (code) { | |
1471 | /*printf("LOOKUP GETVCDOTDOT -> %d\n", code); */ | |
1472 | } | |
1473 | goto done; | |
1474 | } | |
1475 | ||
1476 | /* now check the access */ | |
1477 | if (treq->uid != adp->last_looker) { | |
1478 | if (!afs_AccessOK(adp, PRSFS_LOOKUP, treq, CHECK_MODE_BITS)) { | |
1479 | *avcp = NULL; | |
1480 | code = EACCES; | |
1481 | goto done; | |
1482 | } else | |
1483 | adp->last_looker = treq->uid; | |
1484 | } | |
1485 | ||
1486 | /* Check for read access as well. We need read access in order to | |
1487 | * stat files, but not to stat subdirectories. */ | |
1488 | if (!afs_AccessOK(adp, PRSFS_READ, treq, CHECK_MODE_BITS)) | |
1489 | no_read_access = 1; | |
1490 | ||
1491 | /* special case lookup of ".". Can we check for it sooner in this code, | |
1492 | * for instance, way up before "redo:" ?? | |
1493 | * I'm not fiddling with the LRUQ here, either, perhaps I should, or else | |
1494 | * invent a lightweight version of GetVCache. | |
1495 | */ | |
1496 | if (osi_lookup_isdot(aname)) { /* special case */ | |
1497 | ObtainReadLock(&afs_xvcache); | |
1498 | osi_vnhold(adp, 0); | |
1499 | ReleaseReadLock(&afs_xvcache); | |
1500 | #ifdef AFS_DARWIN80_ENV | |
1501 | vnode_get(AFSTOV(adp)); | |
1502 | #endif | |
1503 | code = 0; | |
1504 | *avcp = tvc = adp; | |
1505 | hit = 1; | |
1506 | if (adp && !VREFCOUNT_GT(adp, 0)) { | |
1507 | osi_Panic("TT2"); | |
1508 | } | |
1509 | goto done; | |
1510 | } | |
1511 | ||
1512 | /* | |
1513 | * Special case lookup of ".." in the dynamic mount directory. | |
1514 | * The parent of this directory is _always_ the AFS root volume. | |
1515 | */ | |
1516 | if (afs_IsDynrootMount(adp) && | |
1517 | aname[0] == '.' && aname[1] == '.' && !aname[2]) { | |
1518 | ||
1519 | ObtainReadLock(&afs_xvcache); | |
1520 | osi_vnhold(afs_globalVp, 0); | |
1521 | ReleaseReadLock(&afs_xvcache); | |
1522 | #ifdef AFS_DARWIN80_ENV | |
1523 | vnode_get(AFSTOV(afs_globalVp)); | |
1524 | #endif | |
1525 | code = 0; | |
1526 | *avcp = tvc = afs_globalVp; | |
1527 | hit = 1; | |
1528 | goto done; | |
1529 | } | |
1530 | ||
1531 | /* | |
1532 | * Special case lookups in the dynamic mount directory. | |
1533 | * The names here take the form cell:volume, similar to a mount point. | |
1534 | * EvalMountData parses that and returns a cell and volume ID, which | |
1535 | * we use to construct the appropriate dynroot Fid. | |
1536 | */ | |
1537 | if (afs_IsDynrootMount(adp)) { | |
1538 | struct VenusFid tfid; | |
1539 | afs_uint32 cellidx, volid, vnoid, uniq; | |
1540 | ||
1541 | code = EvalMountData('%', aname, 0, 0, NULL, treq, &cellidx, &volid, &vnoid, &uniq); | |
1542 | if (code) | |
1543 | goto done; | |
1544 | /* If a vnode was returned, it's not a real mount point */ | |
1545 | if (vnoid > 1) { | |
1546 | struct cell *tcell = afs_GetCellByIndex(cellidx, READ_LOCK); | |
1547 | tfid.Cell = tcell->cellNum; | |
1548 | afs_PutCell(tcell, READ_LOCK); | |
1549 | tfid.Fid.Vnode = vnoid; | |
1550 | tfid.Fid.Volume = volid; | |
1551 | tfid.Fid.Unique = uniq; | |
1552 | } else { | |
1553 | afs_GetDynrootMountFid(&tfid); | |
1554 | tfid.Fid.Vnode = VNUM_FROM_TYPEID(VN_TYPE_MOUNT, cellidx << 2); | |
1555 | tfid.Fid.Unique = volid; | |
1556 | } | |
1557 | *avcp = tvc = afs_GetVCache(&tfid, treq, NULL, NULL); | |
1558 | code = (tvc ? 0 : EIO); | |
1559 | hit = 1; | |
1560 | goto done; | |
1561 | } | |
1562 | ||
1563 | #ifdef AFS_LINUX26_ENV | |
1564 | /* | |
1565 | * Special case of the dynamic mount volume in a static root. | |
1566 | * This is really unfortunate, but we need this for the translator. | |
1567 | */ | |
1568 | if (adp == afs_globalVp && !afs_GetDynrootEnable() && | |
1569 | !strcmp(aname, AFS_DYNROOT_MOUNTNAME)) { | |
1570 | struct VenusFid tfid; | |
1571 | ||
1572 | afs_GetDynrootMountFid(&tfid); | |
1573 | *avcp = tvc = afs_GetVCache(&tfid, treq, NULL, NULL); | |
1574 | code = 0; | |
1575 | hit = 1; | |
1576 | goto done; | |
1577 | } | |
1578 | #endif | |
1579 | ||
1580 | Check_AtSys(adp, aname, &sysState, treq); | |
1581 | tname = sysState.name; | |
1582 | ||
1583 | /* 1st Check_AtSys and lookup by tname is required here, for now, | |
1584 | * because the dnlc is *not* told to remove entries for the parent | |
1585 | * dir of file/dir op that afs_LocalHero likes, but dnlc is informed | |
1586 | * if the cached entry for the parent dir is invalidated for a | |
1587 | * non-local change. | |
1588 | * Otherwise, we'd be able to do a dnlc lookup on an entry ending | |
1589 | * w/@sys and know the dnlc was consistent with reality. */ | |
1590 | tvc = osi_dnlc_lookup(adp, tname, WRITE_LOCK); | |
1591 | *avcp = tvc; /* maybe wasn't initialized, but it is now */ | |
1592 | if (tvc) { | |
1593 | if (no_read_access && vType(tvc) != VDIR && vType(tvc) != VLNK) { | |
1594 | /* need read access on dir to stat non-directory / non-link */ | |
1595 | afs_PutVCache(tvc); | |
1596 | *avcp = NULL; | |
1597 | code = EACCES; | |
1598 | goto done; | |
1599 | } | |
1600 | #ifdef AFS_LINUX22_ENV | |
1601 | if (tvc->mvstat == AFS_MVSTAT_ROOT) { /* we don't trust the dnlc for root vcaches */ | |
1602 | AFS_RELE(AFSTOV(tvc)); | |
1603 | *avcp = 0; | |
1604 | } else { | |
1605 | code = 0; | |
1606 | hit = 1; | |
1607 | goto done; | |
1608 | } | |
1609 | #else /* non - LINUX */ | |
1610 | code = 0; | |
1611 | hit = 1; | |
1612 | goto done; | |
1613 | #endif /* linux22 */ | |
1614 | } | |
1615 | ||
1616 | { /* sub-block just to reduce stack usage */ | |
1617 | struct dcache *tdc; | |
1618 | afs_size_t dirOffset, dirLen; | |
1619 | struct VenusFid tfid; | |
1620 | ||
1621 | /* now we have to lookup the next fid */ | |
1622 | if (afs_InReadDir(adp)) | |
1623 | tdc = adp->dcreaddir; | |
1624 | else | |
1625 | tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, | |
1626 | &dirOffset, &dirLen, 1); | |
1627 | if (!tdc) { | |
1628 | *avcp = NULL; /* redundant, but harmless */ | |
1629 | code = EIO; | |
1630 | goto done; | |
1631 | } | |
1632 | ||
1633 | /* now we will just call dir package with appropriate inode. | |
1634 | * Dirs are always fetched in their entirety for now */ | |
1635 | ObtainReadLock(&adp->lock); | |
1636 | ObtainReadLock(&tdc->lock); | |
1637 | ||
1638 | /* | |
1639 | * Make sure that the data in the cache is current. There are two | |
1640 | * cases we need to worry about: | |
1641 | * 1. The cache data is being fetched by another process. | |
1642 | * 2. The cache data is no longer valid | |
1643 | * | |
1644 | * If a readdir is in progress _in this thread_, it has a shared | |
1645 | * lock on the vcache and has obtained current data, so we just | |
1646 | * use that. This eliminates several possible deadlocks. | |
1647 | */ | |
1648 | if (!afs_InReadDir(adp)) { | |
1649 | while ((adp->f.states & CStatd) | |
1650 | && (tdc->dflags & DFFetching) | |
1651 | && hsame(adp->f.m.DataVersion, tdc->f.versionNo)) { | |
1652 | ReleaseReadLock(&tdc->lock); | |
1653 | ReleaseReadLock(&adp->lock); | |
1654 | afs_osi_Sleep(&tdc->validPos); | |
1655 | ObtainReadLock(&adp->lock); | |
1656 | ObtainReadLock(&tdc->lock); | |
1657 | } | |
1658 | if (!(adp->f.states & CStatd) | |
1659 | || !hsame(adp->f.m.DataVersion, tdc->f.versionNo)) { | |
1660 | ReleaseReadLock(&tdc->lock); | |
1661 | ReleaseReadLock(&adp->lock); | |
1662 | afs_PutDCache(tdc); | |
1663 | if (tname && tname != aname) | |
1664 | osi_FreeLargeSpace(tname); | |
1665 | goto redo; | |
1666 | } | |
1667 | } | |
1668 | ||
1669 | /* Save the version number for when we call osi_dnlc_enter */ | |
1670 | hset(versionNo, tdc->f.versionNo); | |
1671 | ||
1672 | /* | |
1673 | * check for, and handle "@sys" if it's there. We should be able | |
1674 | * to avoid the alloc and the strcpy with a little work, but it's | |
1675 | * not pressing. If there aren't any remote users (ie, via the | |
1676 | * NFS translator), we have a slightly easier job. | |
1677 | * the faster way to do this is to check for *aname == '@' and if | |
1678 | * it's there, check for @sys, otherwise, assume there's no @sys | |
1679 | * then, if the lookup fails, check for .*@sys... | |
1680 | */ | |
1681 | /* above now implemented by Check_AtSys and Next_AtSys */ | |
1682 | ||
1683 | /* lookup the name in the appropriate dir, and return a cache entry | |
1684 | * on the resulting fid */ | |
1685 | code = | |
1686 | afs_dir_LookupOffset(tdc, sysState.name, &tfid.Fid, | |
1687 | &dirCookie); | |
1688 | ||
1689 | /* If the first lookup doesn't succeed, maybe it's got @sys in the name */ | |
1690 | while (code == ENOENT && Next_AtSys(adp, treq, &sysState)) | |
1691 | code = | |
1692 | afs_dir_LookupOffset(tdc, sysState.name, &tfid.Fid, | |
1693 | &dirCookie); | |
1694 | tname = sysState.name; | |
1695 | ||
1696 | ReleaseReadLock(&tdc->lock); | |
1697 | if (!afs_InReadDir(adp)) | |
1698 | afs_PutDCache(tdc); | |
1699 | if (code == ENOENT && afs_IsDynroot(adp) && dynrootRetry && !tryEvalOnly) { | |
1700 | struct cell *tc; | |
1701 | char *cn = (tname[0] == '.') ? tname + 1 : tname; | |
1702 | ReleaseReadLock(&adp->lock); | |
1703 | /* confirm it's not just hushed */ | |
1704 | tc = afs_GetCellByName(cn, WRITE_LOCK); | |
1705 | if (tc) { | |
1706 | if (tc->states & CHush) { | |
1707 | tc->states &= ~CHush; | |
1708 | ReleaseWriteLock(&tc->lock); | |
1709 | afs_DynrootInvalidate(); | |
1710 | goto redo; | |
1711 | } | |
1712 | ReleaseWriteLock(&tc->lock); | |
1713 | } | |
1714 | /* Allow a second dynroot retry if the cell was hushed before */ | |
1715 | dynrootRetry = 0; | |
1716 | if (tname[0] == '.') | |
1717 | afs_LookupAFSDB(tname + 1); | |
1718 | else | |
1719 | afs_LookupAFSDB(tname); | |
1720 | if (tname && tname != aname) | |
1721 | osi_FreeLargeSpace(tname); | |
1722 | goto redo; | |
1723 | } else { | |
1724 | ReleaseReadLock(&adp->lock); | |
1725 | } | |
1726 | ||
1727 | /* new fid has same cell and volume */ | |
1728 | tfid.Cell = adp->f.fid.Cell; | |
1729 | tfid.Fid.Volume = adp->f.fid.Fid.Volume; | |
1730 | afs_Trace4(afs_iclSetp, CM_TRACE_LOOKUP, ICL_TYPE_POINTER, adp, | |
1731 | ICL_TYPE_STRING, tname, ICL_TYPE_FID, &tfid, | |
1732 | ICL_TYPE_INT32, code); | |
1733 | ||
1734 | if (code) { | |
1735 | if (code == ENOENT) { | |
1736 | /* The target name really doesn't exist (according to | |
1737 | * afs_dir_LookupOffset, anyway). */ | |
1738 | enoent_prohibited = 0; | |
1739 | } | |
1740 | goto done; | |
1741 | } | |
1742 | ||
1743 | /* prefetch some entries, if the dir is currently open. The variable | |
1744 | * dirCookie tells us where to start prefetching from. | |
1745 | */ | |
1746 | if (!AFS_IS_DISCONNECTED && | |
1747 | AFSDOBULK && adp->opens > 0 && !(adp->f.states & CForeign) | |
1748 | && !afs_IsDynroot(adp) && !afs_InReadDir(adp)) { | |
1749 | afs_int32 retry; | |
1750 | /* if the entry is not in the cache, or is in the cache, | |
1751 | * but hasn't been statd, then do a bulk stat operation. | |
1752 | */ | |
1753 | do { | |
1754 | retry = 0; | |
1755 | ObtainReadLock(&afs_xvcache); | |
1756 | tvc = afs_FindVCache(&tfid, &retry, 0 /* !stats,!lru */ ); | |
1757 | ReleaseReadLock(&afs_xvcache); | |
1758 | } while (tvc && retry); | |
1759 | ||
1760 | if (!tvc || !(tvc->f.states & CStatd)) | |
1761 | bulkcode = afs_DoBulkStat(adp, dirCookie, treq); | |
1762 | else | |
1763 | bulkcode = 0; | |
1764 | ||
1765 | /* if the vcache isn't usable, release it */ | |
1766 | if (tvc && !(tvc->f.states & CStatd)) { | |
1767 | afs_PutVCache(tvc); | |
1768 | tvc = NULL; | |
1769 | } | |
1770 | } else { | |
1771 | tvc = NULL; | |
1772 | bulkcode = 0; | |
1773 | } | |
1774 | ||
1775 | /* now get the status info, if we don't already have it */ | |
1776 | /* This is kind of weird, but we might wind up accidentally calling | |
1777 | * RXAFS_Lookup because we happened upon a file which legitimately | |
1778 | * has a 0 uniquifier. That is the result of allowing unique to wrap | |
1779 | * to 0. This was fixed in AFS 3.4. For CForeign, Unique == 0 means that | |
1780 | * the file has not yet been looked up. | |
1781 | */ | |
1782 | if (!tvc) { | |
1783 | afs_int32 cached = 0; | |
1784 | if (!tfid.Fid.Unique && (adp->f.states & CForeign)) { | |
1785 | tvc = afs_LookupVCache(&tfid, treq, &cached, adp, tname); | |
1786 | } | |
1787 | if (!tvc && !bulkcode) { /* lookup failed or wasn't called */ | |
1788 | tvc = afs_GetVCache(&tfid, treq, &cached, NULL); | |
1789 | } | |
1790 | } /* if !tvc */ | |
1791 | } /* sub-block just to reduce stack usage */ | |
1792 | ||
1793 | if (tvc) { | |
1794 | if (adp->f.states & CForeign) | |
1795 | tvc->f.states |= CForeign; | |
1796 | tvc->f.parent.vnode = adp->f.fid.Fid.Vnode; | |
1797 | tvc->f.parent.unique = adp->f.fid.Fid.Unique; | |
1798 | tvc->f.states &= ~CBulkStat; | |
1799 | ||
1800 | if (afs_fakestat_enable == 2 && tvc->mvstat == AFS_MVSTAT_MTPT) { | |
1801 | ObtainSharedLock(&tvc->lock, 680); | |
1802 | if (!tvc->linkData) { | |
1803 | UpgradeSToWLock(&tvc->lock, 681); | |
1804 | code = afs_HandleLink(tvc, treq); | |
1805 | ConvertWToRLock(&tvc->lock); | |
1806 | } else { | |
1807 | ConvertSToRLock(&tvc->lock); | |
1808 | code = 0; | |
1809 | } | |
1810 | if (!code && !afs_strchr(tvc->linkData, ':')) | |
1811 | force_eval = 1; | |
1812 | ReleaseReadLock(&tvc->lock); | |
1813 | } | |
1814 | if (tvc->mvstat == AFS_MVSTAT_MTPT && (tvc->f.states & CMValid) && tvc->mvid.target_root != NULL) | |
1815 | force_eval = 1; /* This is now almost for free, get it correct */ | |
1816 | ||
1817 | #if defined(UKERNEL) | |
1818 | if (!(flags & AFS_LOOKUP_NOEVAL)) | |
1819 | /* don't eval mount points */ | |
1820 | #endif /* UKERNEL */ | |
1821 | if (tvc->mvstat == AFS_MVSTAT_MTPT && force_eval) { | |
1822 | /* a mt point, possibly unevaluated */ | |
1823 | struct volume *tvolp; | |
1824 | ||
1825 | ObtainWriteLock(&tvc->lock, 133); | |
1826 | code = EvalMountPoint(tvc, adp, &tvolp, treq); | |
1827 | ReleaseWriteLock(&tvc->lock); | |
1828 | ||
1829 | if (code) { | |
1830 | afs_PutVCache(tvc); | |
1831 | if (tvolp) | |
1832 | afs_PutVolume(tvolp, WRITE_LOCK); | |
1833 | goto done; | |
1834 | } | |
1835 | ||
1836 | /* next, we want to continue using the target of the mt point */ | |
1837 | if (tvc->mvid.target_root && (tvc->f.states & CMValid)) { | |
1838 | struct vcache *uvc; | |
1839 | /* now lookup target, to set .. pointer */ | |
1840 | afs_Trace2(afs_iclSetp, CM_TRACE_LOOKUP1, | |
1841 | ICL_TYPE_POINTER, tvc, ICL_TYPE_FID, | |
1842 | &tvc->f.fid); | |
1843 | uvc = tvc; /* remember for later */ | |
1844 | ||
1845 | if (tvolp && (tvolp->states & VForeign)) { | |
1846 | /* XXXX tvolp has ref cnt on but not locked! XXX */ | |
1847 | tvc = | |
1848 | afs_GetRootVCache(tvc->mvid.target_root, treq, NULL, tvolp); | |
1849 | } else { | |
1850 | tvc = afs_GetVCache(tvc->mvid.target_root, treq, NULL, NULL); | |
1851 | } | |
1852 | afs_PutVCache(uvc); /* we're done with it */ | |
1853 | ||
1854 | if (!tvc) { | |
1855 | code = EIO; | |
1856 | if (tvolp) { | |
1857 | afs_PutVolume(tvolp, WRITE_LOCK); | |
1858 | } | |
1859 | goto done; | |
1860 | } | |
1861 | ||
1862 | /* now, if we came via a new mt pt (say because of a new | |
1863 | * release of a R/O volume), we must reevaluate the .. | |
1864 | * ptr to point back to the appropriate place */ | |
1865 | if (tvolp) { | |
1866 | ObtainWriteLock(&tvc->lock, 134); | |
1867 | if (tvc->mvid.parent == NULL) { | |
1868 | tvc->mvid.parent = | |
1869 | osi_AllocSmallSpace(sizeof(struct VenusFid)); | |
1870 | } | |
1871 | /* setup backpointer */ | |
1872 | *tvc->mvid.parent = tvolp->dotdot; | |
1873 | ReleaseWriteLock(&tvc->lock); | |
1874 | afs_PutVolume(tvolp, WRITE_LOCK); | |
1875 | } | |
1876 | } else { | |
1877 | afs_PutVCache(tvc); | |
1878 | code = ENODEV; | |
1879 | if (tvolp) | |
1880 | afs_PutVolume(tvolp, WRITE_LOCK); | |
1881 | goto done; | |
1882 | } | |
1883 | } | |
1884 | *avcp = tvc; | |
1885 | if (tvc && !VREFCOUNT_GT(tvc, 0)) { | |
1886 | osi_Panic("TT3"); | |
1887 | } | |
1888 | code = 0; | |
1889 | } else { | |
1890 | /* if we get here, we found something in a directory that couldn't | |
1891 | * be located (a Multics "connection failure"). If the volume is | |
1892 | * read-only, we try flushing this entry from the cache and trying | |
1893 | * again. */ | |
1894 | if (!AFS_IS_DISCONNECTED) { | |
1895 | if (pass == 0) { | |
1896 | struct volume *tv; | |
1897 | tv = afs_GetVolume(&adp->f.fid, treq, READ_LOCK); | |
1898 | if (tv) { | |
1899 | if (tv->states & VRO) { | |
1900 | pass = 1; /* try this *once* */ | |
1901 | /* re-stat to get later version */ | |
1902 | afs_StaleVCache(adp); | |
1903 | afs_PutVolume(tv, READ_LOCK); | |
1904 | goto redo; | |
1905 | } | |
1906 | afs_PutVolume(tv, READ_LOCK); | |
1907 | } | |
1908 | } | |
1909 | code = EIO; | |
1910 | } else { | |
1911 | code = ENETDOWN; | |
1912 | } | |
1913 | } | |
1914 | ||
1915 | done: | |
1916 | /* put the network buffer back, if need be */ | |
1917 | if (tname != aname && tname) | |
1918 | osi_FreeLargeSpace(tname); | |
1919 | if (code == 0) { | |
1920 | ||
1921 | if (afs_mariner) | |
1922 | afs_AddMarinerName(aname, tvc); | |
1923 | ||
1924 | #if defined(UKERNEL) | |
1925 | if (!(flags & AFS_LOOKUP_NOEVAL)) { | |
1926 | /* Here we don't enter the name into the DNLC because we want the | |
1927 | * evaluated mount dir to be there (the vcache for the mounted | |
1928 | * volume) rather than the vc of the mount point itself. We can | |
1929 | * still find the mount point's vc in the vcache by its fid. */ | |
1930 | #endif /* UKERNEL */ | |
1931 | if (!hit && (force_eval || tvc->mvstat != AFS_MVSTAT_MTPT)) { | |
1932 | osi_dnlc_enter(adp, aname, tvc, &versionNo); | |
1933 | } else { | |
1934 | #ifdef AFS_LINUX20_ENV | |
1935 | /* So Linux inode cache is up to date. */ | |
1936 | code = afs_VerifyVCache(tvc, treq); | |
1937 | #else | |
1938 | afs_PutFakeStat(&fakestate); | |
1939 | afs_DestroyReq(treq); | |
1940 | AFS_DISCON_UNLOCK(); | |
1941 | return 0; /* can't have been any errors if hit and !code */ | |
1942 | #endif | |
1943 | } | |
1944 | #if defined(UKERNEL) | |
1945 | } | |
1946 | #endif | |
1947 | } | |
1948 | if (bulkcode) | |
1949 | code = bulkcode; | |
1950 | ||
1951 | code = afs_CheckCode(code, treq, 19); | |
1952 | if (code) { | |
1953 | /* If there is an error, make sure *avcp is null. | |
1954 | * Alphas panic otherwise - defect 10719. | |
1955 | */ | |
1956 | *avcp = NULL; | |
1957 | } | |
1958 | if (code == ENOENT && enoent_prohibited) { | |
1959 | /* | |
1960 | * We got an ENOENT error, but we didn't get it while looking up the | |
1961 | * dir entry in the relevant dir blob. That means we likely hit some | |
1962 | * other internal error; don't allow us to return ENOENT in this case, | |
1963 | * since some platforms cache ENOENT errors, and the target path name | |
1964 | * may actually exist. | |
1965 | */ | |
1966 | code = EIO; | |
1967 | } | |
1968 | ||
1969 | afs_PutFakeStat(&fakestate); | |
1970 | afs_DestroyReq(treq); | |
1971 | AFS_DISCON_UNLOCK(); | |
1972 | return code; | |
1973 | } |