Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / afs / VNOPS / afs_vnop_create.c
... / ...
CommitLineData
/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
9
/*
 * Implements:
 * afs_create
 * afs_LocalHero
 */
15
16#include <afsconfig.h>
17#include "afs/param.h"
18
19
20#include "afs/sysincludes.h" /* Standard vendor system headers */
21#include "afsincludes.h" /* Afs-based standard headers */
22#include "afs/afs_stats.h" /* statistics */
23#include "afs/afs_cbqueue.h"
24#include "afs/nfsclient.h"
25#include "afs/afs_osidnlc.h"
26#include "afs/unified_afs.h"
27
28/* question: does afs_create need to set CDirty in the adp or the avc?
29 * I think we can get away without it, but I'm not sure. Note that
30 * afs_setattr is called in here for truncation.
31 */
32#ifdef AFS_SGI64_ENV
33int
34afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, int flags,
35 int amode, struct vcache **avcp, afs_ucred_t *acred)
36#else /* AFS_SGI64_ENV */
37int
38afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs,
39 enum vcexcl aexcl, int amode, struct vcache **avcp,
40 afs_ucred_t *acred)
41#endif /* AFS_SGI64_ENV */
42{
43 afs_int32 origCBs, origZaps, finalZaps;
44 struct vrequest *treq = NULL;
45 afs_int32 code;
46 struct afs_conn *tc;
47 struct VenusFid newFid;
48 struct AFSStoreStatus InStatus;
49 struct AFSFetchStatus *OutFidStatus, *OutDirStatus;
50 struct AFSVolSync tsync;
51 struct AFSCallBack CallBack;
52 afs_int32 now;
53 struct dcache *tdc;
54 afs_size_t offset, len;
55 struct server *hostp = 0;
56 struct vcache *tvc;
57 struct volume *volp = 0;
58 struct afs_fakestat_state fakestate;
59 struct rx_connection *rxconn;
60 XSTATS_DECLS;
61 OSI_VC_CONVERT(adp);
62
63 AFS_STATCNT(afs_create);
64
65 OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
66 OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
67 memset(&InStatus, 0, sizeof(InStatus));
68
69 if ((code = afs_CreateReq(&treq, acred)))
70 goto done2;
71
72 afs_Trace3(afs_iclSetp, CM_TRACE_CREATE, ICL_TYPE_POINTER, adp,
73 ICL_TYPE_STRING, aname, ICL_TYPE_INT32, amode);
74
75 afs_InitFakeStat(&fakestate);
76
77#ifdef AFS_SGI65_ENV
78 /* If avcp is passed not null, it's the old reference to this file.
79 * We can use this to avoid create races. For now, just decrement
80 * the reference count on it.
81 */
82 if (*avcp) {
83 AFS_RELE(AFSTOV(*avcp));
84 *avcp = NULL;
85 }
86#endif
87
88 if (strlen(aname) > AFSNAMEMAX) {
89 code = ENAMETOOLONG;
90 goto done3;
91 }
92
93 if (!afs_ENameOK(aname)) {
94 code = EINVAL;
95 goto done3;
96 }
97 switch (attrs->va_type) {
98 case VBLK:
99 case VCHR:
100#if !defined(AFS_SUN5_ENV)
101 case VSOCK:
102#endif
103 case VFIFO:
104 /* We don't support special devices or FIFOs */
105 code = EINVAL;
106 goto done3;
107 default:
108 ;
109 }
110 AFS_DISCON_LOCK();
111
112 code = afs_EvalFakeStat(&adp, &fakestate, treq);
113 if (code)
114 goto done;
115 tagain:
116 code = afs_VerifyVCache(adp, treq);
117 if (code)
118 goto done;
119
120 /** If the volume is read-only, return error without making an RPC to the
121 * fileserver
122 */
123 if (adp->f.states & CRO) {
124 code = EROFS;
125 goto done;
126 }
127
128 if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
129 code = ENETDOWN;
130 goto done;
131 }
132
133 tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1);
134
135 /** Prevent multiple fetchStatus calls to fileserver when afs_GetDCache()
136 * returns NULL for an error condition
137 */
138 if (!tdc) {
139 code = EIO;
140 goto done;
141 }
142
143 ObtainWriteLock(&adp->lock, 135);
144 if (tdc)
145 ObtainSharedLock(&tdc->lock, 630);
146
147 /*
148 * Make sure that the data in the cache is current. We may have
149 * received a callback while we were waiting for the write lock.
150 */
151 if (!(adp->f.states & CStatd)
152 || (tdc && !hsame(adp->f.m.DataVersion, tdc->f.versionNo))) {
153 ReleaseWriteLock(&adp->lock);
154 if (tdc) {
155 ReleaseSharedLock(&tdc->lock);
156 afs_PutDCache(tdc);
157 }
158 goto tagain;
159 }
160 if (tdc) {
161 /* see if file already exists. If it does, we only set
162 * the size attributes (to handle O_TRUNC) */
163 code = afs_dir_Lookup(tdc, aname, &newFid.Fid); /* use dnlc first xxx */
164 if (code == 0) {
165 ReleaseSharedLock(&tdc->lock);
166 afs_PutDCache(tdc);
167 ReleaseWriteLock(&adp->lock);
168#ifdef AFS_SGI64_ENV
169 if (flags & VEXCL) {
170#else
171 if (aexcl != NONEXCL) {
172#endif
173 code = EEXIST; /* file exists in excl mode open */
174 goto done;
175 }
176 /* found the file, so use it */
177 newFid.Cell = adp->f.fid.Cell;
178 newFid.Fid.Volume = adp->f.fid.Fid.Volume;
179 tvc = NULL;
180 if (newFid.Fid.Unique == 0) {
181 tvc = afs_LookupVCache(&newFid, treq, NULL, adp, aname);
182 }
183 if (!tvc) /* lookup failed or wasn't called */
184 tvc = afs_GetVCache(&newFid, treq, NULL, NULL);
185
186 if (tvc) {
187 /* if the thing exists, we need the right access to open it.
188 * we must check that here, since no other checks are
189 * made by the open system call */
190 len = attrs->va_size; /* only do the truncate */
191 /*
192 * We used to check always for READ access before; the
193 * problem is that we will fail if the existing file
194 * has mode -w-w-w, which is wrong.
195 */
196 if ((amode & VREAD)
197 && !afs_AccessOK(tvc, PRSFS_READ, treq, CHECK_MODE_BITS)) {
198 afs_PutVCache(tvc);
199 code = EACCES;
200 goto done;
201 }
202#if defined(AFS_DARWIN80_ENV)
203 if ((amode & VWRITE) || VATTR_IS_ACTIVE(attrs, va_data_size))
204#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
205 if ((amode & VWRITE) || (attrs->va_mask & AT_SIZE))
206#else
207 if ((amode & VWRITE) || len != 0xffffffff)
208#endif
209 {
210 /* needed for write access check */
211 tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
212 tvc->f.parent.unique = adp->f.fid.Fid.Unique;
213 /* need write mode for these guys */
214 if (!afs_AccessOK
215 (tvc, PRSFS_WRITE, treq, CHECK_MODE_BITS)) {
216 afs_PutVCache(tvc);
217 code = EACCES;
218 goto done;
219 }
220 }
221#if defined(AFS_DARWIN80_ENV)
222 if (VATTR_IS_ACTIVE(attrs, va_data_size))
223#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
224 if (attrs->va_mask & AT_SIZE)
225#else
226 if (len != 0xffffffff)
227#endif
228 {
229 if (vType(tvc) != VREG) {
230 afs_PutVCache(tvc);
231 code = EISDIR;
232 goto done;
233 }
234 /* do a truncate */
235#if defined(AFS_DARWIN80_ENV)
236 VATTR_INIT(attrs);
237 VATTR_SET_SUPPORTED(attrs, va_data_size);
238 VATTR_SET_ACTIVE(attrs, va_data_size);
239#elif defined(UKERNEL)
240 attrs->va_mask = ATTR_SIZE;
241#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
242 attrs->va_mask = AT_SIZE;
243#else
244 VATTR_NULL(attrs);
245#endif
246 attrs->va_size = len;
247 ObtainWriteLock(&tvc->lock, 136);
248 tvc->f.states |= CCreating;
249 ReleaseWriteLock(&tvc->lock);
250#if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
251#if defined(AFS_SGI64_ENV)
252 code =
253 afs_setattr(VNODE_TO_FIRST_BHV((vnode_t *) tvc),
254 attrs, 0, acred);
255#else
256 code = afs_setattr(tvc, attrs, 0, acred);
257#endif /* AFS_SGI64_ENV */
258#else /* SUN5 || SGI */
259 code = afs_setattr(tvc, attrs, acred);
260#endif /* SUN5 || SGI */
261 ObtainWriteLock(&tvc->lock, 137);
262 tvc->f.states &= ~CCreating;
263 ReleaseWriteLock(&tvc->lock);
264 if (code) {
265 afs_PutVCache(tvc);
266 goto done;
267 }
268 }
269 *avcp = tvc;
270
271 } else {
272 /* Directory entry already exists, but we cannot fetch the
273 * fid it points to. */
274 code = EIO;
275 }
276 /* make sure vrefCount bumped only if code == 0 */
277 goto done;
278 }
279 }
280
281 /* if we create the file, we don't do any access checks, since
282 * that's how O_CREAT is supposed to work */
283 if (adp->f.states & CForeign) {
284 origCBs = afs_allCBs;
285 origZaps = afs_allZaps;
286 } else {
287 origCBs = afs_evenCBs; /* if changes, we don't really have a callback */
288 origZaps = afs_evenZaps; /* number of even numbered vnodes discarded */
289 }
290 InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE | AFS_SETGROUP;
291 InStatus.ClientModTime = osi_Time();
292 InStatus.Group = (afs_int32) afs_cr_gid(acred);
293 if (AFS_NFSXLATORREQ(acred)) {
294 /*
295 * XXX The following is mainly used to fix a bug in the HP-UX
296 * nfs client where they create files with mode of 0 without
297 * doing any setattr later on to fix it. * XXX
298 */
299#if defined(AFS_AIX_ENV)
300 if (attrs->va_mode != -1) {
301#else
302#if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
303 if (attrs->va_mask & AT_MODE) {
304#else
305 if (attrs->va_mode != ((unsigned short)-1)) {
306#endif
307#endif
308 if (!attrs->va_mode)
309 attrs->va_mode = 0x1b6; /* XXX default mode: rw-rw-rw XXX */
310 }
311 }
312
313 if (!AFS_IS_DISCONNECTED) {
314 /* If not disconnected, connect to the server.*/
315
316 InStatus.UnixModeBits = attrs->va_mode & 0xffff; /* only care about protection bits */
317 do {
318 tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn);
319 if (tc) {
320 hostp = tc->parent->srvr->server; /* remember for callback processing */
321 now = osi_Time();
322 XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_CREATEFILE);
323 RX_AFS_GUNLOCK();
324 code =
325 RXAFS_CreateFile(rxconn, (struct AFSFid *)&adp->f.fid.Fid,
326 aname, &InStatus, (struct AFSFid *)
327 &newFid.Fid, OutFidStatus, OutDirStatus,
328 &CallBack, &tsync);
329 RX_AFS_GLOCK();
330 XSTATS_END_TIME;
331 CallBack.ExpirationTime += now;
332 } else
333 code = -1;
334 } while (afs_Analyze
335 (tc, rxconn, code, &adp->f.fid, treq, AFS_STATS_FS_RPCIDX_CREATEFILE,
336 SHARED_LOCK, NULL));
337
338 if ((code == EEXIST || code == UAEEXIST) &&
339#ifdef AFS_SGI64_ENV
340 !(flags & VEXCL)
341#else /* AFS_SGI64_ENV */
342 aexcl == NONEXCL
343#endif
344 ) {
345 /* if we get an EEXIST in nonexcl mode, just do a lookup */
346 if (tdc) {
347 ReleaseSharedLock(&tdc->lock);
348 afs_PutDCache(tdc);
349 }
350 ReleaseWriteLock(&adp->lock);
351
352
353#if defined(AFS_SGI64_ENV)
354 code = afs_lookup(VNODE_TO_FIRST_BHV((vnode_t *) adp), aname, avcp,
355 NULL, 0, NULL, acred);
356#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
357 code = afs_lookup(adp, aname, avcp, NULL, 0, NULL, acred);
358#elif defined(UKERNEL)
359 code = afs_lookup(adp, aname, avcp, acred, 0);
360#elif !defined(AFS_DARWIN_ENV)
361 code = afs_lookup(adp, aname, avcp, acred);
362#endif
363 goto done;
364 }
365
366 if (code) {
367 if (code < 0) {
368 afs_StaleVCache(adp);
369 }
370 ReleaseWriteLock(&adp->lock);
371 if (tdc) {
372 ReleaseSharedLock(&tdc->lock);
373 afs_PutDCache(tdc);
374 }
375 goto done;
376 }
377
378 } else {
379 /* Generate a fake FID for disconnected mode. */
380 newFid.Cell = adp->f.fid.Cell;
381 newFid.Fid.Volume = adp->f.fid.Fid.Volume;
382 afs_GenFakeFid(&newFid, VREG, 1);
383 } /* if (!AFS_IS_DISCON_RW) */
384
385 /* otherwise, we should see if we can make the change to the dir locally */
386 if (tdc)
387 UpgradeSToWLock(&tdc->lock, 631);
388 if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) {
389 /* we can do it locally */
390 ObtainWriteLock(&afs_xdcache, 291);
391 code = afs_dir_Create(tdc, aname, &newFid.Fid);
392 ReleaseWriteLock(&afs_xdcache);
393 if (code) {
394 ZapDCE(tdc);
395 DZap(tdc);
396 }
397 }
398 if (tdc) {
399 ReleaseWriteLock(&tdc->lock);
400 afs_PutDCache(tdc);
401 }
402 if (AFS_IS_DISCON_RW)
403 adp->f.m.LinkCount++;
404
405 newFid.Cell = adp->f.fid.Cell;
406 newFid.Fid.Volume = adp->f.fid.Fid.Volume;
407 ReleaseWriteLock(&adp->lock);
408 volp = afs_FindVolume(&newFid, READ_LOCK);
409
410 /* New tricky optimistic callback handling algorithm for file creation works
411 * as follows. We create the file essentially with no locks set at all. File
412 * server may thus handle operations from others cache managers as well as from
413 * this very own cache manager that reference the file in question before
414 * we managed to create the cache entry. However, if anyone else changes
415 * any of the status information for a file, we'll see afs_evenCBs increase
416 * (files always have even fids). If someone on this workstation manages
417 * to do something to the file, they'll end up having to create a cache
418 * entry for the new file. Either we'll find it once we've got the afs_xvcache
419 * lock set, or it was also *deleted* the vnode before we got there, in which case
420 * we will find evenZaps has changed, too. Thus, we only assume we have the right
421 * status information if no callbacks or vnode removals have occurred to even
422 * numbered files from the time the call started until the time that we got the xvcache
423 * lock set. Of course, this also assumes that any call that modifies a file first
424 * gets a write lock on the file's vnode, but if that weren't true, the whole cache manager
425 * would fail, since no call would be able to update the local vnode status after modifying
426 * a file on a file server. */
427 ObtainWriteLock(&afs_xvcache, 138);
428 if (adp->f.states & CForeign)
429 finalZaps = afs_allZaps; /* do this before calling newvcache */
430 else
431 finalZaps = afs_evenZaps; /* do this before calling newvcache */
432 /* don't need to call RemoveVCB, since only path leaving a callback is the
433 * one where we pass through afs_NewVCache. Can't have queued a VCB unless
434 * we created and freed an entry between file creation time and here, and the
435 * freeing of the vnode will change evenZaps. Don't need to update the VLRU
436 * queue, since the find will only succeed in the event of a create race, and
437 * then the vcache will be at the front of the VLRU queue anyway... */
438 if (!(tvc = afs_FindVCache(&newFid, 0, DO_STATS))) {
439 tvc = afs_NewVCache(&newFid, hostp);
440 if (tvc) {
441 int finalCBs;
442 ObtainWriteLock(&tvc->lock, 139);
443
444 ObtainWriteLock(&afs_xcbhash, 489);
445 finalCBs = afs_evenCBs;
446 /* add the callback in */
447 if (adp->f.states & CForeign) {
448 tvc->f.states |= CForeign;
449 finalCBs = afs_allCBs;
450 }
451 if (origCBs == finalCBs && origZaps == finalZaps) {
452 tvc->f.states |= CStatd; /* we've fake entire thing, so don't stat */
453 tvc->f.states &= ~CBulkFetching;
454 if (!AFS_IS_DISCON_RW) {
455 tvc->cbExpires = CallBack.ExpirationTime;
456 afs_QueueCallback(tvc, CBHash(CallBack.ExpirationTime), volp);
457 }
458 } else {
459 afs_StaleVCacheFlags(tvc,
460 AFS_STALEVC_CBLOCKED | AFS_STALEVC_CLEARCB,
461 CUnique);
462 }
463 ReleaseWriteLock(&afs_xcbhash);
464 if (AFS_IS_DISCON_RW) {
465 afs_DisconAddDirty(tvc, VDisconCreate, 0);
466 afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VREG);
467 } else {
468 afs_ProcessFS(tvc, OutFidStatus, treq);
469 }
470
471 tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
472 tvc->f.parent.unique = adp->f.fid.Fid.Unique;
473 ReleaseWriteLock(&tvc->lock);
474 *avcp = tvc;
475 code = 0;
476
477 } else {
478 /* Cannot create a new vcache. */
479 code = EIO;
480 }
481 } else {
482 /* otherwise cache entry already exists, someone else must
483 * have created it. Comments used to say: "don't need write
484 * lock to *clear* these flags" but we should do it anyway.
485 * Code used to clear stat bit and callback, but I don't see
486 * the point -- we didn't have a create race, somebody else just
487 * snuck into NewVCache before we got here, probably a racing
488 * lookup.
489 */
490 *avcp = tvc;
491 code = 0;
492 }
493 ReleaseWriteLock(&afs_xvcache);
494
495 done:
496 AFS_DISCON_UNLOCK();
497
498 done3:
499 if (volp)
500 afs_PutVolume(volp, READ_LOCK);
501
502 if (code == 0) {
503 if (afs_mariner)
504 afs_AddMarinerName(aname, *avcp);
505 /* return the new status in vattr */
506 afs_CopyOutAttrs(*avcp, attrs);
507 if (afs_mariner)
508 afs_MarinerLog("store$Creating", *avcp);
509 }
510
511 afs_PutFakeStat(&fakestate);
512 code = afs_CheckCode(code, treq, 20);
513 afs_DestroyReq(treq);
514
515 done2:
516 osi_FreeSmallSpace(OutFidStatus);
517 osi_FreeSmallSpace(OutDirStatus);
518 return code;
519}
520
521
522/*
523 * Check to see if we can track the change locally: requires that
524 * we have sufficiently recent info in data cache. If so, we
525 * know the new DataVersion number, and place it correctly in both the
526 * data and stat cache entries. This routine returns 1 if we should
527 * do the operation locally, and 0 otherwise.
528 *
529 * This routine must be called with the stat cache entry write-locked,
530 * and dcache entry write-locked.
531 */
532int
533afs_LocalHero(struct vcache *avc, struct dcache *adc,
534 AFSFetchStatus * astat, int aincr)
535{
536 afs_int32 ok;
537 afs_hyper_t avers;
538
539 AFS_STATCNT(afs_LocalHero);
540 hset64(avers, astat->dataVersionHigh, astat->DataVersion);
541 /* avers *is* the version number now, no matter what */
542
543 if (adc) {
544 /* does what's in the dcache *now* match what's in the vcache *now*,
545 * and do we have a valid callback? if not, our local copy is not "ok" */
546 ok = (hsame(avc->f.m.DataVersion, adc->f.versionNo) && avc->callback
547 && (avc->f.states & CStatd) && avc->cbExpires >= osi_Time());
548 } else {
549 ok = 0;
550 }
551 if (ok) {
552 /* check that the DV on the server is what we expect it to be */
553 afs_hyper_t newDV;
554 hset(newDV, adc->f.versionNo);
555 hadd32(newDV, aincr);
556 if (!hsame(avers, newDV)) {
557 ok = 0;
558 }
559 }
560#if defined(AFS_SGI_ENV)
561 osi_Assert(avc->v.v_type == VDIR);
562#endif
563 /* The bulk status code used the length as a sequence number. */
564 /* Don't update the vcache entry unless the stats are current. */
565 if (avc->f.states & CStatd) {
566 afs_SetDataVersion(avc, &avers);
567#ifdef AFS_64BIT_CLIENT
568 FillInt64(avc->f.m.Length, astat->Length_hi, astat->Length);
569#else /* AFS_64BIT_CLIENT */
570 avc->f.m.Length = astat->Length;
571#endif /* AFS_64BIT_CLIENT */
572 avc->f.m.Date = astat->ClientModTime;
573 }
574 if (ok) {
575 /* we've been tracking things correctly */
576 adc->dflags |= DFEntryMod;
577 adc->f.versionNo = avers;
578 return 1;
579 } else {
580 if (adc) {
581 ZapDCE(adc);
582 DZap(adc);
583 }
584 if (avc->f.states & CStatd) {
585 osi_dnlc_purgedp(avc);
586 }
587 return 0;
588 }
589}