Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / afs / afs_osi_vm.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10 #include <afsconfig.h>
11 #include "afs/param.h"
12
13
14 #include "afs/sysincludes.h" /* Standard vendor system headers */
15 #include "afsincludes.h" /* Afs-based standard headers */
16 #include "afs/afs_stats.h" /* afs statistics */
17 #ifdef AFS_AIX_ENV
18 #include <sys/adspace.h> /* for vm_att(), vm_det() */
19 #endif
20
21 int
22 osi_Active(struct vcache *avc)
23 {
24 AFS_STATCNT(osi_Active);
25 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) || (AFS_LINUX20_ENV) || defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
26 if ((avc->opens > 0) || (avc->f.states & CMAPPED))
27 return 1; /* XXX: Warning, verify this XXX */
28 #elif defined(AFS_SGI_ENV)
29 if ((avc->opens > 0) || AFS_VN_MAPPED(AFSTOV(avc)))
30 return 1;
31 #else
32 if (avc->opens > 0 || (AFSTOV(avc)->v_flag & VTEXT))
33 return (1);
34 #endif
35 return 0;
36 }
37
38 /* this call, unlike osi_FlushText, is supposed to discard caches that may
39 contain invalid information if a file is written remotely, but that may
40 contain valid information that needs to be written back if the file is
41 being written locally. It doesn't subsume osi_FlushText, since the latter
42 function may be needed to flush caches that are invalidated by local writes.
43
44 avc->pvnLock is already held, avc->lock is guaranteed not to be held (by
45 us, of course).
46 */
47 void
48 osi_FlushPages(struct vcache *avc, afs_ucred_t *credp)
49 {
50 afs_hyper_t origDV;
51 #if defined(AFS_CACHE_BYPASS)
52 /* The optimization to check DV under read lock below is identical a
53 * change in CITI cache bypass work. The problem CITI found in 1999
54 * was that this code and background daemon doing prefetching competed
55 * for the vcache entry shared lock. It's not clear to me from the
56 * tech report, but it looks like CITI fixed the general prefetch code
57 * path as a bonus when experimenting on prefetch for cache bypass, see
58 * citi-tr-01-3.
59 */
60 #endif
61 if (vType(avc) == VDIR) {
62 /* not applicable to directories; they're never mapped or stored in
63 * pages */
64 return;
65 }
66 ObtainReadLock(&avc->lock);
67 /* If we've already purged this version, or if we're the ones
68 * writing this version, don't flush it (could lose the
69 * data we're writing). */
70 if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0)
71 || ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) {
72 ReleaseReadLock(&avc->lock);
73 return;
74 }
75 ReleaseReadLock(&avc->lock);
76 ObtainWriteLock(&avc->lock, 10);
77 /* Check again */
78 if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0)
79 || ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) {
80 ReleaseWriteLock(&avc->lock);
81 return;
82 }
83
84 /* At this point, you might think that we can skip trying to flush pages
85 * if mapDV is zero, since a file with a zero DV will not have any data in
86 * it. However, some platforms (notably Linux 2.6.22+) will keep a page
87 * full of zeroes around for an empty file. So play it safe and always
88 * flush pages. */
89
90 AFS_STATCNT(osi_FlushPages);
91 hset(origDV, avc->f.m.DataVersion);
92 afs_Trace3(afs_iclSetp, CM_TRACE_FLUSHPAGES, ICL_TYPE_POINTER, avc,
93 ICL_TYPE_INT32, origDV.low, ICL_TYPE_INT32, avc->f.m.Length);
94
95 ReleaseWriteLock(&avc->lock);
96 #ifndef AFS_FBSD70_ENV
97 AFS_GUNLOCK();
98 #endif
99 osi_VM_FlushPages(avc, credp);
100 #ifndef AFS_FBSD70_ENV
101 AFS_GLOCK();
102 #endif
103 ObtainWriteLock(&avc->lock, 88);
104
105 /* do this last, and to original version, since stores may occur
106 * while executing above PUTPAGE call */
107 hset(avc->mapDV, origDV);
108 ReleaseWriteLock(&avc->lock);
109 }
110
111 #ifdef AFS_TEXT_ENV
112
113 /* This call is supposed to flush all caches that might be invalidated
114 * by either a local write operation or a write operation done on
115 * another client. This call may be called repeatedly on the same
116 * version of a file, even while a file is being written, so it
117 * shouldn't do anything that would discard newly written data before
118 * it is written to the file system. */
119
120 void
121 osi_FlushText_really(struct vcache *vp)
122 {
123 afs_hyper_t fdv; /* version before which we'll flush */
124
125 AFS_STATCNT(osi_FlushText);
126 /* see if we've already flushed this data version */
127 if (hcmp(vp->f.m.DataVersion, vp->flushDV) <= 0)
128 return;
129
130 ObtainWriteLock(&afs_ftf, 317);
131 hset(fdv, vp->f.m.DataVersion);
132
133 /* why this disgusting code below?
134 * xuntext, called by xrele, doesn't notice when it is called
135 * with a freed text object. Sun continually calls xrele or xuntext
136 * without any locking, as long as VTEXT is set on the
137 * corresponding vnode.
138 * But, if the text object is locked when you check the VTEXT
139 * flag, several processes can wait in xuntext, waiting for the
140 * text lock; when the second one finally enters xuntext's
141 * critical region, the text object is already free, but the check
142 * was already done by xuntext's caller.
143 * Even worse, it turns out that xalloc locks the text object
144 * before reading or stating a file via the vnode layer. Thus, we
145 * could end up in getdcache, being asked to bring in a new
146 * version of a file, but the corresponding text object could be
147 * locked. We can't flush the text object without causing
148 * deadlock, so now we just don't try to lock the text object
149 * unless it is guaranteed to work. And we try to flush the text
150 * when we need to a bit more often at the vnode layer. Sun
151 * really blew the vm-cache flushing interface.
152 */
153
154 #if defined (AFS_HPUX_ENV)
155 if (vp->v.v_flag & VTEXT) {
156 xrele(vp);
157
158 if (vp->v.v_flag & VTEXT) { /* still has a text object? */
159 ReleaseWriteLock(&afs_ftf);
160 return;
161 }
162 }
163 #endif
164
165 /* next do the stuff that need not check for deadlock problems */
166 mpurge(vp);
167
168 /* finally, record that we've done it */
169 hset(vp->flushDV, fdv);
170 ReleaseWriteLock(&afs_ftf);
171
172 }
173 #endif /* AFS_TEXT_ENV */
174
175 /* ? is it moderately likely that there are dirty VM pages associated with
176 * this vnode?
177 *
178 * Prereqs: avc must be write-locked
179 *
180 * System Dependencies: - *must* support each type of system for which
181 * memory mapped files are supported, even if all
182 * it does is return TRUE;
183 *
184 * NB: this routine should err on the side of caution for ProcessFS to work
185 * correctly (or at least, not to introduce worse bugs than already exist)
186 */
187 #ifdef notdef
188 int
189 osi_VMDirty_p(struct vcache *avc)
190 {
191 int dirtyPages;
192
193 if (avc->execsOrWriters <= 0)
194 return 0; /* can't be many dirty pages here, I guess */
195
196 #if defined (AFS_AIX32_ENV)
197 #ifdef notdef
198 /* because of the level of hardware involvment with VM and all the
199 * warnings about "This routine must be called at VMM interrupt
200 * level", I thought it would be safest to disable interrupts while
201 * looking at the software page fault table. */
202
203 /* convert vm handle into index into array: I think that stoinio is
204 * always zero... Look into this XXX */
205 #define VMHASH(handle) ( \
206 ( ((handle) & ~vmker.stoinio) \
207 ^ ((((handle) & ~vmker.stoinio) & vmker.stoimask) << vmker.stoihash) \
208 ) & 0x000fffff)
209
210 if (avc->segid) {
211 unsigned int pagef, pri, index, next;
212
213 index = VMHASH(avc->segid);
214 if (scb_valid(index)) { /* could almost be an ASSERT */
215
216 pri = disable_ints();
217 for (pagef = scb_sidlist(index); pagef >= 0; pagef = next) {
218 next = pft_sidfwd(pagef);
219 if (pft_modbit(pagef)) { /* has page frame been modified? */
220 enable_ints(pri);
221 return 1;
222 }
223 }
224 enable_ints(pri);
225 }
226 }
227 #undef VMHASH
228 #endif
229 #endif /* AFS_AIX32_ENV */
230
231 #if defined (AFS_SUN5_ENV)
232 if (avc->f.states & CMAPPED) {
233 struct page *pg;
234 for (pg = AFSTOV(avc)->v_s.v_Pages; pg; pg = pg->p_vpnext) {
235 if (pg->p_mod) {
236 return 1;
237 }
238 }
239 }
240 #endif
241 return 0;
242 }
243 #endif /* notdef */
244
245
246 /*
247 * Solaris osi_ReleaseVM should not drop and re-obtain the vcache entry lock.
248 * This leads to bad races when osi_ReleaseVM() is called from
249 * afs_InvalidateAllSegments().
250
251 * We can do this because Solaris osi_VM_Truncate() doesn't care whether the
252 * vcache entry lock is held or not.
253 *
254 * For other platforms, in some cases osi_VM_Truncate() doesn't care, but
255 * there may be cases where it does care. If so, it would be good to fix
256 * them so they don't care. Until then, we assume the worst.
257 *
258 * Locking: the vcache entry lock is held. It is dropped and re-obtained.
259 */
260 void
261 osi_ReleaseVM(struct vcache *avc, afs_ucred_t *acred)
262 {
263 #ifdef AFS_SUN5_ENV
264 AFS_GUNLOCK();
265 osi_VM_Truncate(avc, 0, acred);
266 AFS_GLOCK();
267 #else
268 ReleaseWriteLock(&avc->lock);
269 AFS_GUNLOCK();
270 osi_VM_Truncate(avc, 0, acred);
271 AFS_GLOCK();
272 ObtainWriteLock(&avc->lock, 80);
273 #endif
274 }