Import Upstream version 1.8.5
[hcoop/debian/openafs.git] / src / vol / fssync-client.c
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 *
9 * Portions Copyright (c) 2006,2008 Sine Nomine Associates
10 */
11
12 /*
13 System: VICE-TWO
14 Module: fssync.c
15 Institution: The Information Technology Center, Carnegie-Mellon University
16
17 */
18
#if !defined(AFS_PTHREAD_ENV)
/* Non-pthread builds only: a priority two steps below LWP_MAX_PRIORITY. */
#define USUAL_PRIORITY (LWP_MAX_PRIORITY - 2)

/*
 * The stack is 24K rather than the earlier 8K because the HP machine seemed
 * to have trouble with the smaller stack.
 */
#define USUAL_STACK_SIZE (24 * 1024)
#endif /* !AFS_PTHREAD_ENV */
28
29 /*
30 fssync-client.c
31 File server synchronization with external volume utilities.
32 client-side implementation
33 */
34
35 #include <afsconfig.h>
36 #include <afs/param.h>
37
38 #include <roken.h>
39
40 #include <afs/opr.h>
41 #ifdef AFS_PTHREAD_ENV
42 # include <opr/lock.h>
43 #endif
44
45 #include <afs/afsint.h>
46 #include <rx/rx_queue.h>
47 #include <afs/errors.h>
48 #include <afs/afssyscalls.h>
49
50 #include "nfs.h"
51 #include "daemon_com.h"
52 #include "fssync.h"
53 #include "lwp.h"
54 #include "lock.h"
55 #include "ihandle.h"
56 #include "vnode.h"
57 #include "volume.h"
58 #include "partition.h"
59 #include "common.h"
60
61 #ifdef FSSYNC_BUILD_CLIENT
62
63 static SYNC_client_state fssync_state =
64 { -1, /* file descriptor */
65 FSSYNC_ENDPOINT_DECL, /* server endpoint */
66 FSYNC_PROTO_VERSION, /* protocol version */
67 5, /* connect retry limit */
68 120, /* hard timeout */
69 "FSSYNC", /* protocol name string */
70 };
71
#if defined(AFS_PTHREAD_ENV)
/* Mutex taken around the SYNC_ask() call in FSYNC_askfs(). */
static pthread_mutex_t vol_fsync_mutex;
/* Nonzero once FSYNC_clientInit() has initialized vol_fsync_mutex. */
static volatile int vol_fsync_mutex_init = 0;
#define VFSYNC_LOCK opr_mutex_enter(&vol_fsync_mutex)
#define VFSYNC_UNLOCK opr_mutex_exit(&vol_fsync_mutex)
#else /* !AFS_PTHREAD_ENV */
/* Without pthreads the lock macros expand to nothing. */
#define VFSYNC_LOCK
#define VFSYNC_UNLOCK
#endif /* AFS_PTHREAD_ENV */
81
82 int
83 FSYNC_clientInit(void)
84 {
85 #ifdef AFS_PTHREAD_ENV
86 /* this is safe since it gets called with VOL_LOCK held, or before we go multithreaded */
87 if (!vol_fsync_mutex_init) {
88 opr_mutex_init(&vol_fsync_mutex);
89 vol_fsync_mutex_init = 1;
90 }
91 #endif
92 return SYNC_connect(&fssync_state);
93 }
94
95 void
96 FSYNC_clientFinis(void)
97 {
98 SYNC_closeChannel(&fssync_state);
99 }
100
101 int
102 FSYNC_clientChildProcReconnect(void)
103 {
104 return SYNC_reconnect(&fssync_state);
105 }
106
107 /* fsync client interface */
108 afs_int32
109 FSYNC_askfs(SYNC_command * com, SYNC_response * res)
110 {
111 afs_int32 code;
112
113 VFSYNC_LOCK;
114 code = SYNC_ask(&fssync_state, com, res);
115 VFSYNC_UNLOCK;
116
117 switch (code) {
118 case SYNC_OK:
119 case SYNC_FAILED:
120 break;
121 case SYNC_COM_ERROR:
122 case SYNC_BAD_COMMAND:
123 Log("FSYNC_askfs: internal FSSYNC protocol error %d\n", code);
124 break;
125 case SYNC_DENIED:
126 Log("FSYNC_askfs: FSSYNC request denied for reason=%d\n", res->hdr.reason);
127 break;
128 default:
129 Log("FSYNC_askfs: unknown protocol response %d\n", code);
130 break;
131 }
132 return code;
133 }
134
135
136 /**
137 * FSSYNC volume operations client interface.
138 *
139 * @param[in] volume volume id
140 * @param[in] partName partition name string
141 * @param[in] com FSSYNC command code
142 * @param[in] reason FSSYNC reason sub-code
143 * @param[out] res response message
144 *
145 * @return operation status
146 * @retval SYNC_OK success
147 */
148 afs_int32
149 FSYNC_GenericOp(void * ext_hdr, size_t ext_len,
150 int command, int reason,
151 SYNC_response * res_in)
152 {
153 SYNC_response res_l, *res;
154 SYNC_command com;
155
156 if (res_in) {
157 res = res_in;
158 } else {
159 res = &res_l;
160 res_l.payload.buf = NULL;
161 res_l.payload.len = 0;
162 }
163
164 memset(&com, 0, sizeof(com));
165
166 com.hdr.programType = programType;
167 com.hdr.command = command;
168 com.hdr.reason = reason;
169 com.hdr.command_len = sizeof(com.hdr) + ext_len;
170 com.payload.buf = ext_hdr;
171 com.payload.len = ext_len;
172
173 return FSYNC_askfs(&com, res);
174 }
175
176 afs_int32
177 FSYNC_VolOp(VolumeId volume, char * partition,
178 int command, int reason,
179 SYNC_response * res)
180 {
181 FSSYNC_VolOp_hdr vcom;
182
183 memset(&vcom, 0, sizeof(vcom));
184
185 vcom.volume = volume;
186 if (partition)
187 strlcpy(vcom.partName, partition, sizeof(vcom.partName));
188
189 return FSYNC_GenericOp(&vcom, sizeof(vcom), command, reason, res);
190 }
191
192 /**
193 * verify that the fileserver still thinks we have a volume checked out.
194 *
195 * In DAFS, a non-fileserver program accesses a volume by checking it out from
196 * the fileserver (FSYNC_VOL_OFF or FSYNC_VOL_NEEDVOLUME), and then locks the
197 * volume. There is a possibility that the fileserver crashes or restarts for
198 * some reason between volume checkout and locking; if this happens, the
199 * fileserver could attach the volume before we had a chance to lock it. This
200 * function serves to detect if this has happened; it must be called after
201 * volume checkout and locking to make sure the fileserver still thinks we
202 * have the volume. (If it doesn't, we should try to check it out again.)
203 *
204 * @param[in] volume volume ID
205 * @param[in] partition partition name string
206 * @param[in] command the command that was used to checkout the volume
207 * @param[in] reason the reason code used to checkout the volume
208 *
209 * @return operation status
210 * @retval SYNC_OK the fileserver could not have attached the volume since
211 * it was checked out (either it thinks it is still checked
212 * out, or it doesn't know about the volume)
213 * @retval SYNC_DENIED fileserver may have restarted since checkout; checkout
214 * should be reattempted
215 * @retval SYNC_COM_ERROR internal/fatal error
216 */
217 afs_int32
218 FSYNC_VerifyCheckout(VolumeId volume, char * partition,
219 afs_int32 command, afs_int32 reason)
220 {
221 SYNC_response res;
222 FSSYNC_VolOp_info vop;
223 afs_int32 code;
224 afs_int32 pid;
225
226 res.hdr.response_len = sizeof(res.hdr);
227 res.payload.buf = &vop;
228 res.payload.len = sizeof(vop);
229
230 code = FSYNC_VolOp(volume, partition, FSYNC_VOL_QUERY_VOP, FSYNC_WHATEVER, &res);
231 if (code != SYNC_OK) {
232 if (res.hdr.reason == FSYNC_NO_PENDING_VOL_OP) {
233 Log("FSYNC_VerifyCheckout: fileserver claims no vop for vol %lu "
234 "part %s; fileserver may have restarted since checkout\n",
235 afs_printable_uint32_lu(volume), partition);
236 return SYNC_DENIED;
237 }
238
239 if (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
240 res.hdr.reason == FSYNC_WRONG_PART) {
241 /* if the fileserver does not know about this volume on this
242 * partition, there's no way it could have attached it, so we're
243 * fine */
244 return SYNC_OK;
245 }
246
247 Log("FSYNC_VerifyCheckout: FSYNC_VOL_QUERY_VOP failed for vol %lu "
248 "part %s with code %ld reason %ld\n",
249 afs_printable_uint32_lu(volume), partition,
250 afs_printable_int32_ld(code),
251 afs_printable_int32_ld(res.hdr.reason));
252 return SYNC_COM_ERROR;
253 }
254
255 pid = getpid();
256
257 /* Check if the current vol op is us. Checking pid is probably enough, but
258 * be a little bit paranoid. We could also probably check tid, but I'm not
259 * completely confident of its reliability on all platforms (on pthread
260 * envs, we coerce a pthread_t to an afs_int32, which is not guaranteed
261 * to mean anything significant). */
262
263 if (vop.com.programType == programType && vop.com.pid == pid &&
264 vop.com.command == command && vop.com.reason == reason) {
265
266 /* looks like the current pending vol op is the same one as the one
267 * with which we checked it out. success. */
268 return SYNC_OK;
269 }
270
271 Log("FSYNC_VerifyCheckout: vop for vol %lu part %s does not match "
272 "expectations (got pt %ld pid %ld cmd %ld reason %ld, but expected "
273 "pt %ld pid %ld cmd %ld reason %ld); fileserver may have restarted "
274 "since checkout\n", afs_printable_uint32_lu(volume), partition,
275 afs_printable_int32_ld(vop.com.programType),
276 afs_printable_int32_ld(vop.com.pid),
277 afs_printable_int32_ld(vop.com.command),
278 afs_printable_int32_ld(vop.com.reason),
279 afs_printable_int32_ld(programType),
280 afs_printable_int32_ld(pid),
281 afs_printable_int32_ld(command),
282 afs_printable_int32_ld(reason));
283
284 return SYNC_DENIED;
285 }
286
287 afs_int32
288 FSYNC_StatsOp(FSSYNC_StatsOp_hdr * scom, int command, int reason,
289 SYNC_response * res)
290 {
291 return FSYNC_GenericOp(scom, sizeof(*scom), command, reason, res);
292 }
293
294 /**
295 * query the volume group cache.
296 *
297 * @param[in] part vice partition path
298 * @param[in] volid volume id
299 * @param[out] qry query response object
300 * @param[out] res SYNC response message
301 *
302 * @return operation status
303 * @retval SYNC_OK success
304 */
305 afs_int32
306 FSYNC_VGCQuery(char * part,
307 VolumeId volid,
308 FSSYNC_VGQry_response_t * qry,
309 SYNC_response *res)
310 {
311 SYNC_response lres;
312
313 if (!res) {
314 res = &lres;
315 }
316
317 res->hdr.response_len = sizeof(res->hdr);
318 res->payload.buf = qry;
319 res->payload.len = sizeof(*qry);
320
321 return FSYNC_VolOp(volid, part, FSYNC_VG_QUERY, 0, res);
322 }
323
324 /**
325 * perform an update operation on the VGC.
326 *
327 * @param[in] parent rw volume
328 * @param[in] child volume id to add
329 * @param[in] partition name of vice partition on which this VG resides
330 * @param[in] opcode FSSYNC VG cache opcode
331 * @param[in] reason FSSYNC reason code
332 * @param[out] res SYNC response message
333 *
334 * @return operation status
335 * @retval SYNC_OK success
336 *
337 * @internal
338 */
339 static afs_int32
340 _FSYNC_VGCUpdate(char * partition,
341 VolumeId parent,
342 VolumeId child,
343 int opcode,
344 int reason,
345 SYNC_response *res)
346 {
347 FSSYNC_VGUpdate_command_t vcom;
348
349 memset(&vcom, 0, sizeof(vcom));
350
351 vcom.parent = parent;
352 vcom.child = child;
353 if (partition)
354 strlcpy(vcom.partName, partition, sizeof(vcom.partName));
355
356 return FSYNC_GenericOp(&vcom, sizeof(vcom), opcode, reason, res);
357 }
358
359 /**
360 * Add volume to volume group cache.
361 *
362 * @param[in] parent rw volume
363 * @param[in] child volume id to add
364 * @param[in] partition name of vice partition on which this VG resides
365 * @param[in] reason FSSYNC reason code
366 * @param[out] res SYNC response message
367 *
368 * @return operation status
369 * @retval SYNC_OK success
370 */
371 afs_int32
372 FSYNC_VGCAdd(char * partition,
373 VolumeId parent,
374 VolumeId child,
375 int reason,
376 SYNC_response *res)
377 {
378 return _FSYNC_VGCUpdate(partition, parent, child, FSYNC_VG_ADD, reason, res);
379 }
380
381 /**
382 * Delete volume from volume group cache.
383 *
384 * @param[in] parent rw volume
385 * @param[in] child volume id to add
386 * @param[in] partition name of vice partition on which this VG resides
387 * @param[in] reason FSSYNC reason code
388 * @param[out] res SYNC response message
389 *
390 * @return operation status
391 * @retval SYNC_OK success
392 */
393 afs_int32
394 FSYNC_VGCDel(char * partition,
395 VolumeId parent,
396 VolumeId child,
397 int reason,
398 SYNC_response *res)
399 {
400 return _FSYNC_VGCUpdate(partition, parent, child, FSYNC_VG_DEL, reason, res);
401 }
402
403 /**
404 * perform an asynchronous volume group scan.
405 *
406 * @param[in] partition vice partition string
407 * @param[in] reason FSSYNC reason code
408 *
409 * @note if partition is NULL, all vice partitions will be scanned.
410 *
411 * @return operation status
412 * @retval SYNC_OK success
413 */
414 afs_int32
415 FSYNC_VGCScan(char * partition, int reason)
416 {
417 int command;
418
419 if (partition == NULL) {
420 command = FSYNC_VG_SCAN_ALL;
421 partition = "";
422 } else {
423 command = FSYNC_VG_SCAN;
424 }
425
426 return FSYNC_VolOp(0, partition, command, reason, NULL);
427 }
428
429 #endif /* FSSYNC_BUILD_CLIENT */