local.c revision 1.1 1 1.1 haad /* $NetBSD: local.c,v 1.1 2009/12/02 00:27:10 haad Exp $ */
2 1.1 haad
3 1.1 haad /*
4 1.1 haad * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
5 1.1 haad *
6 1.1 haad * This copyrighted material is made available to anyone wishing to use,
7 1.1 haad * modify, copy, or redistribute it subject to the terms and conditions
8 1.1 haad * of the GNU Lesser General Public License v.2.1.
9 1.1 haad *
10 1.1 haad * You should have received a copy of the GNU Lesser General Public License
11 1.1 haad * along with this program; if not, write to the Free Software Foundation,
12 1.1 haad * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
13 1.1 haad */
14 1.1 haad #include <unistd.h>
15 1.1 haad #include <errno.h>
16 1.1 haad #include <string.h>
17 1.1 haad #include <stdint.h>
18 1.1 haad #include <sys/types.h>
19 1.1 haad #include <sys/socket.h>
20 1.1 haad #include <sys/poll.h>
21 1.1 haad #include <linux/connector.h>
22 1.1 haad #include <linux/netlink.h>
23 1.1 haad
24 1.1 haad #include "dm-log-userspace.h"
25 1.1 haad #include "functions.h"
26 1.1 haad #include "cluster.h"
27 1.1 haad #include "common.h"
28 1.1 haad #include "logging.h"
29 1.1 haad #include "link_mon.h"
30 1.1 haad #include "local.h"
31 1.1 haad
32 1.1 haad #ifndef CN_IDX_DM
33 1.1 haad #warning Kernel should be at least 2.6.31
34 1.1 haad #define CN_IDX_DM 0x7 /* Device Mapper */
35 1.1 haad #define CN_VAL_DM_USERSPACE_LOG 0x1
36 1.1 haad #endif
37 1.1 haad
/* Connector (netlink) socket fd; assigned by init_local(). */
static int cn_fd;

/*
 * Fixed-size scratch buffers, one per direction: recv_buf is filled by
 * kernel_recv(), send_buf by kernel_ack() and kernel_send_helper().
 */
static char recv_buf[2048];
static char send_buf[2048];
41 1.1 haad
42 1.1 haad
43 1.1 haad /* FIXME: merge this function with kernel_send_helper */
44 1.1 haad static int kernel_ack(uint32_t seq, int error)
45 1.1 haad {
46 1.1 haad int r;
47 1.1 haad struct nlmsghdr *nlh = (struct nlmsghdr *)send_buf;
48 1.1 haad struct cn_msg *msg = NLMSG_DATA(nlh);
49 1.1 haad
50 1.1 haad if (error < 0) {
51 1.1 haad LOG_ERROR("Programmer error: error codes must be positive");
52 1.1 haad return -EINVAL;
53 1.1 haad }
54 1.1 haad
55 1.1 haad memset(send_buf, 0, sizeof(send_buf));
56 1.1 haad
57 1.1 haad nlh->nlmsg_seq = 0;
58 1.1 haad nlh->nlmsg_pid = getpid();
59 1.1 haad nlh->nlmsg_type = NLMSG_DONE;
60 1.1 haad nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct cn_msg));
61 1.1 haad nlh->nlmsg_flags = 0;
62 1.1 haad
63 1.1 haad msg->len = 0;
64 1.1 haad msg->id.idx = CN_IDX_DM;
65 1.1 haad msg->id.val = CN_VAL_DM_USERSPACE_LOG;
66 1.1 haad msg->seq = seq;
67 1.1 haad msg->ack = error;
68 1.1 haad
69 1.1 haad r = send(cn_fd, nlh, NLMSG_LENGTH(sizeof(struct cn_msg)), 0);
70 1.1 haad /* FIXME: do better error processing */
71 1.1 haad if (r <= 0)
72 1.1 haad return -EBADE;
73 1.1 haad
74 1.1 haad return 0;
75 1.1 haad }
76 1.1 haad
77 1.1 haad
78 1.1 haad /*
79 1.1 haad * kernel_recv
80 1.1 haad * @rq: the newly allocated request from kernel
81 1.1 haad *
82 1.1 haad * Read requests from the kernel and allocate space for the new request.
83 1.1 haad * If there is no request from the kernel, *rq is NULL.
84 1.1 haad *
85 1.1 haad * This function is not thread safe due to returned stack pointer. In fact,
86 1.1 haad * the returned pointer must not be in-use when this function is called again.
87 1.1 haad *
88 1.1 haad * Returns: 0 on success, -EXXX on error
89 1.1 haad */
90 1.1 haad static int kernel_recv(struct clog_request **rq)
91 1.1 haad {
92 1.1 haad int r = 0;
93 1.1 haad int len;
94 1.1 haad struct cn_msg *msg;
95 1.1 haad struct dm_ulog_request *u_rq;
96 1.1 haad
97 1.1 haad *rq = NULL;
98 1.1 haad memset(recv_buf, 0, sizeof(recv_buf));
99 1.1 haad
100 1.1 haad len = recv(cn_fd, recv_buf, sizeof(recv_buf), 0);
101 1.1 haad if (len < 0) {
102 1.1 haad LOG_ERROR("Failed to recv message from kernel");
103 1.1 haad r = -errno;
104 1.1 haad goto fail;
105 1.1 haad }
106 1.1 haad
107 1.1 haad switch (((struct nlmsghdr *)recv_buf)->nlmsg_type) {
108 1.1 haad case NLMSG_ERROR:
109 1.1 haad LOG_ERROR("Unable to recv message from kernel: NLMSG_ERROR");
110 1.1 haad r = -EBADE;
111 1.1 haad goto fail;
112 1.1 haad case NLMSG_DONE:
113 1.1 haad msg = (struct cn_msg *)NLMSG_DATA((struct nlmsghdr *)recv_buf);
114 1.1 haad len -= sizeof(struct nlmsghdr);
115 1.1 haad
116 1.1 haad if (len < sizeof(struct cn_msg)) {
117 1.1 haad LOG_ERROR("Incomplete request from kernel received");
118 1.1 haad r = -EBADE;
119 1.1 haad goto fail;
120 1.1 haad }
121 1.1 haad
122 1.1 haad if (msg->len > DM_ULOG_REQUEST_SIZE) {
123 1.1 haad LOG_ERROR("Not enough space to receive kernel request (%d/%d)",
124 1.1 haad msg->len, DM_ULOG_REQUEST_SIZE);
125 1.1 haad r = -EBADE;
126 1.1 haad goto fail;
127 1.1 haad }
128 1.1 haad
129 1.1 haad if (!msg->len)
130 1.1 haad LOG_ERROR("Zero length message received");
131 1.1 haad
132 1.1 haad len -= sizeof(struct cn_msg);
133 1.1 haad
134 1.1 haad if (len < msg->len)
135 1.1 haad LOG_ERROR("len = %d, msg->len = %d", len, msg->len);
136 1.1 haad
137 1.1 haad msg->data[msg->len] = '\0'; /* Cleaner way to ensure this? */
138 1.1 haad u_rq = (struct dm_ulog_request *)msg->data;
139 1.1 haad
140 1.1 haad if (!u_rq->request_type) {
141 1.1 haad LOG_DBG("Bad transmission, requesting resend [%u]",
142 1.1 haad msg->seq);
143 1.1 haad r = -EAGAIN;
144 1.1 haad
145 1.1 haad if (kernel_ack(msg->seq, EAGAIN)) {
146 1.1 haad LOG_ERROR("Failed to NACK kernel transmission [%u]",
147 1.1 haad msg->seq);
148 1.1 haad r = -EBADE;
149 1.1 haad }
150 1.1 haad }
151 1.1 haad
152 1.1 haad /*
153 1.1 haad * Now we've got sizeof(struct cn_msg) + sizeof(struct nlmsghdr)
154 1.1 haad * worth of space that precede the request structure from the
155 1.1 haad * kernel. Since that space isn't going to be used again, we
156 1.1 haad * can take it for our purposes; rather than allocating a whole
157 1.1 haad * new structure and doing a memcpy.
158 1.1 haad *
159 1.1 haad * We should really make sure 'clog_request' doesn't grow
160 1.1 haad * beyond what is available to us, but we need only check it
161 1.1 haad * once... perhaps at compile time?
162 1.1 haad */
163 1.1 haad // *rq = container_of(u_rq, struct clog_request, u_rq);
164 1.1 haad *rq = (void *)u_rq -
165 1.1 haad (sizeof(struct clog_request) -
166 1.1 haad sizeof(struct dm_ulog_request));
167 1.1 haad
168 1.1 haad /* Clear the wrapper container fields */
169 1.1 haad memset(*rq, 0, (void *)u_rq - (void *)(*rq));
170 1.1 haad break;
171 1.1 haad default:
172 1.1 haad LOG_ERROR("Unknown nlmsg_type");
173 1.1 haad r = -EBADE;
174 1.1 haad }
175 1.1 haad
176 1.1 haad fail:
177 1.1 haad if (r)
178 1.1 haad *rq = NULL;
179 1.1 haad
180 1.1 haad return (r == -EAGAIN) ? 0 : r;
181 1.1 haad }
182 1.1 haad
183 1.1 haad static int kernel_send_helper(void *data, int out_size)
184 1.1 haad {
185 1.1 haad int r;
186 1.1 haad struct nlmsghdr *nlh;
187 1.1 haad struct cn_msg *msg;
188 1.1 haad
189 1.1 haad memset(send_buf, 0, sizeof(send_buf));
190 1.1 haad
191 1.1 haad nlh = (struct nlmsghdr *)send_buf;
192 1.1 haad nlh->nlmsg_seq = 0; /* FIXME: Is this used? */
193 1.1 haad nlh->nlmsg_pid = getpid();
194 1.1 haad nlh->nlmsg_type = NLMSG_DONE;
195 1.1 haad nlh->nlmsg_len = NLMSG_LENGTH(out_size + sizeof(struct cn_msg));
196 1.1 haad nlh->nlmsg_flags = 0;
197 1.1 haad
198 1.1 haad msg = NLMSG_DATA(nlh);
199 1.1 haad memcpy(msg->data, data, out_size);
200 1.1 haad msg->len = out_size;
201 1.1 haad msg->id.idx = CN_IDX_DM;
202 1.1 haad msg->id.val = CN_VAL_DM_USERSPACE_LOG;
203 1.1 haad msg->seq = 0;
204 1.1 haad
205 1.1 haad r = send(cn_fd, nlh, NLMSG_LENGTH(out_size + sizeof(struct cn_msg)), 0);
206 1.1 haad /* FIXME: do better error processing */
207 1.1 haad if (r <= 0)
208 1.1 haad return -EBADE;
209 1.1 haad
210 1.1 haad return 0;
211 1.1 haad }
212 1.1 haad
213 1.1 haad /*
214 1.1 haad * do_local_work
215 1.1 haad *
216 1.1 haad * Any processing errors are placed in the 'rq'
217 1.1 haad * structure to be reported back to the kernel.
218 1.1 haad * It may be pointless for this function to
219 1.1 haad * return an int.
220 1.1 haad *
221 1.1 haad * Returns: 0 on success, -EXXX on failure
222 1.1 haad */
223 1.1 haad static int do_local_work(void *data)
224 1.1 haad {
225 1.1 haad int r;
226 1.1 haad struct clog_request *rq;
227 1.1 haad struct dm_ulog_request *u_rq = NULL;
228 1.1 haad
229 1.1 haad r = kernel_recv(&rq);
230 1.1 haad if (r)
231 1.1 haad return r;
232 1.1 haad
233 1.1 haad if (!rq)
234 1.1 haad return 0;
235 1.1 haad
236 1.1 haad u_rq = &rq->u_rq;
237 1.1 haad LOG_DBG("[%s] Request from kernel received: [%s/%u]",
238 1.1 haad SHORT_UUID(u_rq->uuid), RQ_TYPE(u_rq->request_type),
239 1.1 haad u_rq->seq);
240 1.1 haad switch (u_rq->request_type) {
241 1.1 haad case DM_ULOG_CTR:
242 1.1 haad case DM_ULOG_DTR:
243 1.1 haad case DM_ULOG_GET_REGION_SIZE:
244 1.1 haad case DM_ULOG_IN_SYNC:
245 1.1 haad case DM_ULOG_GET_SYNC_COUNT:
246 1.1 haad case DM_ULOG_STATUS_INFO:
247 1.1 haad case DM_ULOG_STATUS_TABLE:
248 1.1 haad case DM_ULOG_PRESUSPEND:
249 1.1 haad /* We do not specify ourselves as server here */
250 1.1 haad r = do_request(rq, 0);
251 1.1 haad if (r)
252 1.1 haad LOG_DBG("Returning failed request to kernel [%s]",
253 1.1 haad RQ_TYPE(u_rq->request_type));
254 1.1 haad r = kernel_send(u_rq);
255 1.1 haad if (r)
256 1.1 haad LOG_ERROR("Failed to respond to kernel [%s]",
257 1.1 haad RQ_TYPE(u_rq->request_type));
258 1.1 haad
259 1.1 haad break;
260 1.1 haad case DM_ULOG_RESUME:
261 1.1 haad /*
262 1.1 haad * Resume is a special case that requires a local
263 1.1 haad * component to join the CPG, and a cluster component
264 1.1 haad * to handle the request.
265 1.1 haad */
266 1.1 haad r = local_resume(u_rq);
267 1.1 haad if (r) {
268 1.1 haad LOG_DBG("Returning failed request to kernel [%s]",
269 1.1 haad RQ_TYPE(u_rq->request_type));
270 1.1 haad r = kernel_send(u_rq);
271 1.1 haad if (r)
272 1.1 haad LOG_ERROR("Failed to respond to kernel [%s]",
273 1.1 haad RQ_TYPE(u_rq->request_type));
274 1.1 haad break;
275 1.1 haad }
276 1.1 haad /* ELSE, fall through */
277 1.1 haad case DM_ULOG_IS_CLEAN:
278 1.1 haad case DM_ULOG_FLUSH:
279 1.1 haad case DM_ULOG_MARK_REGION:
280 1.1 haad case DM_ULOG_GET_RESYNC_WORK:
281 1.1 haad case DM_ULOG_SET_REGION_SYNC:
282 1.1 haad case DM_ULOG_IS_REMOTE_RECOVERING:
283 1.1 haad case DM_ULOG_POSTSUSPEND:
284 1.1 haad r = cluster_send(rq);
285 1.1 haad if (r) {
286 1.1 haad u_rq->data_size = 0;
287 1.1 haad u_rq->error = r;
288 1.1 haad kernel_send(u_rq);
289 1.1 haad }
290 1.1 haad
291 1.1 haad break;
292 1.1 haad case DM_ULOG_CLEAR_REGION:
293 1.1 haad r = kernel_ack(u_rq->seq, 0);
294 1.1 haad
295 1.1 haad r = cluster_send(rq);
296 1.1 haad if (r) {
297 1.1 haad /*
298 1.1 haad * FIXME: store error for delivery on flush
299 1.1 haad * This would allow us to optimize MARK_REGION
300 1.1 haad * too.
301 1.1 haad */
302 1.1 haad }
303 1.1 haad
304 1.1 haad break;
305 1.1 haad default:
306 1.1 haad LOG_ERROR("Invalid log request received (%u), ignoring.",
307 1.1 haad u_rq->request_type);
308 1.1 haad
309 1.1 haad return 0;
310 1.1 haad }
311 1.1 haad
312 1.1 haad if (r && !u_rq->error)
313 1.1 haad u_rq->error = r;
314 1.1 haad
315 1.1 haad return r;
316 1.1 haad }
317 1.1 haad
318 1.1 haad /*
319 1.1 haad * kernel_send
320 1.1 haad * @u_rq: result to pass back to kernel
321 1.1 haad *
322 1.1 haad * This function returns the u_rq structure
323 1.1 haad * (containing the results) to the kernel.
324 1.1 haad * It then frees the structure.
325 1.1 haad *
326 1.1 haad * WARNING: should the structure be freed if
327 1.1 haad * there is an error? I vote 'yes'. If the
328 1.1 haad * kernel doesn't get the response, it should
329 1.1 haad * resend the request.
330 1.1 haad *
331 1.1 haad * Returns: 0 on success, -EXXX on failure
332 1.1 haad */
333 1.1 haad int kernel_send(struct dm_ulog_request *u_rq)
334 1.1 haad {
335 1.1 haad int r;
336 1.1 haad int size;
337 1.1 haad
338 1.1 haad if (!u_rq)
339 1.1 haad return -EINVAL;
340 1.1 haad
341 1.1 haad size = sizeof(struct dm_ulog_request) + u_rq->data_size;
342 1.1 haad
343 1.1 haad if (!u_rq->data_size && !u_rq->error) {
344 1.1 haad /* An ACK is all that is needed */
345 1.1 haad
346 1.1 haad /* FIXME: add ACK code */
347 1.1 haad } else if (size > DM_ULOG_REQUEST_SIZE) {
348 1.1 haad /*
349 1.1 haad * If we gotten here, we've already overrun
350 1.1 haad * our allotted space somewhere.
351 1.1 haad *
352 1.1 haad * We must do something, because the kernel
353 1.1 haad * is waiting for a response.
354 1.1 haad */
355 1.1 haad LOG_ERROR("Not enough space to respond to server");
356 1.1 haad u_rq->error = -ENOSPC;
357 1.1 haad size = sizeof(struct dm_ulog_request);
358 1.1 haad }
359 1.1 haad
360 1.1 haad r = kernel_send_helper(u_rq, size);
361 1.1 haad if (r)
362 1.1 haad LOG_ERROR("Failed to send msg to kernel.");
363 1.1 haad
364 1.1 haad return r;
365 1.1 haad }
366 1.1 haad
367 1.1 haad /*
368 1.1 haad * init_local
369 1.1 haad *
370 1.1 haad * Initialize kernel communication socket (netlink)
371 1.1 haad *
372 1.1 haad * Returns: 0 on success, values from common.h on failure
373 1.1 haad */
374 1.1 haad int init_local(void)
375 1.1 haad {
376 1.1 haad int r = 0;
377 1.1 haad int opt;
378 1.1 haad struct sockaddr_nl addr;
379 1.1 haad
380 1.1 haad cn_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
381 1.1 haad if (cn_fd < 0)
382 1.1 haad return EXIT_KERNEL_SOCKET;
383 1.1 haad
384 1.1 haad /* memset to fix valgrind complaint */
385 1.1 haad memset(&addr, 0, sizeof(struct sockaddr_nl));
386 1.1 haad
387 1.1 haad addr.nl_family = AF_NETLINK;
388 1.1 haad addr.nl_groups = CN_IDX_DM;
389 1.1 haad addr.nl_pid = 0;
390 1.1 haad
391 1.1 haad r = bind(cn_fd, (struct sockaddr *) &addr, sizeof(addr));
392 1.1 haad if (r < 0) {
393 1.1 haad close(cn_fd);
394 1.1 haad return EXIT_KERNEL_BIND;
395 1.1 haad }
396 1.1 haad
397 1.1 haad opt = addr.nl_groups;
398 1.1 haad r = setsockopt(cn_fd, 270, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
399 1.1 haad if (r) {
400 1.1 haad close(cn_fd);
401 1.1 haad return EXIT_KERNEL_SETSOCKOPT;
402 1.1 haad }
403 1.1 haad
404 1.1 haad /*
405 1.1 haad r = fcntl(cn_fd, F_SETFL, FNDELAY);
406 1.1 haad */
407 1.1 haad
408 1.1 haad links_register(cn_fd, "local", do_local_work, NULL);
409 1.1 haad
410 1.1 haad return 0;
411 1.1 haad }
412 1.1 haad
413 1.1 haad /*
414 1.1 haad * cleanup_local
415 1.1 haad *
416 1.1 haad * Clean up before exiting
417 1.1 haad */
418 1.1 haad void cleanup_local(void)
419 1.1 haad {
420 1.1 haad links_unregister(cn_fd);
421 1.1 haad close(cn_fd);
422 1.1 haad }
423