Home | History | Annotate | Line # | Download | only in cmirrord
      1  1.1  haad /*	$NetBSD: local.c,v 1.1.1.1 2009/12/02 00:27:10 haad Exp $	*/
      2  1.1  haad 
      3  1.1  haad /*
      4  1.1  haad  * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
      5  1.1  haad  *
      6  1.1  haad  * This copyrighted material is made available to anyone wishing to use,
      7  1.1  haad  * modify, copy, or redistribute it subject to the terms and conditions
      8  1.1  haad  * of the GNU Lesser General Public License v.2.1.
      9  1.1  haad  *
     10  1.1  haad  * You should have received a copy of the GNU Lesser General Public License
     11  1.1  haad  * along with this program; if not, write to the Free Software Foundation,
     12  1.1  haad  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     13  1.1  haad  */
     14  1.1  haad #include <unistd.h>
     15  1.1  haad #include <errno.h>
     16  1.1  haad #include <string.h>
     17  1.1  haad #include <stdint.h>
     18  1.1  haad #include <sys/types.h>
     19  1.1  haad #include <sys/socket.h>
     20  1.1  haad #include <sys/poll.h>
     21  1.1  haad #include <linux/connector.h>
     22  1.1  haad #include <linux/netlink.h>
     23  1.1  haad 
     24  1.1  haad #include "dm-log-userspace.h"
     25  1.1  haad #include "functions.h"
     26  1.1  haad #include "cluster.h"
     27  1.1  haad #include "common.h"
     28  1.1  haad #include "logging.h"
     29  1.1  haad #include "link_mon.h"
     30  1.1  haad #include "local.h"
     31  1.1  haad 
/*
 * Fallback definitions of the device-mapper connector index/value for
 * kernel headers that do not provide them.
 */
#ifndef CN_IDX_DM
#warning Kernel should be at least 2.6.31
#define CN_IDX_DM                       0x7     /* Device Mapper */
#define CN_VAL_DM_USERSPACE_LOG         0x1
#endif

/*
 * Connector (netlink) socket fd.  Starts out as -1 (no socket) so that a
 * cleanup before a successful init_local() cannot close descriptor 0.
 */
static int cn_fd = -1;

/* Scratch buffers for one incoming and one outgoing netlink message. */
static char recv_buf[2048];
static char send_buf[2048];
     41  1.1  haad 
     42  1.1  haad 
     43  1.1  haad /* FIXME: merge this function with kernel_send_helper */
     44  1.1  haad static int kernel_ack(uint32_t seq, int error)
     45  1.1  haad {
     46  1.1  haad 	int r;
     47  1.1  haad 	struct nlmsghdr *nlh = (struct nlmsghdr *)send_buf;
     48  1.1  haad 	struct cn_msg *msg = NLMSG_DATA(nlh);
     49  1.1  haad 
     50  1.1  haad 	if (error < 0) {
     51  1.1  haad 		LOG_ERROR("Programmer error: error codes must be positive");
     52  1.1  haad 		return -EINVAL;
     53  1.1  haad 	}
     54  1.1  haad 
     55  1.1  haad 	memset(send_buf, 0, sizeof(send_buf));
     56  1.1  haad 
     57  1.1  haad 	nlh->nlmsg_seq = 0;
     58  1.1  haad 	nlh->nlmsg_pid = getpid();
     59  1.1  haad 	nlh->nlmsg_type = NLMSG_DONE;
     60  1.1  haad 	nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct cn_msg));
     61  1.1  haad 	nlh->nlmsg_flags = 0;
     62  1.1  haad 
     63  1.1  haad 	msg->len = 0;
     64  1.1  haad 	msg->id.idx = CN_IDX_DM;
     65  1.1  haad 	msg->id.val = CN_VAL_DM_USERSPACE_LOG;
     66  1.1  haad 	msg->seq = seq;
     67  1.1  haad 	msg->ack = error;
     68  1.1  haad 
     69  1.1  haad 	r = send(cn_fd, nlh, NLMSG_LENGTH(sizeof(struct cn_msg)), 0);
     70  1.1  haad 	/* FIXME: do better error processing */
     71  1.1  haad 	if (r <= 0)
     72  1.1  haad 		return -EBADE;
     73  1.1  haad 
     74  1.1  haad 	return 0;
     75  1.1  haad }
     76  1.1  haad 
     77  1.1  haad 
     78  1.1  haad /*
     79  1.1  haad  * kernel_recv
     80  1.1  haad  * @rq: the newly allocated request from kernel
     81  1.1  haad  *
     82  1.1  haad  * Read requests from the kernel and allocate space for the new request.
     83  1.1  haad  * If there is no request from the kernel, *rq is NULL.
     84  1.1  haad  *
     85  1.1  haad  * This function is not thread safe due to returned stack pointer.  In fact,
     86  1.1  haad  * the returned pointer must not be in-use when this function is called again.
     87  1.1  haad  *
     88  1.1  haad  * Returns: 0 on success, -EXXX on error
     89  1.1  haad  */
     90  1.1  haad static int kernel_recv(struct clog_request **rq)
     91  1.1  haad {
     92  1.1  haad 	int r = 0;
     93  1.1  haad 	int len;
     94  1.1  haad 	struct cn_msg *msg;
     95  1.1  haad 	struct dm_ulog_request *u_rq;
     96  1.1  haad 
     97  1.1  haad 	*rq = NULL;
     98  1.1  haad 	memset(recv_buf, 0, sizeof(recv_buf));
     99  1.1  haad 
    100  1.1  haad 	len = recv(cn_fd, recv_buf, sizeof(recv_buf), 0);
    101  1.1  haad 	if (len < 0) {
    102  1.1  haad 		LOG_ERROR("Failed to recv message from kernel");
    103  1.1  haad 		r = -errno;
    104  1.1  haad 		goto fail;
    105  1.1  haad 	}
    106  1.1  haad 
    107  1.1  haad 	switch (((struct nlmsghdr *)recv_buf)->nlmsg_type) {
    108  1.1  haad 	case NLMSG_ERROR:
    109  1.1  haad 		LOG_ERROR("Unable to recv message from kernel: NLMSG_ERROR");
    110  1.1  haad 		r = -EBADE;
    111  1.1  haad 		goto fail;
    112  1.1  haad 	case NLMSG_DONE:
    113  1.1  haad 		msg = (struct cn_msg *)NLMSG_DATA((struct nlmsghdr *)recv_buf);
    114  1.1  haad 		len -= sizeof(struct nlmsghdr);
    115  1.1  haad 
    116  1.1  haad 		if (len < sizeof(struct cn_msg)) {
    117  1.1  haad 			LOG_ERROR("Incomplete request from kernel received");
    118  1.1  haad 			r = -EBADE;
    119  1.1  haad 			goto fail;
    120  1.1  haad 		}
    121  1.1  haad 
    122  1.1  haad 		if (msg->len > DM_ULOG_REQUEST_SIZE) {
    123  1.1  haad 			LOG_ERROR("Not enough space to receive kernel request (%d/%d)",
    124  1.1  haad 				  msg->len, DM_ULOG_REQUEST_SIZE);
    125  1.1  haad 			r = -EBADE;
    126  1.1  haad 			goto fail;
    127  1.1  haad 		}
    128  1.1  haad 
    129  1.1  haad 		if (!msg->len)
    130  1.1  haad 			LOG_ERROR("Zero length message received");
    131  1.1  haad 
    132  1.1  haad 		len -= sizeof(struct cn_msg);
    133  1.1  haad 
    134  1.1  haad 		if (len < msg->len)
    135  1.1  haad 			LOG_ERROR("len = %d, msg->len = %d", len, msg->len);
    136  1.1  haad 
    137  1.1  haad 		msg->data[msg->len] = '\0'; /* Cleaner way to ensure this? */
    138  1.1  haad 		u_rq = (struct dm_ulog_request *)msg->data;
    139  1.1  haad 
    140  1.1  haad 		if (!u_rq->request_type) {
    141  1.1  haad 			LOG_DBG("Bad transmission, requesting resend [%u]",
    142  1.1  haad 				msg->seq);
    143  1.1  haad 			r = -EAGAIN;
    144  1.1  haad 
    145  1.1  haad 			if (kernel_ack(msg->seq, EAGAIN)) {
    146  1.1  haad 				LOG_ERROR("Failed to NACK kernel transmission [%u]",
    147  1.1  haad 					  msg->seq);
    148  1.1  haad 				r = -EBADE;
    149  1.1  haad 			}
    150  1.1  haad 		}
    151  1.1  haad 
    152  1.1  haad 		/*
    153  1.1  haad 		 * Now we've got sizeof(struct cn_msg) + sizeof(struct nlmsghdr)
    154  1.1  haad 		 * worth of space that precede the request structure from the
    155  1.1  haad 		 * kernel.  Since that space isn't going to be used again, we
    156  1.1  haad 		 * can take it for our purposes; rather than allocating a whole
    157  1.1  haad 		 * new structure and doing a memcpy.
    158  1.1  haad 		 *
    159  1.1  haad 		 * We should really make sure 'clog_request' doesn't grow
    160  1.1  haad 		 * beyond what is available to us, but we need only check it
    161  1.1  haad 		 * once... perhaps at compile time?
    162  1.1  haad 		 */
    163  1.1  haad //		*rq = container_of(u_rq, struct clog_request, u_rq);
    164  1.1  haad 		*rq = (void *)u_rq -
    165  1.1  haad 			(sizeof(struct clog_request) -
    166  1.1  haad 			 sizeof(struct dm_ulog_request));
    167  1.1  haad 
    168  1.1  haad 		/* Clear the wrapper container fields */
    169  1.1  haad 		memset(*rq, 0, (void *)u_rq - (void *)(*rq));
    170  1.1  haad 		break;
    171  1.1  haad 	default:
    172  1.1  haad 		LOG_ERROR("Unknown nlmsg_type");
    173  1.1  haad 		r = -EBADE;
    174  1.1  haad 	}
    175  1.1  haad 
    176  1.1  haad fail:
    177  1.1  haad 	if (r)
    178  1.1  haad 		*rq = NULL;
    179  1.1  haad 
    180  1.1  haad 	return (r == -EAGAIN) ? 0 : r;
    181  1.1  haad }
    182  1.1  haad 
    183  1.1  haad static int kernel_send_helper(void *data, int out_size)
    184  1.1  haad {
    185  1.1  haad 	int r;
    186  1.1  haad 	struct nlmsghdr *nlh;
    187  1.1  haad 	struct cn_msg *msg;
    188  1.1  haad 
    189  1.1  haad 	memset(send_buf, 0, sizeof(send_buf));
    190  1.1  haad 
    191  1.1  haad 	nlh = (struct nlmsghdr *)send_buf;
    192  1.1  haad 	nlh->nlmsg_seq = 0;  /* FIXME: Is this used? */
    193  1.1  haad 	nlh->nlmsg_pid = getpid();
    194  1.1  haad 	nlh->nlmsg_type = NLMSG_DONE;
    195  1.1  haad 	nlh->nlmsg_len = NLMSG_LENGTH(out_size + sizeof(struct cn_msg));
    196  1.1  haad 	nlh->nlmsg_flags = 0;
    197  1.1  haad 
    198  1.1  haad 	msg = NLMSG_DATA(nlh);
    199  1.1  haad 	memcpy(msg->data, data, out_size);
    200  1.1  haad 	msg->len = out_size;
    201  1.1  haad 	msg->id.idx = CN_IDX_DM;
    202  1.1  haad 	msg->id.val = CN_VAL_DM_USERSPACE_LOG;
    203  1.1  haad 	msg->seq = 0;
    204  1.1  haad 
    205  1.1  haad 	r = send(cn_fd, nlh, NLMSG_LENGTH(out_size + sizeof(struct cn_msg)), 0);
    206  1.1  haad 	/* FIXME: do better error processing */
    207  1.1  haad 	if (r <= 0)
    208  1.1  haad 		return -EBADE;
    209  1.1  haad 
    210  1.1  haad 	return 0;
    211  1.1  haad }
    212  1.1  haad 
    213  1.1  haad /*
    214  1.1  haad  * do_local_work
    215  1.1  haad  *
    216  1.1  haad  * Any processing errors are placed in the 'rq'
    217  1.1  haad  * structure to be reported back to the kernel.
    218  1.1  haad  * It may be pointless for this function to
    219  1.1  haad  * return an int.
    220  1.1  haad  *
    221  1.1  haad  * Returns: 0 on success, -EXXX on failure
    222  1.1  haad  */
    223  1.1  haad static int do_local_work(void *data)
    224  1.1  haad {
    225  1.1  haad 	int r;
    226  1.1  haad 	struct clog_request *rq;
    227  1.1  haad 	struct dm_ulog_request *u_rq = NULL;
    228  1.1  haad 
    229  1.1  haad 	r = kernel_recv(&rq);
    230  1.1  haad 	if (r)
    231  1.1  haad 		return r;
    232  1.1  haad 
    233  1.1  haad 	if (!rq)
    234  1.1  haad 		return 0;
    235  1.1  haad 
    236  1.1  haad 	u_rq = &rq->u_rq;
    237  1.1  haad 	LOG_DBG("[%s]  Request from kernel received: [%s/%u]",
    238  1.1  haad 		SHORT_UUID(u_rq->uuid), RQ_TYPE(u_rq->request_type),
    239  1.1  haad 		u_rq->seq);
    240  1.1  haad 	switch (u_rq->request_type) {
    241  1.1  haad 	case DM_ULOG_CTR:
    242  1.1  haad 	case DM_ULOG_DTR:
    243  1.1  haad 	case DM_ULOG_GET_REGION_SIZE:
    244  1.1  haad 	case DM_ULOG_IN_SYNC:
    245  1.1  haad 	case DM_ULOG_GET_SYNC_COUNT:
    246  1.1  haad 	case DM_ULOG_STATUS_INFO:
    247  1.1  haad 	case DM_ULOG_STATUS_TABLE:
    248  1.1  haad 	case DM_ULOG_PRESUSPEND:
    249  1.1  haad 		/* We do not specify ourselves as server here */
    250  1.1  haad 		r = do_request(rq, 0);
    251  1.1  haad 		if (r)
    252  1.1  haad 			LOG_DBG("Returning failed request to kernel [%s]",
    253  1.1  haad 				RQ_TYPE(u_rq->request_type));
    254  1.1  haad 		r = kernel_send(u_rq);
    255  1.1  haad 		if (r)
    256  1.1  haad 			LOG_ERROR("Failed to respond to kernel [%s]",
    257  1.1  haad 				  RQ_TYPE(u_rq->request_type));
    258  1.1  haad 
    259  1.1  haad 		break;
    260  1.1  haad 	case DM_ULOG_RESUME:
    261  1.1  haad 		/*
    262  1.1  haad 		 * Resume is a special case that requires a local
    263  1.1  haad 		 * component to join the CPG, and a cluster component
    264  1.1  haad 		 * to handle the request.
    265  1.1  haad 		 */
    266  1.1  haad 		r = local_resume(u_rq);
    267  1.1  haad 		if (r) {
    268  1.1  haad 			LOG_DBG("Returning failed request to kernel [%s]",
    269  1.1  haad 				RQ_TYPE(u_rq->request_type));
    270  1.1  haad 			r = kernel_send(u_rq);
    271  1.1  haad 			if (r)
    272  1.1  haad 				LOG_ERROR("Failed to respond to kernel [%s]",
    273  1.1  haad 					  RQ_TYPE(u_rq->request_type));
    274  1.1  haad 			break;
    275  1.1  haad 		}
    276  1.1  haad 		/* ELSE, fall through */
    277  1.1  haad 	case DM_ULOG_IS_CLEAN:
    278  1.1  haad 	case DM_ULOG_FLUSH:
    279  1.1  haad 	case DM_ULOG_MARK_REGION:
    280  1.1  haad 	case DM_ULOG_GET_RESYNC_WORK:
    281  1.1  haad 	case DM_ULOG_SET_REGION_SYNC:
    282  1.1  haad 	case DM_ULOG_IS_REMOTE_RECOVERING:
    283  1.1  haad 	case DM_ULOG_POSTSUSPEND:
    284  1.1  haad 		r = cluster_send(rq);
    285  1.1  haad 		if (r) {
    286  1.1  haad 			u_rq->data_size = 0;
    287  1.1  haad 			u_rq->error = r;
    288  1.1  haad 			kernel_send(u_rq);
    289  1.1  haad 		}
    290  1.1  haad 
    291  1.1  haad 		break;
    292  1.1  haad 	case DM_ULOG_CLEAR_REGION:
    293  1.1  haad 		r = kernel_ack(u_rq->seq, 0);
    294  1.1  haad 
    295  1.1  haad 		r = cluster_send(rq);
    296  1.1  haad 		if (r) {
    297  1.1  haad 			/*
    298  1.1  haad 			 * FIXME: store error for delivery on flush
    299  1.1  haad 			 *        This would allow us to optimize MARK_REGION
    300  1.1  haad 			 *        too.
    301  1.1  haad 			 */
    302  1.1  haad 		}
    303  1.1  haad 
    304  1.1  haad 		break;
    305  1.1  haad 	default:
    306  1.1  haad 		LOG_ERROR("Invalid log request received (%u), ignoring.",
    307  1.1  haad 			  u_rq->request_type);
    308  1.1  haad 
    309  1.1  haad 		return 0;
    310  1.1  haad 	}
    311  1.1  haad 
    312  1.1  haad 	if (r && !u_rq->error)
    313  1.1  haad 		u_rq->error = r;
    314  1.1  haad 
    315  1.1  haad 	return r;
    316  1.1  haad }
    317  1.1  haad 
    318  1.1  haad /*
    319  1.1  haad  * kernel_send
    320  1.1  haad  * @u_rq: result to pass back to kernel
    321  1.1  haad  *
    322  1.1  haad  * This function returns the u_rq structure
    323  1.1  haad  * (containing the results) to the kernel.
    324  1.1  haad  * It then frees the structure.
    325  1.1  haad  *
    326  1.1  haad  * WARNING: should the structure be freed if
    327  1.1  haad  * there is an error?  I vote 'yes'.  If the
    328  1.1  haad  * kernel doesn't get the response, it should
    329  1.1  haad  * resend the request.
    330  1.1  haad  *
    331  1.1  haad  * Returns: 0 on success, -EXXX on failure
    332  1.1  haad  */
    333  1.1  haad int kernel_send(struct dm_ulog_request *u_rq)
    334  1.1  haad {
    335  1.1  haad 	int r;
    336  1.1  haad 	int size;
    337  1.1  haad 
    338  1.1  haad 	if (!u_rq)
    339  1.1  haad 		return -EINVAL;
    340  1.1  haad 
    341  1.1  haad 	size = sizeof(struct dm_ulog_request) + u_rq->data_size;
    342  1.1  haad 
    343  1.1  haad 	if (!u_rq->data_size && !u_rq->error) {
    344  1.1  haad 		/* An ACK is all that is needed */
    345  1.1  haad 
    346  1.1  haad 		/* FIXME: add ACK code */
    347  1.1  haad 	} else if (size > DM_ULOG_REQUEST_SIZE) {
    348  1.1  haad 		/*
    349  1.1  haad 		 * If we gotten here, we've already overrun
    350  1.1  haad 		 * our allotted space somewhere.
    351  1.1  haad 		 *
    352  1.1  haad 		 * We must do something, because the kernel
    353  1.1  haad 		 * is waiting for a response.
    354  1.1  haad 		 */
    355  1.1  haad 		LOG_ERROR("Not enough space to respond to server");
    356  1.1  haad 		u_rq->error = -ENOSPC;
    357  1.1  haad 		size = sizeof(struct dm_ulog_request);
    358  1.1  haad 	}
    359  1.1  haad 
    360  1.1  haad 	r = kernel_send_helper(u_rq, size);
    361  1.1  haad 	if (r)
    362  1.1  haad 		LOG_ERROR("Failed to send msg to kernel.");
    363  1.1  haad 
    364  1.1  haad 	return r;
    365  1.1  haad }
    366  1.1  haad 
    367  1.1  haad /*
    368  1.1  haad  * init_local
    369  1.1  haad  *
    370  1.1  haad  * Initialize kernel communication socket (netlink)
    371  1.1  haad  *
    372  1.1  haad  * Returns: 0 on success, values from common.h on failure
    373  1.1  haad  */
    374  1.1  haad int init_local(void)
    375  1.1  haad {
    376  1.1  haad 	int r = 0;
    377  1.1  haad 	int opt;
    378  1.1  haad 	struct sockaddr_nl addr;
    379  1.1  haad 
    380  1.1  haad 	cn_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
    381  1.1  haad 	if (cn_fd < 0)
    382  1.1  haad 		return EXIT_KERNEL_SOCKET;
    383  1.1  haad 
    384  1.1  haad 	/* memset to fix valgrind complaint */
    385  1.1  haad 	memset(&addr, 0, sizeof(struct sockaddr_nl));
    386  1.1  haad 
    387  1.1  haad 	addr.nl_family = AF_NETLINK;
    388  1.1  haad 	addr.nl_groups = CN_IDX_DM;
    389  1.1  haad 	addr.nl_pid = 0;
    390  1.1  haad 
    391  1.1  haad 	r = bind(cn_fd, (struct sockaddr *) &addr, sizeof(addr));
    392  1.1  haad 	if (r < 0) {
    393  1.1  haad 		close(cn_fd);
    394  1.1  haad 		return EXIT_KERNEL_BIND;
    395  1.1  haad 	}
    396  1.1  haad 
    397  1.1  haad 	opt = addr.nl_groups;
    398  1.1  haad 	r = setsockopt(cn_fd, 270, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
    399  1.1  haad 	if (r) {
    400  1.1  haad 		close(cn_fd);
    401  1.1  haad 		return EXIT_KERNEL_SETSOCKOPT;
    402  1.1  haad 	}
    403  1.1  haad 
    404  1.1  haad 	/*
    405  1.1  haad 	r = fcntl(cn_fd, F_SETFL, FNDELAY);
    406  1.1  haad 	*/
    407  1.1  haad 
    408  1.1  haad 	links_register(cn_fd, "local", do_local_work, NULL);
    409  1.1  haad 
    410  1.1  haad 	return 0;
    411  1.1  haad }
    412  1.1  haad 
    413  1.1  haad /*
    414  1.1  haad  * cleanup_local
    415  1.1  haad  *
    416  1.1  haad  * Clean up before exiting
    417  1.1  haad  */
    418  1.1  haad void cleanup_local(void)
    419  1.1  haad {
    420  1.1  haad 	links_unregister(cn_fd);
    421  1.1  haad 	close(cn_fd);
    422  1.1  haad }
    423