Home | History | Annotate | Line # | Download | only in locking
      1 /*	$NetBSD: cluster_locking.c,v 1.1.1.3 2009/12/02 00:26:24 haad Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
      5  * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
      6  *
      7  * This file is part of LVM2.
      8  *
      9  * This copyrighted material is made available to anyone wishing to use,
     10  * modify, copy, or redistribute it subject to the terms and conditions
     11  * of the GNU Lesser General Public License v.2.1.
     12  *
     13  * You should have received a copy of the GNU Lesser General Public License
     14  * along with this program; if not, write to the Free Software Foundation,
     15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     16  */
     17 
     18 /*
     19  * Locking functions for LVM.
     20  * The main purpose of this part of the library is to serialise LVM
     21  * management operations across a cluster.
     22  */
     23 
     24 #include "lib.h"
     25 #include "clvm.h"
     26 #include "lvm-string.h"
     27 #include "locking.h"
     28 #include "locking_types.h"
     29 #include "toolcontext.h"
     30 
     31 #include <assert.h>
     32 #include <stddef.h>
     33 #include <sys/socket.h>
     34 #include <sys/un.h>
     35 #include <unistd.h>
     36 
     37 #ifndef CLUSTER_LOCKING_INTERNAL
     38 int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags);
     39 int query_resource(const char *resource, int *mode);
     40 void locking_end(void);
     41 int locking_init(int type, struct config_tree *cf, uint32_t *flags);
     42 #endif
     43 
/* One reply unpacked from a clvmd response message. */
struct lvm_response {
	char node[255];		/* Name of the node that sent the reply */
	char *response;		/* Separately allocated reply text */
	int status;		/* Status value reported by that node */
	int len;		/* Length of the reply text, excluding the NUL */
};

typedef struct lvm_response lvm_response_t;
     50 
     51 /*
     52  * This gets stuck at the start of memory we allocate so we
     53  * can sanity-check it at deallocation time
     54  */
     55 #define LVM_SIGNATURE 0x434C564D
     56 
     57 /*
     58  * NOTE: the LVMD uses the socket FD as the client ID, this means
     59  * that any client that calls fork() will inherit the context of
     60  * its parent.
     61  */
     62 static int _clvmd_sock = -1;
     63 
     64 /* FIXME Install SIGPIPE handler? */
     65 
     66 /* Open connection to the Cluster Manager daemon */
     67 static int _open_local_sock(void)
     68 {
     69 	int local_socket;
     70 	struct sockaddr_un sockaddr;
     71 
     72 	/* Open local socket */
     73 	if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
     74 		log_error("Local socket creation failed: %s", strerror(errno));
     75 		return -1;
     76 	}
     77 
     78 	memset(&sockaddr, 0, sizeof(sockaddr));
     79 	memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
     80 
     81 	sockaddr.sun_family = AF_UNIX;
     82 
     83 	if (connect(local_socket,(struct sockaddr *) &sockaddr,
     84 		    sizeof(sockaddr))) {
     85 		int saved_errno = errno;
     86 
     87 		log_error("connect() failed on local socket: %s",
     88 			  strerror(errno));
     89 		if (close(local_socket))
     90 			stack;
     91 
     92 		errno = saved_errno;
     93 		return -1;
     94 	}
     95 
     96 	return local_socket;
     97 }
     98 
     99 /* Send a request and return the status */
    100 static int _send_request(char *inbuf, int inlen, char **retbuf)
    101 {
    102 	char outbuf[PIPE_BUF] __attribute((aligned(8)));
    103 	struct clvm_header *outheader = (struct clvm_header *) outbuf;
    104 	int len;
    105 	int off;
    106 	int buflen;
    107 	int err;
    108 
    109 	/* Send it to CLVMD */
    110  rewrite:
    111 	if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) {
    112 		if (err == -1 && errno == EINTR)
    113 			goto rewrite;
    114 		log_error("Error writing data to clvmd: %s", strerror(errno));
    115 		return 0;
    116 	}
    117 
    118 	/* Get the response */
    119  reread:
    120 	if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) {
    121 		if (errno == EINTR)
    122 			goto reread;
    123 		log_error("Error reading data from clvmd: %s", strerror(errno));
    124 		return 0;
    125 	}
    126 
    127 	if (len == 0) {
    128 		log_error("EOF reading CLVMD");
    129 		errno = ENOTCONN;
    130 		return 0;
    131 	}
    132 
    133 	/* Allocate buffer */
    134 	buflen = len + outheader->arglen;
    135 	*retbuf = dm_malloc(buflen);
    136 	if (!*retbuf) {
    137 		errno = ENOMEM;
    138 		return 0;
    139 	}
    140 
    141 	/* Copy the header */
    142 	memcpy(*retbuf, outbuf, len);
    143 	outheader = (struct clvm_header *) *retbuf;
    144 
    145 	/* Read the returned values */
    146 	off = 1;		/* we've already read the first byte */
    147 	while (off <= outheader->arglen && len > 0) {
    148 		len = read(_clvmd_sock, outheader->args + off,
    149 			   buflen - off - offsetof(struct clvm_header, args));
    150 		if (len > 0)
    151 			off += len;
    152 	}
    153 
    154 	/* Was it an error ? */
    155 	if (outheader->status != 0) {
    156 		errno = outheader->status;
    157 
    158 		/* Only return an error here if there are no node-specific
    159 		   errors present in the message that might have more detail */
    160 		if (!(outheader->flags & CLVMD_FLAG_NODEERRS)) {
    161 			log_error("cluster request failed: %s", strerror(errno));
    162 			return 0;
    163 		}
    164 
    165 	}
    166 
    167 	return 1;
    168 }
    169 
    170 /* Build the structure header and parse-out wildcard node names */
    171 /* FIXME: Cleanup implicit casts of clvmd_cmd (int, char, uint8_t, etc). */
    172 static void _build_header(struct clvm_header *head, int clvmd_cmd, const char *node,
    173 			  int len)
    174 {
    175 	head->cmd = clvmd_cmd;
    176 	head->status = 0;
    177 	head->flags = 0;
    178 	head->clientid = 0;
    179 	head->arglen = len;
    180 
    181 	if (node) {
    182 		/*
    183 		 * Allow a couple of special node names:
    184 		 * "*" for all nodes,
    185 		 * "." for the local node only
    186 		 */
    187 		if (strcmp(node, "*") == 0) {
    188 			head->node[0] = '\0';
    189 		} else if (strcmp(node, ".") == 0) {
    190 			head->node[0] = '\0';
    191 			head->flags = CLVMD_FLAG_LOCAL;
    192 		} else
    193 			strcpy(head->node, node);
    194 	} else
    195 		head->node[0] = '\0';
    196 }
    197 
    198 /*
    199  * Send a message to a(or all) node(s) in the cluster and wait for replies
    200  */
    201 static int _cluster_request(char clvmd_cmd, const char *node, void *data, int len,
    202 			   lvm_response_t ** response, int *num)
    203 {
    204 	char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1] __attribute((aligned(8)));
    205 	char *inptr;
    206 	char *retbuf = NULL;
    207 	int status;
    208 	int i;
    209 	int num_responses = 0;
    210 	struct clvm_header *head = (struct clvm_header *) outbuf;
    211 	lvm_response_t *rarray;
    212 
    213 	*num = 0;
    214 
    215 	if (_clvmd_sock == -1)
    216 		_clvmd_sock = _open_local_sock();
    217 
    218 	if (_clvmd_sock == -1)
    219 		return 0;
    220 
    221 	_build_header(head, clvmd_cmd, node, len);
    222 	memcpy(head->node + strlen(head->node) + 1, data, len);
    223 
    224 	status = _send_request(outbuf, sizeof(struct clvm_header) +
    225 			      strlen(head->node) + len, &retbuf);
    226 	if (!status)
    227 		goto out;
    228 
    229 	/* Count the number of responses we got */
    230 	head = (struct clvm_header *) retbuf;
    231 	inptr = head->args;
    232 	while (inptr[0]) {
    233 		num_responses++;
    234 		inptr += strlen(inptr) + 1;
    235 		inptr += sizeof(int);
    236 		inptr += strlen(inptr) + 1;
    237 	}
    238 
    239 	/*
    240 	 * Allocate response array.
    241 	 * With an extra pair of INTs on the front to sanity
    242 	 * check the pointer when we are given it back to free
    243 	 */
    244 	*response = dm_malloc(sizeof(lvm_response_t) * num_responses);
    245 	if (!*response) {
    246 		errno = ENOMEM;
    247 		status = 0;
    248 		goto out;
    249 	}
    250 
    251 	rarray = *response;
    252 
    253 	/* Unpack the response into an lvm_response_t array */
    254 	inptr = head->args;
    255 	i = 0;
    256 	while (inptr[0]) {
    257 		strcpy(rarray[i].node, inptr);
    258 		inptr += strlen(inptr) + 1;
    259 
    260 		memcpy(&rarray[i].status, inptr, sizeof(int));
    261 		inptr += sizeof(int);
    262 
    263 		rarray[i].response = dm_malloc(strlen(inptr) + 1);
    264 		if (rarray[i].response == NULL) {
    265 			/* Free up everything else and return error */
    266 			int j;
    267 			for (j = 0; j < i; j++)
    268 				dm_free(rarray[i].response);
    269 			free(*response);
    270 			errno = ENOMEM;
    271 			status = -1;
    272 			goto out;
    273 		}
    274 
    275 		strcpy(rarray[i].response, inptr);
    276 		rarray[i].len = strlen(inptr);
    277 		inptr += strlen(inptr) + 1;
    278 		i++;
    279 	}
    280 	*num = num_responses;
    281 	*response = rarray;
    282 
    283       out:
    284 	if (retbuf)
    285 		dm_free(retbuf);
    286 
    287 	return status;
    288 }
    289 
    290 /* Free reply array */
    291 static int _cluster_free_request(lvm_response_t * response, int num)
    292 {
    293 	int i;
    294 
    295 	for (i = 0; i < num; i++) {
    296 		dm_free(response[i].response);
    297 	}
    298 
    299 	dm_free(response);
    300 
    301 	return 1;
    302 }
    303 
    304 static int _lock_for_cluster(struct cmd_context *cmd, unsigned char clvmd_cmd,
    305 			     uint32_t flags, const char *name)
    306 {
    307 	int status;
    308 	int i;
    309 	char *args;
    310 	const char *node = "";
    311 	int len;
    312 	int saved_errno = errno;
    313 	lvm_response_t *response = NULL;
    314 	int num_responses;
    315 
    316 	assert(name);
    317 
    318 	len = strlen(name) + 3;
    319 	args = alloca(len);
    320 	strcpy(args + 2, name);
    321 
    322 	args[0] = flags & 0x7F; /* Maskoff lock flags */
    323 	args[1] = flags & 0xC0; /* Bitmap flags */
    324 
    325 	if (mirror_in_sync())
    326 		args[1] |= LCK_MIRROR_NOSYNC_MODE;
    327 
    328 	if (dmeventd_monitor_mode())
    329 		args[1] |= LCK_DMEVENTD_MONITOR_MODE;
    330 
    331 	if (cmd->partial_activation)
    332 		args[1] |= LCK_PARTIAL_MODE;
    333 
    334 	/*
    335 	 * VG locks are just that: locks, and have no side effects
    336 	 * so we only need to do them on the local node because all
    337 	 * locks are cluster-wide.
    338 	 * Also, if the lock is exclusive it makes no sense to try to
    339 	 * acquire it on all nodes, so just do that on the local node too.
    340 	 * One exception, is that P_ locks /do/ get distributed across
    341 	 * the cluster because they might have side-effects.
    342 	 */
    343 	if (strncmp(name, "P_", 2) &&
    344 	    (clvmd_cmd == CLVMD_CMD_LOCK_VG ||
    345 	     (flags & LCK_TYPE_MASK) == LCK_EXCL ||
    346 	     (flags & LCK_LOCAL) ||
    347 	     !(flags & LCK_CLUSTER_VG)))
    348 		node = ".";
    349 
    350 	status = _cluster_request(clvmd_cmd, node, args, len,
    351 				  &response, &num_responses);
    352 
    353 	/* If any nodes were down then display them and return an error */
    354 	for (i = 0; i < num_responses; i++) {
    355 		if (response[i].status == EHOSTDOWN) {
    356 			log_error("clvmd not running on node %s",
    357 				  response[i].node);
    358 			status = 0;
    359 			errno = response[i].status;
    360 		} else if (response[i].status) {
    361 			log_error("Error locking on node %s: %s",
    362 				  response[i].node,
    363 				  response[i].response[0] ?
    364 				  	response[i].response :
    365 				  	strerror(response[i].status));
    366 			status = 0;
    367 			errno = response[i].status;
    368 		}
    369 	}
    370 
    371 	saved_errno = errno;
    372 	_cluster_free_request(response, num_responses);
    373 	errno = saved_errno;
    374 
    375 	return status;
    376 }
    377 
    378 /* API entry point for LVM */
    379 #ifdef CLUSTER_LOCKING_INTERNAL
    380 static int _lock_resource(struct cmd_context *cmd, const char *resource,
    381 			  uint32_t flags)
    382 #else
    383 int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags)
    384 #endif
    385 {
    386 	char lockname[PATH_MAX];
    387 	int clvmd_cmd = 0;
    388 	const char *lock_scope;
    389 	const char *lock_type = "";
    390 
    391 	assert(strlen(resource) < sizeof(lockname));
    392 	assert(resource);
    393 
    394 	switch (flags & LCK_SCOPE_MASK) {
    395 	case LCK_VG:
    396 		if (flags == LCK_VG_BACKUP) {
    397 			log_very_verbose("Requesting backup of VG metadata for %s",
    398 					 resource);
    399 			return _lock_for_cluster(cmd, CLVMD_CMD_VG_BACKUP,
    400 						 LCK_CLUSTER_VG, resource);
    401 		}
    402 
    403 		/* If the VG name is empty then lock the unused PVs */
    404 		if (*resource == '#' || (flags & LCK_CACHE))
    405 			dm_snprintf(lockname, sizeof(lockname), "P_%s",
    406 				    resource);
    407 		else
    408 			dm_snprintf(lockname, sizeof(lockname), "V_%s",
    409 				    resource);
    410 
    411 		lock_scope = "VG";
    412 		clvmd_cmd = CLVMD_CMD_LOCK_VG;
    413 		flags &= LCK_TYPE_MASK;
    414 		break;
    415 
    416 	case LCK_LV:
    417 		clvmd_cmd = CLVMD_CMD_LOCK_LV;
    418 		strcpy(lockname, resource);
    419 		lock_scope = "LV";
    420 		flags &= 0xffdf;	/* Mask off HOLD flag */
    421 		break;
    422 
    423 	default:
    424 		log_error("Unrecognised lock scope: %d",
    425 			  flags & LCK_SCOPE_MASK);
    426 		return 0;
    427 	}
    428 
    429 	switch(flags & LCK_TYPE_MASK) {
    430 	case LCK_UNLOCK:
    431 		lock_type = "UN";
    432 		break;
    433 	case LCK_NULL:
    434 		lock_type = "NL";
    435 		break;
    436 	case LCK_READ:
    437 		lock_type = "CR";
    438 		break;
    439 	case LCK_PREAD:
    440 		lock_type = "PR";
    441 		break;
    442 	case LCK_WRITE:
    443 		lock_type = "PW";
    444 		break;
    445 	case LCK_EXCL:
    446 		lock_type = "EX";
    447 		break;
    448 	default:
    449 		log_error("Unrecognised lock type: %u",
    450 			  flags & LCK_TYPE_MASK);
    451 		return 0;
    452 	}
    453 
    454 	log_very_verbose("Locking %s %s %s %s%s%s%s (0x%x)", lock_scope, lockname,
    455 			 lock_type,
    456 			 flags & LCK_NONBLOCK ? "" : "B",
    457 			 flags & LCK_HOLD ? "H" : "",
    458 			 flags & LCK_LOCAL ? "L" : "",
    459 			 flags & LCK_CLUSTER_VG ? "C" : "",
    460 			 flags);
    461 
    462 	/* Send a message to the cluster manager */
    463 	return _lock_for_cluster(cmd, clvmd_cmd, flags, lockname);
    464 }
    465 
    466 static int decode_lock_type(const char *response)
    467 {
    468 	if (!response)
    469 		return LCK_NULL;
    470 	else if (strcmp(response, "EX"))
    471 		return LCK_EXCL;
    472 	else if (strcmp(response, "CR"))
    473 		return LCK_READ;
    474 	else if (strcmp(response, "PR"))
    475 		return LCK_PREAD;
    476 
    477 	stack;
    478 	return 0;
    479 }
    480 
    481 #ifdef CLUSTER_LOCKING_INTERNAL
    482 static int _query_resource(const char *resource, int *mode)
    483 #else
    484 int query_resource(const char *resource, int *mode)
    485 #endif
    486 {
    487 	int i, status, len, num_responses, saved_errno;
    488 	const char *node = "";
    489 	char *args;
    490 	lvm_response_t *response = NULL;
    491 
    492 	saved_errno = errno;
    493 	len = strlen(resource) + 3;
    494 	args = alloca(len);
    495 	strcpy(args + 2, resource);
    496 
    497 	args[0] = 0;
    498 	args[1] = LCK_CLUSTER_VG;
    499 
    500 	status = _cluster_request(CLVMD_CMD_LOCK_QUERY, node, args, len,
    501 				  &response, &num_responses);
    502 	*mode = LCK_NULL;
    503 	for (i = 0; i < num_responses; i++) {
    504 		if (response[i].status == EHOSTDOWN)
    505 			continue;
    506 
    507 		if (!response[i].response[0])
    508 			continue;
    509 
    510 		/*
    511 		 * All nodes should use CR, or exactly one node
    512 		 * should held EX. (PR is obsolete)
    513 		 * If two nodes node reports different locks,
    514 		 * something is broken - just return more important mode.
    515 		 */
    516 		if (decode_lock_type(response[i].response) > *mode)
    517 			*mode = decode_lock_type(response[i].response);
    518 
    519 		log_debug("Lock held for %s, node %s : %s", resource,
    520 			  response[i].node, response[i].response);
    521 	}
    522 
    523 	_cluster_free_request(response, num_responses);
    524 	errno = saved_errno;
    525 
    526 	return status;
    527 }
    528 
    529 #ifdef CLUSTER_LOCKING_INTERNAL
    530 static void _locking_end(void)
    531 #else
    532 void locking_end(void)
    533 #endif
    534 {
    535 	if (_clvmd_sock != -1 && close(_clvmd_sock))
    536 		stack;
    537 
    538 	_clvmd_sock = -1;
    539 }
    540 
    541 #ifdef CLUSTER_LOCKING_INTERNAL
    542 static void _reset_locking(void)
    543 #else
    544 void reset_locking(void)
    545 #endif
    546 {
    547 	if (close(_clvmd_sock))
    548 		stack;
    549 
    550 	_clvmd_sock = _open_local_sock();
    551 	if (_clvmd_sock == -1)
    552 		stack;
    553 }
    554 
    555 #ifdef CLUSTER_LOCKING_INTERNAL
    556 int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd)
    557 {
    558 	locking->lock_resource = _lock_resource;
    559 	locking->query_resource = _query_resource;
    560 	locking->fin_locking = _locking_end;
    561 	locking->reset_locking = _reset_locking;
    562 	locking->flags = LCK_PRE_MEMLOCK | LCK_CLUSTERED;
    563 
    564 	_clvmd_sock = _open_local_sock();
    565 	if (_clvmd_sock == -1)
    566 		return 0;
    567 
    568 	return 1;
    569 }
    570 #else
    571 int locking_init(int type, struct config_tree *cf, uint32_t *flags)
    572 {
    573 	_clvmd_sock = _open_local_sock();
    574 	if (_clvmd_sock == -1)
    575 		return 0;
    576 
    577 	/* Ask LVM to lock memory before calling us */
    578 	*flags |= LCK_PRE_MEMLOCK;
    579 	*flags |= LCK_CLUSTERED;
    580 
    581 	return 1;
    582 }
    583 #endif
    584