      1 /*	$NetBSD: ip_sync.c,v 1.5 2013/09/14 12:18:06 martin Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 2012 by Darren Reed.
      5  *
      6  * See the IPFILTER.LICENCE file for details on licencing.
      7  */
      8 #if defined(KERNEL) || defined(_KERNEL)
      9 # undef KERNEL
     10 # undef _KERNEL
     11 # define        KERNEL	1
     12 # define        _KERNEL	1
     13 #endif
     14 #include <sys/errno.h>
     15 #include <sys/types.h>
     16 #include <sys/param.h>
     17 #include <sys/file.h>
     18 #if !defined(_KERNEL) && !defined(__KERNEL__)
     19 # include <stdio.h>
     20 # include <stdlib.h>
     21 # include <string.h>
     22 # define _KERNEL
     23 # define KERNEL
     24 # ifdef __OpenBSD__
     25 struct file;
     26 # endif
     27 # include <sys/uio.h>
     28 # undef _KERNEL
     29 # undef KERNEL
     30 #else
     31 # include <sys/systm.h>
     32 # if !defined(__SVR4) && !defined(__svr4__)
     33 #  include <sys/mbuf.h>
     34 # endif
     35 # include <sys/select.h>
     36 # if __FreeBSD_version >= 500000
     37 #  include <sys/selinfo.h>
     38 # endif
     39 #endif
     40 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
     41 # include <sys/proc.h>
     42 #endif
     43 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
     44 # include <sys/filio.h>
     45 # include <sys/fcntl.h>
     46 #else
     47 # include <sys/ioctl.h>
     48 #endif
     49 #include <sys/time.h>
     50 #if !defined(linux)
     51 # include <sys/protosw.h>
     52 #endif
     53 #include <sys/socket.h>
     54 #if defined(__SVR4) || defined(__svr4__)
     55 # include <sys/filio.h>
     56 # include <sys/byteorder.h>
     57 # ifdef _KERNEL
     58 #  include <sys/dditypes.h>
     59 # endif
     60 # include <sys/stream.h>
     61 # include <sys/kmem.h>
     62 #endif
     63 
     64 #include <net/if.h>
     65 #ifdef sun
     66 # include <net/af.h>
     67 #endif
     68 #include <netinet/in.h>
     69 #include <netinet/in_systm.h>
     70 #include <netinet/ip.h>
     71 #include <netinet/tcp.h>
     72 #if !defined(linux)
     73 # include <netinet/ip_var.h>
     74 #endif
     75 #if !defined(__hpux) && !defined(linux)
     76 # include <netinet/tcp_fsm.h>
     77 #endif
     78 #include <netinet/udp.h>
     79 #include <netinet/ip_icmp.h>
     80 #include "netinet/ip_compat.h"
     81 #include <netinet/tcpip.h>
     82 #include "netinet/ip_fil.h"
     83 #include "netinet/ip_nat.h"
     84 #include "netinet/ip_frag.h"
     85 #include "netinet/ip_state.h"
     86 #include "netinet/ip_proxy.h"
     87 #include "netinet/ip_sync.h"
     88 #ifdef  USE_INET6
     89 #include <netinet/icmp6.h>
     90 #endif
     91 #if (__FreeBSD_version >= 300000)
     92 # include <sys/malloc.h>
     93 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
     94 #  include <sys/libkern.h>
     95 #  include <sys/systm.h>
     96 # endif
     97 #endif
     98 /* END OF INCLUDES */
     99 
    100 #if !defined(lint)
    101 #if defined(__NetBSD__)
    102 #include <sys/cdefs.h>
    103 __KERNEL_RCSID(0, "$NetBSD: ip_sync.c,v 1.5 2013/09/14 12:18:06 martin Exp $");
    104 #else
    105 static const char rcsid[] = "@(#)Id: ip_sync.c,v 1.1.1.2 2012/07/22 13:45:38 darrenr Exp";
    106 #endif
    107 #endif
    108 
    109 #define	SYNC_STATETABSZ	256
    110 #define	SYNC_NATTABSZ	256
    111 
    112 typedef struct ipf_sync_softc_s {
    113 	ipfmutex_t	ipf_syncadd;
    114 	ipfmutex_t	ipsl_mutex;
    115 	ipfrwlock_t	ipf_syncstate;
    116 	ipfrwlock_t	ipf_syncnat;
    117 #if SOLARIS && defined(_KERNEL)
    118 	kcondvar_t	ipslwait;
    119 #endif
    120 #if defined(linux) && defined(_KERNEL)
    121 	wait_queue_head_t	sl_tail_linux;
    122 #endif
    123 	synclist_t	**syncstatetab;
    124 	synclist_t	**syncnattab;
    125 	synclogent_t	*synclog;
    126 	syncupdent_t	*syncupd;
    127 	u_int		ipf_sync_num;
    128 	u_int		ipf_sync_wrap;
    129 	u_int		sl_idx;		/* next available sync log entry */
    130 	u_int		su_idx;		/* next available sync update entry */
    131 	u_int		sl_tail;	/* next sync log entry to read */
    132 	u_int		su_tail;	/* next sync update entry to read */
    133 	int		ipf_sync_log_sz;
    134 	int		ipf_sync_nat_tab_sz;
    135 	int		ipf_sync_state_tab_sz;
    136 	int		ipf_sync_debug;
    137 	int		ipf_sync_events;
    138 	u_32_t		ipf_sync_lastwakeup;
    139 	int		ipf_sync_wake_interval;
    140 	int		ipf_sync_event_high_wm;
    141 	int		ipf_sync_queue_high_wm;
    142 	int		ipf_sync_inited;
    143 } ipf_sync_softc_t;
    144 
    145 static int ipf_sync_flush_table(ipf_sync_softc_t *, int, synclist_t **);
    146 static void ipf_sync_wakeup(ipf_main_softc_t *);
    147 static void ipf_sync_del(ipf_sync_softc_t *, synclist_t *);
    148 static void ipf_sync_poll_wakeup(ipf_main_softc_t *);
    149 static int ipf_sync_nat(ipf_main_softc_t *, synchdr_t *, void *);
    150 static int ipf_sync_state(ipf_main_softc_t *, synchdr_t *, void *);
    151 
    152 # if !defined(sparc) && !defined(__hppa)
    153 void ipf_sync_tcporder(int, struct tcpdata *);
    154 void ipf_sync_natorder(int, struct nat *);
    155 void ipf_sync_storder(int, struct ipstate *);
    156 # endif
    157 
    158 
    159 void *
    160 ipf_sync_soft_create(ipf_main_softc_t *softc)
    161 {
    162 	ipf_sync_softc_t *softs;
    163 
    164 	KMALLOC(softs, ipf_sync_softc_t *);
    165 	if (softs == NULL) {
    166 		IPFERROR(110024);
    167 		return NULL;
    168 	}
    169 
    170 	bzero((char *)softs, sizeof(*softs));
    171 
    172 	softs->ipf_sync_log_sz = SYNCLOG_SZ;
     173 	softs->ipf_sync_nat_tab_sz = SYNC_NATTABSZ;
    174 	softs->ipf_sync_state_tab_sz = SYNC_STATETABSZ;
     175 	softs->ipf_sync_event_high_wm = SYNCLOG_SZ * 90 / 100;	/* 90% */
     176 	softs->ipf_sync_queue_high_wm = SYNCLOG_SZ * 90 / 100;	/* 90% */
    177 
    178 	return softs;
    179 }
    180 
    181 
    182 /* ------------------------------------------------------------------------ */
     183 /* Function:    ipf_sync_soft_init                                          */
    184 /* Returns:     int - 0 == success, -1 == failure                           */
    185 /* Parameters:  Nil                                                         */
    186 /*                                                                          */
    187 /* Initialise all of the locks required for the sync code and initialise    */
    188 /* any data structures, as required.                                        */
    189 /* ------------------------------------------------------------------------ */
    190 int
    191 ipf_sync_soft_init(ipf_main_softc_t *softc, void *arg)
    192 {
    193 	ipf_sync_softc_t *softs = arg;
    194 
    195 	KMALLOCS(softs->synclog, synclogent_t *,
    196 		 softs->ipf_sync_log_sz * sizeof(*softs->synclog));
    197 	if (softs->synclog == NULL)
    198 		return -1;
    199 	bzero((char *)softs->synclog,
    200 	      softs->ipf_sync_log_sz * sizeof(*softs->synclog));
    201 
    202 	KMALLOCS(softs->syncupd, syncupdent_t *,
    203 		 softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
    204 	if (softs->syncupd == NULL)
    205 		return -2;
    206 	bzero((char *)softs->syncupd,
    207 	      softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
    208 
    209 	KMALLOCS(softs->syncstatetab, synclist_t **,
    210 		 softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
    211 	if (softs->syncstatetab == NULL)
    212 		return -3;
    213 	bzero((char *)softs->syncstatetab,
    214 	      softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
    215 
    216 	KMALLOCS(softs->syncnattab, synclist_t **,
    217 		 softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
    218 	if (softs->syncnattab == NULL)
     219 		return -4;
    220 	bzero((char *)softs->syncnattab,
    221 	      softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
    222 
    223 	softs->ipf_sync_num = 1;
    224 	softs->ipf_sync_wrap = 0;
    225 	softs->sl_idx = 0;
    226 	softs->su_idx = 0;
    227 	softs->sl_tail = 0;
    228 	softs->su_tail = 0;
    229 	softs->ipf_sync_events = 0;
    230 	softs->ipf_sync_lastwakeup = 0;
    231 
    232 
    233 # if SOLARIS && defined(_KERNEL)
    234 	cv_init(&softs->ipslwait, "ipsl condvar", CV_DRIVER, NULL);
    235 # endif
    236 	RWLOCK_INIT(&softs->ipf_syncstate, "add things to state sync table");
    237 	RWLOCK_INIT(&softs->ipf_syncnat, "add things to nat sync table");
    238 	MUTEX_INIT(&softs->ipf_syncadd, "add things to sync table");
    239 	MUTEX_INIT(&softs->ipsl_mutex, "read ring lock");
    240 
    241 	softs->ipf_sync_inited = 1;
    242 
    243 	return 0;
    244 }
    245 
    246 
    247 /* ------------------------------------------------------------------------ */
     248 /* Function:    ipf_sync_soft_fini                                          */
    249 /* Returns:     int - 0 == success, -1 == failure                           */
    250 /* Parameters:  Nil                                                         */
    251 /*                                                                          */
    252 /* Destroy the locks created when initialising and free any memory in use   */
    253 /* with the synchronisation tables.                                         */
    254 /* ------------------------------------------------------------------------ */
    255 int
    256 ipf_sync_soft_fini(ipf_main_softc_t *softc, void *arg)
    257 {
    258 	ipf_sync_softc_t *softs = arg;
    259 
    260 	if (softs->syncnattab != NULL) {
    261 		ipf_sync_flush_table(softs, softs->ipf_sync_nat_tab_sz,
    262 				     softs->syncnattab);
    263 		KFREES(softs->syncnattab,
    264 		       softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
    265 		softs->syncnattab = NULL;
    266 	}
    267 
    268 	if (softs->syncstatetab != NULL) {
    269 		ipf_sync_flush_table(softs, softs->ipf_sync_state_tab_sz,
    270 				     softs->syncstatetab);
    271 		KFREES(softs->syncstatetab,
    272 		       softs->ipf_sync_state_tab_sz *
    273 		       sizeof(*softs->syncstatetab));
    274 		softs->syncstatetab = NULL;
    275 	}
    276 
    277 	if (softs->syncupd != NULL) {
    278 		KFREES(softs->syncupd,
    279 		       softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
    280 		softs->syncupd = NULL;
    281 	}
    282 
    283 	if (softs->synclog != NULL) {
    284 		KFREES(softs->synclog,
    285 		       softs->ipf_sync_log_sz * sizeof(*softs->synclog));
    286 		softs->synclog = NULL;
    287 	}
    288 
    289 	if (softs->ipf_sync_inited == 1) {
    290 		MUTEX_DESTROY(&softs->ipsl_mutex);
    291 		MUTEX_DESTROY(&softs->ipf_syncadd);
    292 		RW_DESTROY(&softs->ipf_syncnat);
    293 		RW_DESTROY(&softs->ipf_syncstate);
    294 		softs->ipf_sync_inited = 0;
    295 	}
    296 
    297 	return 0;
    298 }
    299 
    300 void
    301 ipf_sync_soft_destroy(ipf_main_softc_t *softc, void *arg)
    302 {
    303 	ipf_sync_softc_t *softs = arg;
    304 
    305 	KFREE(softs);
    306 }
    307 
    308 
    309 # if !defined(sparc) && !defined(__hppa)
    310 /* ------------------------------------------------------------------------ */
    311 /* Function:    ipf_sync_tcporder                                           */
    312 /* Returns:     Nil                                                         */
    313 /* Parameters:  way(I) - direction of byte order conversion.                */
    314 /*              td(IO) - pointer to data to be converted.                   */
    315 /*                                                                          */
    316 /* Do byte swapping on values in the TCP state information structure that   */
     317 /* each host needs to use in its native byte order.                         */
    318 /* ------------------------------------------------------------------------ */
    319 void
    320 ipf_sync_tcporder(int way, tcpdata_t *td)
    321 {
    322 	if (way) {
    323 		td->td_maxwin = htons(td->td_maxwin);
    324 		td->td_end = htonl(td->td_end);
    325 		td->td_maxend = htonl(td->td_maxend);
    326 	} else {
    327 		td->td_maxwin = ntohs(td->td_maxwin);
    328 		td->td_end = ntohl(td->td_end);
    329 		td->td_maxend = ntohl(td->td_maxend);
    330 	}
    331 }
    332 
    333 
    334 /* ------------------------------------------------------------------------ */
    335 /* Function:    ipf_sync_natorder                                           */
    336 /* Returns:     Nil                                                         */
    337 /* Parameters:  way(I)  - direction of byte order conversion.               */
    338 /*              nat(IO) - pointer to data to be converted.                  */
    339 /*                                                                          */
     340 /* Do byte swapping on values in the NAT data structure that each host      */
     341 /* needs to use in its native byte order.                                   */
    342 /* ------------------------------------------------------------------------ */
    343 void
    344 ipf_sync_natorder(int way, nat_t *n)
    345 {
    346 	if (way) {
    347 		n->nat_age = htonl(n->nat_age);
    348 		n->nat_flags = htonl(n->nat_flags);
    349 		n->nat_ipsumd = htonl(n->nat_ipsumd);
    350 		n->nat_use = htonl(n->nat_use);
    351 		n->nat_dir = htonl(n->nat_dir);
    352 	} else {
    353 		n->nat_age = ntohl(n->nat_age);
    354 		n->nat_flags = ntohl(n->nat_flags);
    355 		n->nat_ipsumd = ntohl(n->nat_ipsumd);
    356 		n->nat_use = ntohl(n->nat_use);
    357 		n->nat_dir = ntohl(n->nat_dir);
    358 	}
    359 }
    360 
    361 
    362 /* ------------------------------------------------------------------------ */
    363 /* Function:    ipf_sync_storder                                            */
    364 /* Returns:     Nil                                                         */
    365 /* Parameters:  way(I)  - direction of byte order conversion.               */
    366 /*              ips(IO) - pointer to data to be converted.                  */
    367 /*                                                                          */
     368 /* Do byte swapping on values in the IP state data structure that each host */
     369 /* needs to use in its native byte order.                                   */
    370 /* ------------------------------------------------------------------------ */
    371 void
    372 ipf_sync_storder(int way, ipstate_t *ips)
    373 {
    374 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[0]);
    375 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[1]);
    376 
    377 	if (way) {
    378 		ips->is_hv = htonl(ips->is_hv);
    379 		ips->is_die = htonl(ips->is_die);
    380 		ips->is_pass = htonl(ips->is_pass);
    381 		ips->is_flags = htonl(ips->is_flags);
    382 		ips->is_opt[0] = htonl(ips->is_opt[0]);
    383 		ips->is_opt[1] = htonl(ips->is_opt[1]);
    384 		ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
    385 		ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
    386 		ips->is_sec = htons(ips->is_sec);
    387 		ips->is_secmsk = htons(ips->is_secmsk);
    388 		ips->is_auth = htons(ips->is_auth);
    389 		ips->is_authmsk = htons(ips->is_authmsk);
    390 		ips->is_s0[0] = htonl(ips->is_s0[0]);
    391 		ips->is_s0[1] = htonl(ips->is_s0[1]);
    392 		ips->is_smsk[0] = htons(ips->is_smsk[0]);
    393 		ips->is_smsk[1] = htons(ips->is_smsk[1]);
    394 	} else {
    395 		ips->is_hv = ntohl(ips->is_hv);
    396 		ips->is_die = ntohl(ips->is_die);
    397 		ips->is_pass = ntohl(ips->is_pass);
    398 		ips->is_flags = ntohl(ips->is_flags);
    399 		ips->is_opt[0] = ntohl(ips->is_opt[0]);
    400 		ips->is_opt[1] = ntohl(ips->is_opt[1]);
    401 		ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
    402 		ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
    403 		ips->is_sec = ntohs(ips->is_sec);
    404 		ips->is_secmsk = ntohs(ips->is_secmsk);
    405 		ips->is_auth = ntohs(ips->is_auth);
    406 		ips->is_authmsk = ntohs(ips->is_authmsk);
    407 		ips->is_s0[0] = ntohl(ips->is_s0[0]);
    408 		ips->is_s0[1] = ntohl(ips->is_s0[1]);
    409 		ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
    410 		ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
    411 	}
    412 }
    413 # else /* !defined(sparc) && !defined(__hppa) */
    414 #  define	ipf_sync_tcporder(x,y)
    415 #  define	ipf_sync_natorder(x,y)
    416 #  define	ipf_sync_storder(x,y)
    417 # endif /* !defined(sparc) && !defined(__hppa) */
    418 
    419 
    420 /* ------------------------------------------------------------------------ */
    421 /* Function:    ipf_sync_write                                              */
    422 /* Returns:     int    - 0 == success, else error value.                    */
    423 /* Parameters:  uio(I) - pointer to information about data to write         */
    424 /*                                                                          */
    425 /* Moves data from user space into the kernel and uses it for updating data */
    426 /* structures in the state/NAT tables.                                      */
    427 /* ------------------------------------------------------------------------ */
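/*
 * Illustrative sketch: one way a user-level sync daemon might feed this
 * routine through the sync device (typically /dev/ipsync).  The include
 * paths and the exact field layout of synchdr_t/synctcp_update_t belong to
 * ip_sync.h and should be checked there; treat the details as an example,
 * not a reference.  Only sm_magic, sm_len and sm_num travel in network
 * byte order, matching the ntohl() conversions done below.
 */
#if 0	/* example only, never compiled */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#include "netinet/ip_compat.h"
#include "netinet/ip_fil.h"
#include "netinet/ip_sync.h"

/* Send one TCP state update record to the local kernel. */
static int
send_state_update(int fd, u_32_t num, synctcp_update_t *stu)
{
	char buf[sizeof(synchdr_t) + sizeof(*stu)];
	synchdr_t sh;

	memset(&sh, 0, sizeof(sh));
	sh.sm_magic = htonl(SYNHDRMAGIC);
	sh.sm_v = 4;			/* IPv4 entry */
	sh.sm_p = IPPROTO_TCP;
	sh.sm_cmd = SMC_UPDATE;
	sh.sm_table = SMC_STATE;
	sh.sm_num = htonl(num);
	sh.sm_len = htonl(sizeof(*stu));

	memcpy(buf, &sh, sizeof(sh));
	memcpy(buf + sizeof(sh), stu, sizeof(*stu));
	return (write(fd, buf, sizeof(buf)) == (ssize_t)sizeof(buf)) ? 0 : -1;
}
#endif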
    428 int
    429 ipf_sync_write(ipf_main_softc_t *softc, struct uio *uio)
    430 {
    431 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    432 	synchdr_t sh;
    433 
    434 	/*
     435 	 * THIS MUST BE SUFFICIENTLY LARGE TO STORE
    436 	 * ANY POSSIBLE DATA TYPE
    437 	 */
    438 	char data[2048];
    439 
    440 	int err = 0;
    441 
    442 #  if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__)
    443 	uio->uio_rw = UIO_WRITE;
    444 #  endif
    445 
    446 	/* Try to get bytes */
    447 	while (uio->uio_resid > 0) {
    448 
    449 		if (uio->uio_resid >= sizeof(sh)) {
    450 
    451 			err = UIOMOVE((void *)&sh, sizeof(sh), UIO_WRITE, uio);
    452 
    453 			if (err) {
    454 				if (softs->ipf_sync_debug > 2)
    455 					printf("uiomove(header) failed: %d\n",
    456 						err);
    457 				return err;
    458 			}
    459 
    460 			/* convert to host order */
    461 			sh.sm_magic = ntohl(sh.sm_magic);
    462 			sh.sm_len = ntohl(sh.sm_len);
    463 			sh.sm_num = ntohl(sh.sm_num);
    464 
    465 			if (softs->ipf_sync_debug > 8)
    466 				printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
    467 					sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
    468 					sh.sm_table, sh.sm_rev, sh.sm_len,
    469 					sh.sm_magic);
    470 
    471 			if (sh.sm_magic != SYNHDRMAGIC) {
    472 				if (softs->ipf_sync_debug > 2)
    473 					printf("uiomove(header) invalid %s\n",
    474 						"magic");
    475 				IPFERROR(110001);
    476 				return EINVAL;
    477 			}
    478 
    479 			if (sh.sm_v != 4 && sh.sm_v != 6) {
    480 				if (softs->ipf_sync_debug > 2)
    481 					printf("uiomove(header) invalid %s\n",
    482 						"protocol");
    483 				IPFERROR(110002);
    484 				return EINVAL;
    485 			}
    486 
    487 			if (sh.sm_cmd > SMC_MAXCMD) {
    488 				if (softs->ipf_sync_debug > 2)
    489 					printf("uiomove(header) invalid %s\n",
    490 						"command");
    491 				IPFERROR(110003);
    492 				return EINVAL;
    493 			}
    494 
    495 
    496 			if (sh.sm_table > SMC_MAXTBL) {
    497 				if (softs->ipf_sync_debug > 2)
    498 					printf("uiomove(header) invalid %s\n",
    499 						"table");
    500 				IPFERROR(110004);
    501 				return EINVAL;
    502 			}
    503 
    504 		} else {
     505 			/* insufficient data, wait until next call */
     506 			if (softs->ipf_sync_debug > 2)
     507 				printf("uiomove(header) insufficient data\n");
    508 			IPFERROR(110005);
    509 			return EAGAIN;
    510 	 	}
    511 
    512 
    513 		/*
    514 		 * We have a header, so try to read the amount of data
    515 		 * needed for the request
    516 		 */
    517 
     518 		/* zero length or larger than our buffer: not supported */
     519 		if (sh.sm_len == 0 || sh.sm_len > sizeof(data)) {
     520 			if (softs->ipf_sync_debug > 2)
     521 				printf("uiomove(data) bad length %d %s\n",
     522 					sh.sm_len, "not supported");
     523 			IPFERROR(110006);
     524 			return EINVAL;
     525 		}
    526 
    527 		if (uio->uio_resid >= sh.sm_len) {
    528 
    529 			err = UIOMOVE((void *)data, sh.sm_len, UIO_WRITE, uio);
    530 
    531 			if (err) {
    532 				if (softs->ipf_sync_debug > 2)
    533 					printf("uiomove(data) failed: %d\n",
    534 						err);
    535 				return err;
    536 			}
    537 
    538 			if (softs->ipf_sync_debug > 7)
    539 				printf("uiomove(data) %d bytes read\n",
    540 					sh.sm_len);
    541 
    542 			if (sh.sm_table == SMC_STATE)
    543 				err = ipf_sync_state(softc, &sh, data);
    544 			else if (sh.sm_table == SMC_NAT)
    545 				err = ipf_sync_nat(softc, &sh, data);
    546 			if (softs->ipf_sync_debug > 7)
    547 				printf("[%d] Finished with error %d\n",
    548 					sh.sm_num, err);
    549 
    550 		} else {
    551 			/* insufficient data, wait until next call */
    552 			if (softs->ipf_sync_debug > 2)
    553 				printf("uiomove(data) %s %d bytes, got %d\n",
    554 					"insufficient data, need",
    555 					sh.sm_len, (int)uio->uio_resid);
    556 			IPFERROR(110007);
    557 			return EAGAIN;
    558 		}
    559 	}
    560 
    561 	/* no more data */
    562 	return 0;
    563 }
    564 
    565 
    566 /* ------------------------------------------------------------------------ */
    567 /* Function:    ipf_sync_read                                               */
    568 /* Returns:     int    - 0 == success, else error value.                    */
    569 /* Parameters:  uio(O) - pointer to information about where to store data   */
    570 /*                                                                          */
    571 /* This function is called when a user program wants to read some data      */
    572 /* for pending state/NAT updates.  If no data is available, the caller is   */
    573 /* put to sleep, pending a wakeup from the "lower half" of this code.       */
    574 /* ------------------------------------------------------------------------ */
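/*
 * Illustrative sketch: a minimal read loop for a user-level sync daemon.
 * The buffer length must be a multiple of 4 and at least 8 bytes or the
 * read fails with EINVAL, and the data returned consists of whole
 * synclogent_t (SMC_CREATE) and syncupdent_t (SMC_UPDATE) records.  The
 * device path and how the records are forwarded are assumptions here.
 */
#if 0	/* example only, never compiled */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

static void
drain_ipsync(const char *dev)		/* e.g. "/dev/ipsync" */
{
	char buf[8192];
	struct pollfd pfd;
	ssize_t n;

	pfd.fd = open(dev, O_RDONLY);
	if (pfd.fd == -1)
		return;
	pfd.events = POLLIN;

	for (;;) {
		if (poll(&pfd, 1, -1) <= 0)
			continue;
		n = read(pfd.fd, buf, sizeof(buf));
		if (n <= 0)
			break;
		/* ship the n bytes to the peer, e.g. over a TCP connection */
		(void) fwrite(buf, 1, (size_t)n, stdout);
	}
}
#endif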
    575 int
    576 ipf_sync_read(ipf_main_softc_t *softc, struct uio *uio)
    577 {
    578 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    579 	syncupdent_t *su;
    580 	synclogent_t *sl;
    581 	int err = 0;
    582 
    583 	if ((uio->uio_resid & 3) || (uio->uio_resid < 8)) {
    584 		IPFERROR(110008);
    585 		return EINVAL;
    586 	}
    587 
    588 #  if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__)
    589 	uio->uio_rw = UIO_READ;
    590 #  endif
    591 
    592 	MUTEX_ENTER(&softs->ipsl_mutex);
    593 	while ((softs->sl_tail == softs->sl_idx) &&
    594 	       (softs->su_tail == softs->su_idx)) {
    595 #  if defined(_KERNEL)
    596 #   if SOLARIS
    597 		if (!cv_wait_sig(&softs->ipslwait, &softs->ipsl_mutex.ipf_lk)) {
    598 			MUTEX_EXIT(&softs->ipsl_mutex);
    599 			IPFERROR(110009);
    600 			return EINTR;
    601 		}
    602 #   else
    603 #    ifdef __hpux
    604 		{
    605 		lock_t *l;
    606 
    607 		l = get_sleep_lock(&softs->sl_tail);
    608 		err = sleep(&softs->sl_tail, PZERO+1);
    609 		if (err) {
    610 			MUTEX_EXIT(&softs->ipsl_mutex);
    611 			IPFERROR(110010);
    612 			return EINTR;
    613 		}
    614 		spinunlock(l);
    615 		}
    616 #    else /* __hpux */
    617 #     ifdef __osf__
    618 		err = mpsleep(&softs->sl_tail, PSUSP|PCATCH,  "ipl sleep", 0,
    619 			      &softs->ipsl_mutex, MS_LOCK_SIMPLE);
    620 		if (err) {
    621 			IPFERROR(110011);
    622 			return EINTR;
    623 		}
    624 #     else
    625 		MUTEX_EXIT(&softs->ipsl_mutex);
    626 		err = SLEEP(&softs->sl_tail, "ipl sleep");
    627 		if (err) {
    628 			IPFERROR(110012);
    629 			return EINTR;
    630 		}
    631 		MUTEX_ENTER(&softs->ipsl_mutex);
    632 #     endif /* __osf__ */
    633 #    endif /* __hpux */
    634 #   endif /* SOLARIS */
    635 #  endif /* _KERNEL */
    636 	}
    637 
    638 	while ((softs->sl_tail < softs->sl_idx) &&
    639 	       (uio->uio_resid > sizeof(*sl))) {
    640 		sl = softs->synclog + softs->sl_tail++;
    641 		MUTEX_EXIT(&softs->ipsl_mutex);
    642 		err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
    643 		if (err != 0)
    644 			goto goterror;
    645 		MUTEX_ENTER(&softs->ipsl_mutex);
    646 	}
    647 
    648 	while ((softs->su_tail < softs->su_idx) &&
    649 	       (uio->uio_resid > sizeof(*su))) {
    650 		su = softs->syncupd + softs->su_tail;
    651 		softs->su_tail++;
    652 		MUTEX_EXIT(&softs->ipsl_mutex);
    653 		err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
    654 		if (err != 0)
    655 			goto goterror;
    656 		MUTEX_ENTER(&softs->ipsl_mutex);
    657 		if (su->sup_hdr.sm_sl != NULL)
    658 			su->sup_hdr.sm_sl->sl_idx = -1;
    659 	}
    660 	if (softs->sl_tail == softs->sl_idx)
    661 		softs->sl_tail = softs->sl_idx = 0;
    662 	if (softs->su_tail == softs->su_idx)
    663 		softs->su_tail = softs->su_idx = 0;
    664 	MUTEX_EXIT(&softs->ipsl_mutex);
    665 goterror:
    666 	return err;
    667 }
    668 
    669 
    670 /* ------------------------------------------------------------------------ */
    671 /* Function:    ipf_sync_state                                              */
    672 /* Returns:     int    - 0 == success, else error value.                    */
    673 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
     674 /*              data(I) - pointer to sync record data copied from the user  */
     675 /*                                                                          */
     676 /* Updates the state table according to information passed in the sync      */
     677 /* header.  The amount and layout of the extra data depends on the command  */
     678 /* carried in the header.  This function can                                */
    679 /* create a new state entry or update one.  Deletion is left to the state   */
    680 /* structures being timed out correctly.                                    */
    681 /* ------------------------------------------------------------------------ */
    682 static int
    683 ipf_sync_state(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
    684 {
    685 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    686 	synctcp_update_t su;
    687 	ipstate_t *is, sn;
    688 	synclist_t *sl;
    689 	frentry_t *fr;
    690 	u_int hv;
    691 	int err = 0;
    692 
    693 	hv = sp->sm_num & (softs->ipf_sync_state_tab_sz - 1);
    694 
    695 	switch (sp->sm_cmd)
    696 	{
    697 	case SMC_CREATE :
    698 
    699 		bcopy(data, &sn, sizeof(sn));
    700 		KMALLOC(is, ipstate_t *);
    701 		if (is == NULL) {
    702 			IPFERROR(110013);
    703 			err = ENOMEM;
    704 			break;
    705 		}
    706 
    707 		KMALLOC(sl, synclist_t *);
    708 		if (sl == NULL) {
    709 			IPFERROR(110014);
    710 			err = ENOMEM;
    711 			KFREE(is);
    712 			break;
    713 		}
    714 
    715 		bzero((char *)is, offsetof(ipstate_t, is_die));
    716 		bcopy((char *)&sn.is_die, (char *)&is->is_die,
    717 		      sizeof(*is) - offsetof(ipstate_t, is_die));
    718 		ipf_sync_storder(0, is);
    719 
    720 		/*
    721 		 * We need to find the same rule on the slave as was used on
    722 		 * the master to create this state entry.
    723 		 */
    724 		READ_ENTER(&softc->ipf_mutex);
    725 		fr = ipf_getrulen(softc, IPL_LOGIPF, sn.is_group, sn.is_rulen);
    726 		if (fr != NULL) {
    727 			MUTEX_ENTER(&fr->fr_lock);
    728 			fr->fr_ref++;
    729 			fr->fr_statecnt++;
    730 			MUTEX_EXIT(&fr->fr_lock);
    731 		}
    732 		RWLOCK_EXIT(&softc->ipf_mutex);
    733 
    734 		if (softs->ipf_sync_debug > 4)
    735 			printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
    736 
    737 		is->is_rule = fr;
    738 		is->is_sync = sl;
    739 
    740 		sl->sl_idx = -1;
    741 		sl->sl_ips = is;
    742 		bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
    743 
    744 		WRITE_ENTER(&softs->ipf_syncstate);
    745 		WRITE_ENTER(&softc->ipf_state);
    746 
    747 		sl->sl_pnext = softs->syncstatetab + hv;
    748 		sl->sl_next = softs->syncstatetab[hv];
    749 		if (softs->syncstatetab[hv] != NULL)
    750 			softs->syncstatetab[hv]->sl_pnext = &sl->sl_next;
    751 		softs->syncstatetab[hv] = sl;
    752 		MUTEX_DOWNGRADE(&softs->ipf_syncstate);
    753 		ipf_state_insert(softc, is, sp->sm_rev);
    754 		/*
    755 		 * Do not initialise the interface pointers for the state
    756 		 * entry as the full complement of interface names may not
    757 		 * be present.
    758 		 *
    759 		 * Put this state entry on its timeout queue.
    760 		 */
    761 		/*fr_setstatequeue(is, sp->sm_rev);*/
    762 		break;
    763 
    764 	case SMC_UPDATE :
    765 		bcopy(data, &su, sizeof(su));
    766 
    767 		if (softs->ipf_sync_debug > 4)
    768 			printf("[%d] Update age %lu state %d/%d \n",
    769 				sp->sm_num, su.stu_age, su.stu_state[0],
    770 				su.stu_state[1]);
    771 
    772 		READ_ENTER(&softs->ipf_syncstate);
    773 		for (sl = softs->syncstatetab[hv]; (sl != NULL);
    774 		     sl = sl->sl_next)
    775 			if (sl->sl_hdr.sm_num == sp->sm_num)
    776 				break;
    777 		if (sl == NULL) {
    778 			if (softs->ipf_sync_debug > 1)
    779 				printf("[%d] State not found - can't update\n",
    780 					sp->sm_num);
    781 			RWLOCK_EXIT(&softs->ipf_syncstate);
    782 			IPFERROR(110015);
    783 			err = ENOENT;
    784 			break;
    785 		}
    786 
    787 		READ_ENTER(&softc->ipf_state);
    788 
    789 		if (softs->ipf_sync_debug > 6)
    790 			printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
    791 				sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
    792 				sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
    793 				sl->sl_hdr.sm_rev);
    794 
    795 		is = sl->sl_ips;
    796 
    797 		MUTEX_ENTER(&is->is_lock);
    798 		switch (sp->sm_p)
    799 		{
    800 		case IPPROTO_TCP :
    801 			/* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
    802 			is->is_send = su.stu_data[0].td_end;
    803 			is->is_maxsend = su.stu_data[0].td_maxend;
    804 			is->is_maxswin = su.stu_data[0].td_maxwin;
    805 			is->is_state[0] = su.stu_state[0];
    806 			is->is_dend = su.stu_data[1].td_end;
    807 			is->is_maxdend = su.stu_data[1].td_maxend;
    808 			is->is_maxdwin = su.stu_data[1].td_maxwin;
    809 			is->is_state[1] = su.stu_state[1];
    810 			break;
    811 		default :
    812 			break;
    813 		}
    814 
    815 		if (softs->ipf_sync_debug > 6)
    816 			printf("[%d] Setting timers for state\n", sp->sm_num);
    817 
    818 		ipf_state_setqueue(softc, is, sp->sm_rev);
    819 
    820 		MUTEX_EXIT(&is->is_lock);
    821 		break;
    822 
    823 	default :
    824 		IPFERROR(110016);
    825 		err = EINVAL;
    826 		break;
    827 	}
    828 
    829 	if (err == 0) {
    830 		RWLOCK_EXIT(&softc->ipf_state);
    831 		RWLOCK_EXIT(&softs->ipf_syncstate);
    832 	}
    833 
    834 	if (softs->ipf_sync_debug > 6)
    835 		printf("[%d] Update completed with error %d\n",
    836 			sp->sm_num, err);
    837 
    838 	return err;
    839 }
    840 
    841 
    842 /* ------------------------------------------------------------------------ */
    843 /* Function:    ipf_sync_del                                                */
    844 /* Returns:     Nil                                                         */
    845 /* Parameters:  sl(I) - pointer to synclist object to delete                */
    846 /*                                                                          */
    847 /* Deletes an object from the synclist.                                     */
    848 /* ------------------------------------------------------------------------ */
    849 static void
    850 ipf_sync_del(ipf_sync_softc_t *softs, synclist_t *sl)
    851 {
    852 	*sl->sl_pnext = sl->sl_next;
    853 	if (sl->sl_next != NULL)
    854 		sl->sl_next->sl_pnext = sl->sl_pnext;
    855 	if (sl->sl_idx != -1)
    856 		softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
    857 }
    858 
    859 
    860 /* ------------------------------------------------------------------------ */
    861 /* Function:    ipf_sync_del_state                                          */
    862 /* Returns:     Nil                                                         */
    863 /* Parameters:  sl(I) - pointer to synclist object to delete                */
    864 /*                                                                          */
     865 /* Deletes an object from the synclist state table and frees its memory.    */
    866 /* ------------------------------------------------------------------------ */
    867 void
    868 ipf_sync_del_state(void *arg, synclist_t *sl)
    869 {
    870 	ipf_sync_softc_t *softs = arg;
    871 
    872 	WRITE_ENTER(&softs->ipf_syncstate);
    873 	ipf_sync_del(softs, sl);
    874 	RWLOCK_EXIT(&softs->ipf_syncstate);
    875 	KFREE(sl);
    876 }
    877 
    878 
    879 /* ------------------------------------------------------------------------ */
    880 /* Function:    ipf_sync_del_nat                                            */
    881 /* Returns:     Nil                                                         */
    882 /* Parameters:  sl(I) - pointer to synclist object to delete                */
    883 /*                                                                          */
     884 /* Deletes an object from the synclist nat table and frees its memory.      */
    885 /* ------------------------------------------------------------------------ */
    886 void
    887 ipf_sync_del_nat(void *arg, synclist_t *sl)
    888 {
    889 	ipf_sync_softc_t *softs = arg;
    890 
    891 	WRITE_ENTER(&softs->ipf_syncnat);
    892 	ipf_sync_del(softs, sl);
    893 	RWLOCK_EXIT(&softs->ipf_syncnat);
    894 	KFREE(sl);
    895 }
    896 
    897 
    898 /* ------------------------------------------------------------------------ */
    899 /* Function:    ipf_sync_nat                                                */
    900 /* Returns:     int    - 0 == success, else error value.                    */
    901 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
     902 /*              data(I) - pointer to sync record data copied from the user  */
     903 /*                                                                          */
     904 /* Updates the NAT table according to information passed in the sync        */
     905 /* header.  The amount and layout of the extra data depends on the command  */
     906 /* carried in the header.  This function can                                */
    907 /* create a new NAT entry or update one.  Deletion is left to the NAT       */
    908 /* structures being timed out correctly.                                    */
    909 /* ------------------------------------------------------------------------ */
    910 static int
    911 ipf_sync_nat(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
    912 {
    913 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    914 	syncupdent_t su;
    915 	nat_t *n, *nat;
    916 	synclist_t *sl;
    917 	u_int hv = 0;
    918 	int err = 0;
    919 
    920 	READ_ENTER(&softs->ipf_syncnat);
    921 
    922 	switch (sp->sm_cmd)
    923 	{
    924 	case SMC_CREATE :
    925 		KMALLOC(n, nat_t *);
    926 		if (n == NULL) {
    927 			IPFERROR(110017);
    928 			err = ENOMEM;
    929 			break;
    930 		}
    931 
    932 		KMALLOC(sl, synclist_t *);
    933 		if (sl == NULL) {
    934 			IPFERROR(110018);
    935 			err = ENOMEM;
    936 			KFREE(n);
    937 			break;
    938 		}
    939 
    940 		nat = (nat_t *)data;
    941 		bzero((char *)n, offsetof(nat_t, nat_age));
    942 		bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
    943 		      sizeof(*n) - offsetof(nat_t, nat_age));
    944 		ipf_sync_natorder(0, n);
    945 		n->nat_sync = sl;
    946 		n->nat_rev = sl->sl_rev;
    947 
    948 		sl->sl_idx = -1;
    949 		sl->sl_ipn = n;
    950 		sl->sl_num = ntohl(sp->sm_num);
    951 
    952 		WRITE_ENTER(&softc->ipf_nat);
    953 		sl->sl_pnext = softs->syncnattab + hv;
    954 		sl->sl_next = softs->syncnattab[hv];
    955 		if (softs->syncnattab[hv] != NULL)
    956 			softs->syncnattab[hv]->sl_pnext = &sl->sl_next;
    957 		softs->syncnattab[hv] = sl;
    958 		(void) ipf_nat_insert(softc, softc->ipf_nat_soft, n);
    959 		RWLOCK_EXIT(&softc->ipf_nat);
    960 		break;
    961 
    962 	case SMC_UPDATE :
    963 		bcopy(data, &su, sizeof(su));
    964 
    965 		for (sl = softs->syncnattab[hv]; (sl != NULL);
    966 		     sl = sl->sl_next)
    967 			if (sl->sl_hdr.sm_num == sp->sm_num)
    968 				break;
    969 		if (sl == NULL) {
    970 			IPFERROR(110019);
    971 			err = ENOENT;
    972 			break;
    973 		}
    974 
    975 		READ_ENTER(&softc->ipf_nat);
    976 
    977 		nat = sl->sl_ipn;
    978 		nat->nat_rev = sl->sl_rev;
    979 
    980 		MUTEX_ENTER(&nat->nat_lock);
    981 		ipf_nat_setqueue(softc, softc->ipf_nat_soft, nat);
    982 		MUTEX_EXIT(&nat->nat_lock);
    983 
    984 		RWLOCK_EXIT(&softc->ipf_nat);
    985 
    986 		break;
    987 
    988 	default :
    989 		IPFERROR(110020);
    990 		err = EINVAL;
    991 		break;
    992 	}
    993 
    994 	RWLOCK_EXIT(&softs->ipf_syncnat);
    995 	return err;
    996 }
    997 
    998 
    999 /* ------------------------------------------------------------------------ */
   1000 /* Function:    ipf_sync_new                                                */
   1001 /* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
   1002 /*                            data structure.                               */
   1003 /* Parameters:  tab(I) - type of synclist_t to create                       */
   1004 /*              fin(I) - pointer to packet information                      */
   1005 /*              ptr(I) - pointer to owning object                           */
   1006 /*                                                                          */
   1007 /* Creates a new sync table entry and notifies any sleepers that it's there */
   1008 /* waiting to be processed.                                                 */
   1009 /* ------------------------------------------------------------------------ */
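/*
 * Illustrative call pattern: the state and NAT modules are expected to
 * attach the returned synclist_t to the object they pass in, roughly as
 * below.  This is a sketch only; "is" and "fin" stand for the caller's
 * state entry and packet information, and the real call sites live in
 * the state/NAT code, not in this file.
 */
#if 0	/* example only, never compiled */
	is->is_sync = ipf_sync_new(softc, SMC_STATE, fin, is);
	if (is->is_sync == NULL) {
		/* sync log full or out of memory: entry is not synchronised */
	}
#endif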
   1010 synclist_t *
   1011 ipf_sync_new(ipf_main_softc_t *softc, int tab, fr_info_t *fin, void *ptr)
   1012 {
   1013 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1014 	synclist_t *sl, *ss;
   1015 	synclogent_t *sle;
   1016 	u_int hv, sz;
   1017 
   1018 	if (softs->sl_idx == softs->ipf_sync_log_sz)
   1019 		return NULL;
   1020 	KMALLOC(sl, synclist_t *);
   1021 	if (sl == NULL)
   1022 		return NULL;
   1023 
   1024 	MUTEX_ENTER(&softs->ipf_syncadd);
   1025 	/*
   1026 	 * Get a unique number for this synclist_t.  The number is only meant
   1027 	 * to be unique for the lifetime of the structure and may be reused
   1028 	 * later.
   1029 	 */
   1030 	softs->ipf_sync_num++;
   1031 	if (softs->ipf_sync_num == 0) {
   1032 		softs->ipf_sync_num = 1;
   1033 		softs->ipf_sync_wrap++;
   1034 	}
   1035 
   1036 	/*
    1037 	 * Use the sync number of the object as the hash key.  Should end up
    1038 	 * with a relatively even distribution over time.
    1039 	 * XXX - an attacker could launch a DoS attack, of sorts, if they are
    1040 	 * the only one causing new table entries, by only keeping open every
    1041 	 * nth connection they make, where n is a value in the interval
    1042 	 * [0, SYNC_STATETABSZ-1].
   1043 	 */
   1044 	switch (tab)
   1045 	{
   1046 	case SMC_STATE :
   1047 		hv = softs->ipf_sync_num & (softs->ipf_sync_state_tab_sz - 1);
   1048 		while (softs->ipf_sync_wrap != 0) {
   1049 			for (ss = softs->syncstatetab[hv]; ss; ss = ss->sl_next)
   1050 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
   1051 					break;
   1052 			if (ss == NULL)
   1053 				break;
   1054 			softs->ipf_sync_num++;
   1055 			hv = softs->ipf_sync_num &
   1056 			     (softs->ipf_sync_state_tab_sz - 1);
   1057 		}
   1058 		sl->sl_pnext = softs->syncstatetab + hv;
   1059 		sl->sl_next = softs->syncstatetab[hv];
   1060 		softs->syncstatetab[hv] = sl;
   1061 		break;
   1062 
   1063 	case SMC_NAT :
   1064 		hv = softs->ipf_sync_num & (softs->ipf_sync_nat_tab_sz - 1);
   1065 		while (softs->ipf_sync_wrap != 0) {
   1066 			for (ss = softs->syncnattab[hv]; ss; ss = ss->sl_next)
   1067 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
   1068 					break;
   1069 			if (ss == NULL)
   1070 				break;
   1071 			softs->ipf_sync_num++;
   1072 			hv = softs->ipf_sync_num &
   1073 			     (softs->ipf_sync_nat_tab_sz - 1);
   1074 		}
   1075 		sl->sl_pnext = softs->syncnattab + hv;
   1076 		sl->sl_next = softs->syncnattab[hv];
   1077 		softs->syncnattab[hv] = sl;
   1078 		break;
   1079 
   1080 	default :
   1081 		break;
   1082 	}
   1083 
   1084 	sl->sl_num = softs->ipf_sync_num;
   1085 	MUTEX_EXIT(&softs->ipf_syncadd);
   1086 
   1087 	sl->sl_magic = htonl(SYNHDRMAGIC);
   1088 	sl->sl_v = fin->fin_v;
   1089 	sl->sl_p = fin->fin_p;
   1090 	sl->sl_cmd = SMC_CREATE;
   1091 	sl->sl_idx = -1;
   1092 	sl->sl_table = tab;
   1093 	sl->sl_rev = fin->fin_rev;
   1094 	if (tab == SMC_STATE) {
   1095 		sl->sl_ips = ptr;
   1096 		sz = sizeof(*sl->sl_ips);
   1097 	} else if (tab == SMC_NAT) {
   1098 		sl->sl_ipn = ptr;
   1099 		sz = sizeof(*sl->sl_ipn);
   1100 	} else {
   1101 		ptr = NULL;
   1102 		sz = 0;
   1103 	}
   1104 	sl->sl_len = sz;
   1105 
   1106 	/*
   1107 	 * Create the log entry to be read by a user daemon.  When it has been
   1108 	 * finished and put on the queue, send a signal to wakeup any waiters.
   1109 	 */
   1110 	MUTEX_ENTER(&softs->ipf_syncadd);
   1111 	sle = softs->synclog + softs->sl_idx++;
   1112 	bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
   1113 	      sizeof(sle->sle_hdr));
   1114 	sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
   1115 	sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
   1116 	if (ptr != NULL) {
   1117 		bcopy((char *)ptr, (char *)&sle->sle_un, sz);
   1118 		if (tab == SMC_STATE) {
   1119 			ipf_sync_storder(1, &sle->sle_un.sleu_ips);
   1120 		} else if (tab == SMC_NAT) {
   1121 			ipf_sync_natorder(1, &sle->sle_un.sleu_ipn);
   1122 		}
   1123 	}
   1124 	MUTEX_EXIT(&softs->ipf_syncadd);
   1125 
   1126 	ipf_sync_wakeup(softc);
   1127 	return sl;
   1128 }
   1129 
   1130 
   1131 /* ------------------------------------------------------------------------ */
   1132 /* Function:    ipf_sync_update                                             */
   1133 /* Returns:     Nil                                                         */
   1134 /* Parameters:  tab(I) - type of synclist_t to create                       */
   1135 /*              fin(I) - pointer to packet information                      */
   1136 /*              sl(I)  - pointer to synchronisation object                  */
   1137 /*                                                                          */
    1138 /* For outbound packets only, create a sync update record for the user      */
   1139 /* process to read.                                                         */
   1140 /* ------------------------------------------------------------------------ */
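/*
 * Illustrative call pattern: this is expected to be invoked from the
 * packet path once a packet has matched an entry that carries a sync
 * reference.  A sketch only; "is" and "fin" stand for the caller's state
 * entry and packet information, and the real call sites are in the
 * state/NAT code.
 */
#if 0	/* example only, never compiled */
	if (is->is_sync != NULL)
		ipf_sync_update(softc, SMC_STATE, fin, is->is_sync);
#endif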
   1141 void
   1142 ipf_sync_update(ipf_main_softc_t *softc, int tab, fr_info_t *fin,
   1143     synclist_t *sl)
   1144 {
   1145 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1146 	synctcp_update_t *st;
   1147 	syncupdent_t *slu;
   1148 	ipstate_t *ips;
   1149 	nat_t *nat;
   1150 	ipfrwlock_t *lock;
   1151 
   1152 	if (fin->fin_out == 0 || sl == NULL)
   1153 		return;
   1154 
   1155 	if (tab == SMC_STATE) {
   1156 		lock = &softs->ipf_syncstate;
   1157 	} else {
   1158 		lock = &softs->ipf_syncnat;
   1159 	}
   1160 
   1161 	READ_ENTER(lock);
   1162 	if (sl->sl_idx == -1) {
   1163 		MUTEX_ENTER(&softs->ipf_syncadd);
   1164 		slu = softs->syncupd + softs->su_idx;
   1165 		sl->sl_idx = softs->su_idx++;
   1166 		MUTEX_EXIT(&softs->ipf_syncadd);
   1167 
   1168 		bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
   1169 		      sizeof(slu->sup_hdr));
   1170 		slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
   1171 		slu->sup_hdr.sm_sl = sl;
   1172 		slu->sup_hdr.sm_cmd = SMC_UPDATE;
   1173 		slu->sup_hdr.sm_table = tab;
   1174 		slu->sup_hdr.sm_num = htonl(sl->sl_num);
   1175 		slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
   1176 		slu->sup_hdr.sm_rev = fin->fin_rev;
   1177 # if 0
   1178 		if (fin->fin_p == IPPROTO_TCP) {
   1179 			st->stu_len[0] = 0;
   1180 			st->stu_len[1] = 0;
   1181 		}
   1182 # endif
   1183 	} else
   1184 		slu = softs->syncupd + sl->sl_idx;
   1185 
   1186 	/*
   1187 	 * Only TCP has complex timeouts, others just use default timeouts.
   1188 	 * For TCP, we only need to track the connection state and window.
   1189 	 */
   1190 	if (fin->fin_p == IPPROTO_TCP) {
   1191 		st = &slu->sup_tcp;
   1192 		if (tab == SMC_STATE) {
   1193 			ips = sl->sl_ips;
   1194 			st->stu_age = htonl(ips->is_die);
   1195 			st->stu_data[0].td_end = ips->is_send;
   1196 			st->stu_data[0].td_maxend = ips->is_maxsend;
   1197 			st->stu_data[0].td_maxwin = ips->is_maxswin;
   1198 			st->stu_state[0] = ips->is_state[0];
   1199 			st->stu_data[1].td_end = ips->is_dend;
   1200 			st->stu_data[1].td_maxend = ips->is_maxdend;
   1201 			st->stu_data[1].td_maxwin = ips->is_maxdwin;
   1202 			st->stu_state[1] = ips->is_state[1];
   1203 		} else if (tab == SMC_NAT) {
   1204 			nat = sl->sl_ipn;
   1205 			st->stu_age = htonl(nat->nat_age);
   1206 		}
   1207 	}
   1208 	RWLOCK_EXIT(lock);
   1209 
   1210 	ipf_sync_wakeup(softc);
   1211 }
   1212 
   1213 
   1214 /* ------------------------------------------------------------------------ */
   1215 /* Function:    ipf_sync_flush_table                                        */
   1216 /* Returns:     int - number of entries freed by flushing table             */
   1217 /* Parameters:  tabsize(I) - size of the array pointed to by table          */
   1218 /*              table(I)   - pointer to sync table to empty                 */
   1219 /*                                                                          */
   1220 /* Walk through a table of sync entries and free each one.  It is assumed   */
   1221 /* that some lock is held so that nobody else tries to access the table     */
   1222 /* during this cleanup.                                                     */
   1223 /* ------------------------------------------------------------------------ */
   1224 static int
   1225 ipf_sync_flush_table(ipf_sync_softc_t *softs, int tabsize, synclist_t **table)
   1226 {
   1227 	synclist_t *sl;
   1228 	int i, items;
   1229 
   1230 	items = 0;
   1231 
   1232 	for (i = 0; i < tabsize; i++) {
   1233 		while ((sl = table[i]) != NULL) {
   1234 			switch (sl->sl_table) {
   1235 			case SMC_STATE :
   1236 				if (sl->sl_ips != NULL)
   1237 					sl->sl_ips->is_sync = NULL;
   1238 				break;
   1239 			case SMC_NAT :
   1240 				if (sl->sl_ipn != NULL)
   1241 					sl->sl_ipn->nat_sync = NULL;
   1242 				break;
   1243 			}
   1244 			if (sl->sl_next != NULL)
   1245 				sl->sl_next->sl_pnext = sl->sl_pnext;
   1246 			table[i] = sl->sl_next;
   1247 			if (sl->sl_idx != -1)
   1248 				softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
   1249 			KFREE(sl);
   1250 			items++;
   1251 		}
   1252 	}
   1253 
   1254 	return items;
   1255 }
   1256 
   1257 
   1258 /* ------------------------------------------------------------------------ */
   1259 /* Function:    ipf_sync_ioctl                                              */
   1260 /* Returns:     int - 0 == success, != 0 == failure                         */
   1261 /* Parameters:  data(I) - pointer to ioctl data                             */
   1262 /*              cmd(I)  - ioctl command integer                             */
   1263 /*              mode(I) - file mode bits used with open                     */
   1264 /*                                                                          */
    1265 /* This function handles the SIOCIPFFL ioctl, flushing the sync log         */
    1266 /* (SMC_RLOG) or a sync table (SMC_NAT, SMC_STATE); others return EINVAL.   */
   1267 /* ------------------------------------------------------------------------ */
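/*
 * Illustrative sketch: flushing the pending sync log from user space with
 * SIOCIPFFL on a descriptor open on the sync device.  The ioctl copies a
 * count back into the argument; SMC_NAT or SMC_STATE may be passed instead
 * to empty the respective sync table.  The surrounding helper is an
 * assumption, not part of the ipfilter tools.
 */
#if 0	/* example only, never compiled */
#include <sys/ioctl.h>
#include <stdio.h>

#include "netinet/ip_compat.h"
#include "netinet/ip_fil.h"
#include "netinet/ip_sync.h"

static int
flush_sync_log(int fd)			/* fd open on the sync device */
{
	int arg = SMC_RLOG;

	if (ioctl(fd, SIOCIPFFL, &arg) == -1)
		return -1;
	printf("flush returned %d\n", arg);
	return 0;
}
#endif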
   1268 int
   1269 ipf_sync_ioctl(ipf_main_softc_t *softc, void *data, ioctlcmd_t cmd, int mode,
   1270     int uid, void *ctx)
   1271 {
   1272 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1273 	int error, i;
   1274 	SPL_INT(s);
   1275 
   1276 	switch (cmd)
   1277 	{
    1278 	case SIOCIPFFL :
   1279 		error = BCOPYIN(data, &i, sizeof(i));
   1280 		if (error != 0) {
   1281 			IPFERROR(110023);
   1282 			error = EFAULT;
   1283 			break;
   1284 		}
   1285 
   1286 		switch (i)
   1287 		{
   1288 		case SMC_RLOG :
   1289 			SPL_NET(s);
   1290 			MUTEX_ENTER(&softs->ipsl_mutex);
   1291 			i = (softs->sl_tail - softs->sl_idx) +
   1292 			    (softs->su_tail - softs->su_idx);
   1293 			softs->sl_idx = 0;
   1294 			softs->su_idx = 0;
   1295 			softs->sl_tail = 0;
   1296 			softs->su_tail = 0;
   1297 			MUTEX_EXIT(&softs->ipsl_mutex);
   1298 			SPL_X(s);
   1299 			break;
   1300 
   1301 		case SMC_NAT :
   1302 			SPL_NET(s);
   1303 			WRITE_ENTER(&softs->ipf_syncnat);
   1304 			i = ipf_sync_flush_table(softs, SYNC_NATTABSZ,
   1305 						 softs->syncnattab);
   1306 			RWLOCK_EXIT(&softs->ipf_syncnat);
   1307 			SPL_X(s);
   1308 			break;
   1309 
   1310 		case SMC_STATE :
   1311 			SPL_NET(s);
   1312 			WRITE_ENTER(&softs->ipf_syncstate);
   1313 			i = ipf_sync_flush_table(softs, SYNC_STATETABSZ,
   1314 						 softs->syncstatetab);
   1315 			RWLOCK_EXIT(&softs->ipf_syncstate);
   1316 			SPL_X(s);
   1317 			break;
   1318 		}
   1319 
   1320 		error = BCOPYOUT(&i, data, sizeof(i));
   1321 		if (error != 0) {
   1322 			IPFERROR(110022);
   1323 			error = EFAULT;
   1324 		}
   1325 		break;
   1326 
   1327 	default :
   1328 		IPFERROR(110021);
   1329 		error = EINVAL;
   1330 		break;
   1331 	}
   1332 
   1333 	return error;
   1334 }
   1335 
   1336 
   1337 /* ------------------------------------------------------------------------ */
   1338 /* Function:    ipf_sync_canread                                            */
   1339 /* Returns:     int - 0 == success, != 0 == failure                         */
   1340 /* Parameters:  Nil                                                         */
   1341 /*                                                                          */
   1342 /* This function provides input to the poll handler about whether or not    */
   1343 /* there is data waiting to be read from the /dev/ipsync device.            */
   1344 /* ------------------------------------------------------------------------ */
   1345 int
   1346 ipf_sync_canread(void *arg)
   1347 {
   1348 	ipf_sync_softc_t *softs = arg;
   1349 	return !((softs->sl_tail == softs->sl_idx) &&
   1350 		 (softs->su_tail == softs->su_idx));
   1351 }
   1352 
   1353 
   1354 /* ------------------------------------------------------------------------ */
   1355 /* Function:    ipf_sync_canwrite                                           */
   1356 /* Returns:     int - 1 == can always write                                 */
   1357 /* Parameters:  Nil                                                         */
   1358 /*                                                                          */
    1359 /* This function lets the poll handler know that it is always ready and     */
    1360 /* willing to accept write events.                                          */
   1361 /* XXX Maybe this should return false if the sync table is full?            */
   1362 /* ------------------------------------------------------------------------ */
   1363 int
   1364 ipf_sync_canwrite(void *arg)
   1365 {
   1366 	return 1;
   1367 }
   1368 
   1369 
   1370 /* ------------------------------------------------------------------------ */
   1371 /* Function:    ipf_sync_wakeup                                             */
   1372 /* Parameters:  Nil                                                         */
   1373 /* Returns:     Nil                                                         */
   1374 /*                                                                          */
   1375 /* This function implements the heuristics that decide how often to         */
   1376 /* generate a poll wakeup for programs that are waiting for information     */
   1377 /* about when they can do a read on /dev/ipsync.                            */
   1378 /*                                                                          */
   1379 /* There are three different considerations here:                           */
   1380 /* - do not keep a program waiting too long: ipf_sync_wake_interval is the  */
   1381 /*   maximum number of ipf ticks to let pass by;                            */
    1382 /* - do not let the queue of outstanding entries awaiting notification      */
   1383 /*   get too full (ipf_sync_queue_high_wm is the high water mark);          */
   1384 /* - do not let too many events get collapsed in before deciding that the   */
   1385 /*   other host(s) need an update (ipf_sync_event_high_wm is the high water */
   1386 /*   mark for this counter.)                                                */
   1387 /* ------------------------------------------------------------------------ */
   1388 static void
   1389 ipf_sync_wakeup(ipf_main_softc_t *softc)
   1390 {
   1391 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1392 
   1393 	softs->ipf_sync_events++;
   1394 	if ((softc->ipf_ticks >
   1395 	    softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval) ||
   1396 	    (softs->ipf_sync_events > softs->ipf_sync_event_high_wm) ||
   1397 	    ((softs->sl_tail - softs->sl_idx) >
   1398 	     softs->ipf_sync_queue_high_wm) ||
   1399 	    ((softs->su_tail - softs->su_idx) >
   1400 	     softs->ipf_sync_queue_high_wm)) {
   1401 
   1402 		ipf_sync_poll_wakeup(softc);
   1403 	}
   1404 }
   1405 
   1406 
   1407 /* ------------------------------------------------------------------------ */
   1408 /* Function:    ipf_sync_poll_wakeup                                        */
   1409 /* Parameters:  Nil                                                         */
   1410 /* Returns:     Nil                                                         */
   1411 /*                                                                          */
   1412 /* Deliver a poll wakeup and reset counters for two of the three heuristics */
   1413 /* ------------------------------------------------------------------------ */
   1414 static void
   1415 ipf_sync_poll_wakeup(ipf_main_softc_t *softc)
   1416 {
   1417 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1418 
   1419 	softs->ipf_sync_events = 0;
   1420 	softs->ipf_sync_lastwakeup = softc->ipf_ticks;
   1421 
   1422 # ifdef _KERNEL
   1423 #  if SOLARIS
   1424 	MUTEX_ENTER(&softs->ipsl_mutex);
   1425 	cv_signal(&softs->ipslwait);
   1426 	MUTEX_EXIT(&softs->ipsl_mutex);
   1427 	pollwakeup(&softc->ipf_poll_head[IPL_LOGSYNC], POLLIN|POLLRDNORM);
   1428 #  else
   1429 	WAKEUP(&softs->sl_tail, 0);
   1430 	POLLWAKEUP(IPL_LOGSYNC);
   1431 #  endif
   1432 # endif
   1433 }
   1434 
   1435 
   1436 /* ------------------------------------------------------------------------ */
   1437 /* Function:    ipf_sync_expire                                             */
   1438 /* Parameters:  Nil                                                         */
   1439 /* Returns:     Nil                                                         */
   1440 /*                                                                          */
    1441 /* This is the function called every ipf_tick.  It implements one of the    */
   1442 /* three heuristics above *IF* there are events waiting.                    */
   1443 /* ------------------------------------------------------------------------ */
   1444 void
   1445 ipf_sync_expire(ipf_main_softc_t *softc)
   1446 {
   1447 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1448 
   1449 	if ((softs->ipf_sync_events > 0) &&
   1450 	    (softc->ipf_ticks >
   1451 	     softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval)) {
   1452 		ipf_sync_poll_wakeup(softc);
   1453 	}
   1454 }
   1455