Home | History | Annotate | Line # | Download | only in dist
ip_sync.c revision 1.2
      1 /*	$NetBSD: ip_sync.c,v 1.2 2012/07/22 14:27:35 darrenr Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 2012 by Darren Reed.
      5  *
      6  * See the IPFILTER.LICENCE file for details on licencing.
      7  */
      8 #if defined(KERNEL) || defined(_KERNEL)
      9 # undef KERNEL
     10 # undef _KERNEL
     11 # define        KERNEL	1
     12 # define        _KERNEL	1
     13 #endif
     14 #include <sys/errno.h>
     15 #include <sys/types.h>
     16 #include <sys/param.h>
     17 #include <sys/file.h>
     18 #if !defined(_KERNEL) && !defined(__KERNEL__)
     19 # include <stdio.h>
     20 # include <stdlib.h>
     21 # include <string.h>
     22 # define _KERNEL
     23 # define KERNEL
     24 # ifdef __OpenBSD__
     25 struct file;
     26 # endif
     27 # include <sys/uio.h>
     28 # undef _KERNEL
     29 # undef KERNEL
     30 #else
     31 # include <sys/systm.h>
     32 # if !defined(__SVR4) && !defined(__svr4__)
     33 #  include <sys/mbuf.h>
     34 # endif
     35 # include <sys/select.h>
     36 # if __FreeBSD_version >= 500000
     37 #  include <sys/selinfo.h>
     38 # endif
     39 #endif
     40 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
     41 # include <sys/proc.h>
     42 #endif
     43 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
     44 # include <sys/filio.h>
     45 # include <sys/fcntl.h>
     46 #else
     47 # include <sys/ioctl.h>
     48 #endif
     49 #include <sys/time.h>
     50 #if !defined(linux)
     51 # include <sys/protosw.h>
     52 #endif
     53 #include <sys/socket.h>
     54 #if defined(__SVR4) || defined(__svr4__)
     55 # include <sys/filio.h>
     56 # include <sys/byteorder.h>
     57 # ifdef _KERNEL
     58 #  include <sys/dditypes.h>
     59 # endif
     60 # include <sys/stream.h>
     61 # include <sys/kmem.h>
     62 #endif
     63 
     64 #include <net/if.h>
     65 #ifdef sun
     66 # include <net/af.h>
     67 #endif
     68 #include <netinet/in.h>
     69 #include <netinet/in_systm.h>
     70 #include <netinet/ip.h>
     71 #include <netinet/tcp.h>
     72 #if !defined(linux)
     73 # include <netinet/ip_var.h>
     74 #endif
     75 #if !defined(__hpux) && !defined(linux)
     76 # include <netinet/tcp_fsm.h>
     77 #endif
     78 #include <netinet/udp.h>
     79 #include <netinet/ip_icmp.h>
     80 #include "netinet/ip_compat.h"
     81 #include <netinet/tcpip.h>
     82 #include "netinet/ip_fil.h"
     83 #include "netinet/ip_nat.h"
     84 #include "netinet/ip_frag.h"
     85 #include "netinet/ip_state.h"
     86 #include "netinet/ip_proxy.h"
     87 #include "netinet/ip_sync.h"
     88 #ifdef  USE_INET6
     89 #include <netinet/icmp6.h>
     90 #endif
     91 #if (__FreeBSD_version >= 300000)
     92 # include <sys/malloc.h>
     93 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
     94 #  include <sys/libkern.h>
     95 #  include <sys/systm.h>
     96 # endif
     97 #endif
     98 /* END OF INCLUDES */
     99 
    100 #if !defined(lint)
    101 static const char rcsid[] = "@(#)Id: ip_sync.c,v 1.1.1.2 2012/07/22 13:44:24 darrenr Exp $";
    102 #endif
    103 
    104 #define	SYNC_STATETABSZ	256
    105 #define	SYNC_NATTABSZ	256
    106 
    107 typedef struct ipf_sync_softc_s {
    108 	ipfmutex_t	ipf_syncadd;
    109 	ipfmutex_t	ipsl_mutex;
    110 	ipfrwlock_t	ipf_syncstate;
    111 	ipfrwlock_t	ipf_syncnat;
    112 #if SOLARIS && defined(_KERNEL)
    113 	kcondvar_t	ipslwait;
    114 #endif
    115 #if defined(linux) && defined(_KERNEL)
    116 	wait_queue_head_t	sl_tail_linux;
    117 #endif
    118 	synclist_t	**syncstatetab;
    119 	synclist_t	**syncnattab;
    120 	synclogent_t	*synclog;
    121 	syncupdent_t	*syncupd;
    122 	u_int		ipf_sync_num;
    123 	u_int		ipf_sync_wrap;
    124 	u_int		sl_idx;		/* next available sync log entry */
    125 	u_int		su_idx;		/* next available sync update entry */
    126 	u_int		sl_tail;	/* next sync log entry to read */
    127 	u_int		su_tail;	/* next sync update entry to read */
    128 	int		ipf_sync_log_sz;
    129 	int		ipf_sync_nat_tab_sz;
    130 	int		ipf_sync_state_tab_sz;
    131 	int		ipf_sync_debug;
    132 	int		ipf_sync_events;
    133 	u_32_t		ipf_sync_lastwakeup;
    134 	int		ipf_sync_wake_interval;
    135 	int		ipf_sync_event_high_wm;
    136 	int		ipf_sync_queue_high_wm;
    137 	int		ipf_sync_inited;
    138 } ipf_sync_softc_t;
    139 
    140 static int ipf_sync_flush_table __P((ipf_sync_softc_t *, int, synclist_t **));
    141 static void ipf_sync_wakeup __P((ipf_main_softc_t *));
    142 static void ipf_sync_del __P((ipf_sync_softc_t *, synclist_t *));
    143 static void ipf_sync_poll_wakeup __P((ipf_main_softc_t *));
    144 static int ipf_sync_nat __P((ipf_main_softc_t *, synchdr_t *, void *));
    145 static int ipf_sync_state __P((ipf_main_softc_t *, synchdr_t *, void *));
    146 
    147 # if !defined(sparc) && !defined(__hppa)
    148 void ipf_sync_tcporder __P((int, struct tcpdata *));
    149 void ipf_sync_natorder __P((int, struct nat *));
    150 void ipf_sync_storder __P((int, struct ipstate *));
    151 # endif
    152 
    153 
    154 void *
    155 ipf_sync_soft_create(softc)
    156 	ipf_main_softc_t *softc;
    157 {
    158 	ipf_sync_softc_t *softs;
    159 
    160 	KMALLOC(softs, ipf_sync_softc_t *);
    161 	if (softs == NULL) {
    162 		IPFERROR(110024);
    163 		return NULL;
    164 	}
    165 
    166 	bzero((char *)softs, sizeof(*softs));
    167 
    168 	softs->ipf_sync_log_sz = SYNCLOG_SZ;
    169 	softs->ipf_sync_nat_tab_sz = SYNC_STATETABSZ;
    170 	softs->ipf_sync_state_tab_sz = SYNC_STATETABSZ;
    171 	softs->ipf_sync_event_high_wm = SYNCLOG_SZ * 100 / 90;	/* 90% */
    172 	softs->ipf_sync_queue_high_wm = SYNCLOG_SZ * 100 / 90;	/* 90% */
    173 
    174 	return softs;
    175 }
    176 
    177 
    178 /* ------------------------------------------------------------------------ */
    179 /* Function:    ipf_sync_init                                               */
    180 /* Returns:     int - 0 == success, -1 == failure                           */
    181 /* Parameters:  Nil                                                         */
    182 /*                                                                          */
    183 /* Initialise all of the locks required for the sync code and initialise    */
    184 /* any data structures, as required.                                        */
    185 /* ------------------------------------------------------------------------ */
    186 int
    187 ipf_sync_soft_init(softc, arg)
    188 	ipf_main_softc_t *softc;
    189 	void *arg;
    190 {
    191 	ipf_sync_softc_t *softs = arg;
    192 
    193 	KMALLOCS(softs->synclog, synclogent_t *,
    194 		 softs->ipf_sync_log_sz * sizeof(*softs->synclog));
    195 	if (softs->synclog == NULL)
    196 		return -1;
    197 	bzero((char *)softs->synclog,
    198 	      softs->ipf_sync_log_sz * sizeof(*softs->synclog));
    199 
    200 	KMALLOCS(softs->syncupd, syncupdent_t *,
    201 		 softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
    202 	if (softs->syncupd == NULL)
    203 		return -2;
    204 	bzero((char *)softs->syncupd,
    205 	      softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
    206 
    207 	KMALLOCS(softs->syncstatetab, synclist_t **,
    208 		 softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
    209 	if (softs->syncstatetab == NULL)
    210 		return -3;
    211 	bzero((char *)softs->syncstatetab,
    212 	      softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
    213 
    214 	KMALLOCS(softs->syncnattab, synclist_t **,
    215 		 softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
    216 	if (softs->syncnattab == NULL)
    217 		return -3;
    218 	bzero((char *)softs->syncnattab,
    219 	      softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
    220 
    221 	softs->ipf_sync_num = 1;
    222 	softs->ipf_sync_wrap = 0;
    223 	softs->sl_idx = 0;
    224 	softs->su_idx = 0;
    225 	softs->sl_tail = 0;
    226 	softs->su_tail = 0;
    227 	softs->ipf_sync_events = 0;
    228 	softs->ipf_sync_lastwakeup = 0;
    229 
    230 
    231 # if SOLARIS && defined(_KERNEL)
    232 	cv_init(&softs->ipslwait, "ipsl condvar", CV_DRIVER, NULL);
    233 # endif
    234 	RWLOCK_INIT(&softs->ipf_syncstate, "add things to state sync table");
    235 	RWLOCK_INIT(&softs->ipf_syncnat, "add things to nat sync table");
    236 	MUTEX_INIT(&softs->ipf_syncadd, "add things to sync table");
    237 	MUTEX_INIT(&softs->ipsl_mutex, "read ring lock");
    238 
    239 	softs->ipf_sync_inited = 1;
    240 
    241 	return 0;
    242 }
    243 
    244 
    245 /* ------------------------------------------------------------------------ */
    246 /* Function:    ipf_sync_unload                                             */
    247 /* Returns:     int - 0 == success, -1 == failure                           */
    248 /* Parameters:  Nil                                                         */
    249 /*                                                                          */
    250 /* Destroy the locks created when initialising and free any memory in use   */
    251 /* with the synchronisation tables.                                         */
    252 /* ------------------------------------------------------------------------ */
    253 int
    254 ipf_sync_soft_fini(softc, arg)
    255 	ipf_main_softc_t *softc;
    256 	void *arg;
    257 {
    258 	ipf_sync_softc_t *softs = arg;
    259 
    260 	if (softs->syncnattab != NULL) {
    261 		ipf_sync_flush_table(softs, softs->ipf_sync_nat_tab_sz,
    262 				     softs->syncnattab);
    263 		KFREES(softs->syncnattab,
    264 		       softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
    265 		softs->syncnattab = NULL;
    266 	}
    267 
    268 	if (softs->syncstatetab != NULL) {
    269 		ipf_sync_flush_table(softs, softs->ipf_sync_state_tab_sz,
    270 				     softs->syncstatetab);
    271 		KFREES(softs->syncstatetab,
    272 		       softs->ipf_sync_state_tab_sz *
    273 		       sizeof(*softs->syncstatetab));
    274 		softs->syncstatetab = NULL;
    275 	}
    276 
    277 	if (softs->syncupd != NULL) {
    278 		KFREES(softs->syncupd,
    279 		       softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
    280 		softs->syncupd = NULL;
    281 	}
    282 
    283 	if (softs->synclog != NULL) {
    284 		KFREES(softs->synclog,
    285 		       softs->ipf_sync_log_sz * sizeof(*softs->synclog));
    286 		softs->synclog = NULL;
    287 	}
    288 
    289 	if (softs->ipf_sync_inited == 1) {
    290 		MUTEX_DESTROY(&softs->ipsl_mutex);
    291 		MUTEX_DESTROY(&softs->ipf_syncadd);
    292 		RW_DESTROY(&softs->ipf_syncnat);
    293 		RW_DESTROY(&softs->ipf_syncstate);
    294 		softs->ipf_sync_inited = 0;
    295 	}
    296 
    297 	return 0;
    298 }
    299 
    300 void
    301 ipf_sync_soft_destroy(softc, arg)
    302 	ipf_main_softc_t *softc;
    303 	void *arg;
    304 {
    305 	ipf_sync_softc_t *softs = arg;
    306 
    307 	KFREE(softs);
    308 }
    309 
    310 
    311 # if !defined(sparc) && !defined(__hppa)
    312 /* ------------------------------------------------------------------------ */
    313 /* Function:    ipf_sync_tcporder                                           */
    314 /* Returns:     Nil                                                         */
    315 /* Parameters:  way(I) - direction of byte order conversion.                */
    316 /*              td(IO) - pointer to data to be converted.                   */
    317 /*                                                                          */
    318 /* Do byte swapping on values in the TCP state information structure that   */
    319 /* need to be used at both ends by the host in their native byte order.     */
    320 /* ------------------------------------------------------------------------ */
    321 void
    322 ipf_sync_tcporder(way, td)
    323 	int way;
    324 	tcpdata_t *td;
    325 {
    326 	if (way) {
    327 		td->td_maxwin = htons(td->td_maxwin);
    328 		td->td_end = htonl(td->td_end);
    329 		td->td_maxend = htonl(td->td_maxend);
    330 	} else {
    331 		td->td_maxwin = ntohs(td->td_maxwin);
    332 		td->td_end = ntohl(td->td_end);
    333 		td->td_maxend = ntohl(td->td_maxend);
    334 	}
    335 }
    336 
    337 
    338 /* ------------------------------------------------------------------------ */
    339 /* Function:    ipf_sync_natorder                                           */
    340 /* Returns:     Nil                                                         */
    341 /* Parameters:  way(I)  - direction of byte order conversion.               */
    342 /*              nat(IO) - pointer to data to be converted.                  */
    343 /*                                                                          */
    344 /* Do byte swapping on values in the NAT data structure that need to be     */
    345 /* used at both ends by the host in their native byte order.                */
    346 /* ------------------------------------------------------------------------ */
    347 void
    348 ipf_sync_natorder(way, n)
    349 	int way;
    350 	nat_t *n;
    351 {
    352 	if (way) {
    353 		n->nat_age = htonl(n->nat_age);
    354 		n->nat_flags = htonl(n->nat_flags);
    355 		n->nat_ipsumd = htonl(n->nat_ipsumd);
    356 		n->nat_use = htonl(n->nat_use);
    357 		n->nat_dir = htonl(n->nat_dir);
    358 	} else {
    359 		n->nat_age = ntohl(n->nat_age);
    360 		n->nat_flags = ntohl(n->nat_flags);
    361 		n->nat_ipsumd = ntohl(n->nat_ipsumd);
    362 		n->nat_use = ntohl(n->nat_use);
    363 		n->nat_dir = ntohl(n->nat_dir);
    364 	}
    365 }
    366 
    367 
    368 /* ------------------------------------------------------------------------ */
    369 /* Function:    ipf_sync_storder                                            */
    370 /* Returns:     Nil                                                         */
    371 /* Parameters:  way(I)  - direction of byte order conversion.               */
    372 /*              ips(IO) - pointer to data to be converted.                  */
    373 /*                                                                          */
    374 /* Do byte swapping on values in the IP state data structure that need to   */
    375 /* be used at both ends by the host in their native byte order.             */
    376 /* ------------------------------------------------------------------------ */
    377 void
    378 ipf_sync_storder(way, ips)
    379 	int way;
    380 	ipstate_t *ips;
    381 {
    382 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[0]);
    383 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[1]);
    384 
    385 	if (way) {
    386 		ips->is_hv = htonl(ips->is_hv);
    387 		ips->is_die = htonl(ips->is_die);
    388 		ips->is_pass = htonl(ips->is_pass);
    389 		ips->is_flags = htonl(ips->is_flags);
    390 		ips->is_opt[0] = htonl(ips->is_opt[0]);
    391 		ips->is_opt[1] = htonl(ips->is_opt[1]);
    392 		ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
    393 		ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
    394 		ips->is_sec = htons(ips->is_sec);
    395 		ips->is_secmsk = htons(ips->is_secmsk);
    396 		ips->is_auth = htons(ips->is_auth);
    397 		ips->is_authmsk = htons(ips->is_authmsk);
    398 		ips->is_s0[0] = htonl(ips->is_s0[0]);
    399 		ips->is_s0[1] = htonl(ips->is_s0[1]);
    400 		ips->is_smsk[0] = htons(ips->is_smsk[0]);
    401 		ips->is_smsk[1] = htons(ips->is_smsk[1]);
    402 	} else {
    403 		ips->is_hv = ntohl(ips->is_hv);
    404 		ips->is_die = ntohl(ips->is_die);
    405 		ips->is_pass = ntohl(ips->is_pass);
    406 		ips->is_flags = ntohl(ips->is_flags);
    407 		ips->is_opt[0] = ntohl(ips->is_opt[0]);
    408 		ips->is_opt[1] = ntohl(ips->is_opt[1]);
    409 		ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
    410 		ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
    411 		ips->is_sec = ntohs(ips->is_sec);
    412 		ips->is_secmsk = ntohs(ips->is_secmsk);
    413 		ips->is_auth = ntohs(ips->is_auth);
    414 		ips->is_authmsk = ntohs(ips->is_authmsk);
    415 		ips->is_s0[0] = ntohl(ips->is_s0[0]);
    416 		ips->is_s0[1] = ntohl(ips->is_s0[1]);
    417 		ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
    418 		ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
    419 	}
    420 }
    421 # else /* !defined(sparc) && !defined(__hppa) */
    422 #  define	ipf_sync_tcporder(x,y)
    423 #  define	ipf_sync_natorder(x,y)
    424 #  define	ipf_sync_storder(x,y)
    425 # endif /* !defined(sparc) && !defined(__hppa) */
    426 
    427 
    428 /* ------------------------------------------------------------------------ */
    429 /* Function:    ipf_sync_write                                              */
    430 /* Returns:     int    - 0 == success, else error value.                    */
    431 /* Parameters:  uio(I) - pointer to information about data to write         */
    432 /*                                                                          */
    433 /* Moves data from user space into the kernel and uses it for updating data */
    434 /* structures in the state/NAT tables.                                      */
    435 /* ------------------------------------------------------------------------ */
    436 int
    437 ipf_sync_write(softc, uio)
    438 	ipf_main_softc_t *softc;
    439 	struct uio *uio;
    440 {
    441 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    442 	synchdr_t sh;
    443 
    444 	/*
    445 	 * THIS MUST BE SUFFICIENT LARGE TO STORE
    446 	 * ANY POSSIBLE DATA TYPE
    447 	 */
    448 	char data[2048];
    449 
    450 	int err = 0;
    451 
    452 #  if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__)
    453 	uio->uio_rw = UIO_WRITE;
    454 #  endif
    455 
    456 	/* Try to get bytes */
    457 	while (uio->uio_resid > 0) {
    458 
    459 		if (uio->uio_resid >= sizeof(sh)) {
    460 
    461 			err = UIOMOVE(&sh, sizeof(sh), UIO_WRITE, uio);
    462 
    463 			if (err) {
    464 				if (softs->ipf_sync_debug > 2)
    465 					printf("uiomove(header) failed: %d\n",
    466 						err);
    467 				return err;
    468 			}
    469 
    470 			/* convert to host order */
    471 			sh.sm_magic = ntohl(sh.sm_magic);
    472 			sh.sm_len = ntohl(sh.sm_len);
    473 			sh.sm_num = ntohl(sh.sm_num);
    474 
    475 			if (softs->ipf_sync_debug > 8)
    476 				printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
    477 					sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
    478 					sh.sm_table, sh.sm_rev, sh.sm_len,
    479 					sh.sm_magic);
    480 
    481 			if (sh.sm_magic != SYNHDRMAGIC) {
    482 				if (softs->ipf_sync_debug > 2)
    483 					printf("uiomove(header) invalid %s\n",
    484 						"magic");
    485 				IPFERROR(110001);
    486 				return EINVAL;
    487 			}
    488 
    489 			if (sh.sm_v != 4 && sh.sm_v != 6) {
    490 				if (softs->ipf_sync_debug > 2)
    491 					printf("uiomove(header) invalid %s\n",
    492 						"protocol");
    493 				IPFERROR(110002);
    494 				return EINVAL;
    495 			}
    496 
    497 			if (sh.sm_cmd > SMC_MAXCMD) {
    498 				if (softs->ipf_sync_debug > 2)
    499 					printf("uiomove(header) invalid %s\n",
    500 						"command");
    501 				IPFERROR(110003);
    502 				return EINVAL;
    503 			}
    504 
    505 
    506 			if (sh.sm_table > SMC_MAXTBL) {
    507 				if (softs->ipf_sync_debug > 2)
    508 					printf("uiomove(header) invalid %s\n",
    509 						"table");
    510 				IPFERROR(110004);
    511 				return EINVAL;
    512 			}
    513 
    514 		} else {
    515 			/* unsufficient data, wait until next call */
    516 			if (softs->ipf_sync_debug > 2)
    517 				printf("uiomove(header) insufficient data");
    518 			IPFERROR(110005);
    519 			return EAGAIN;
    520 	 	}
    521 
    522 
    523 		/*
    524 		 * We have a header, so try to read the amount of data
    525 		 * needed for the request
    526 		 */
    527 
    528 		/* not supported */
    529 		if (sh.sm_len == 0) {
    530 			if (softs->ipf_sync_debug > 2)
    531 				printf("uiomove(data zero length %s\n",
    532 					"not supported");
    533 			IPFERROR(110006);
    534 			return EINVAL;
    535 		}
    536 
    537 		if (uio->uio_resid >= sh.sm_len) {
    538 
    539 			err = UIOMOVE(data, sh.sm_len, UIO_WRITE, uio);
    540 
    541 			if (err) {
    542 				if (softs->ipf_sync_debug > 2)
    543 					printf("uiomove(data) failed: %d\n",
    544 						err);
    545 				return err;
    546 			}
    547 
    548 			if (softs->ipf_sync_debug > 7)
    549 				printf("uiomove(data) %d bytes read\n",
    550 					sh.sm_len);
    551 
    552 			if (sh.sm_table == SMC_STATE)
    553 				err = ipf_sync_state(softc, &sh, data);
    554 			else if (sh.sm_table == SMC_NAT)
    555 				err = ipf_sync_nat(softc, &sh, data);
    556 			if (softs->ipf_sync_debug > 7)
    557 				printf("[%d] Finished with error %d\n",
    558 					sh.sm_num, err);
    559 
    560 		} else {
    561 			/* insufficient data, wait until next call */
    562 			if (softs->ipf_sync_debug > 2)
    563 				printf("uiomove(data) %s %d bytes, got %d\n",
    564 					"insufficient data, need",
    565 					sh.sm_len, (int)uio->uio_resid);
    566 			IPFERROR(110007);
    567 			return EAGAIN;
    568 		}
    569 	}
    570 
    571 	/* no more data */
    572 	return 0;
    573 }
    574 
    575 
    576 /* ------------------------------------------------------------------------ */
    577 /* Function:    ipf_sync_read                                               */
    578 /* Returns:     int    - 0 == success, else error value.                    */
    579 /* Parameters:  uio(O) - pointer to information about where to store data   */
    580 /*                                                                          */
    581 /* This function is called when a user program wants to read some data      */
    582 /* for pending state/NAT updates.  If no data is available, the caller is   */
    583 /* put to sleep, pending a wakeup from the "lower half" of this code.       */
    584 /* ------------------------------------------------------------------------ */
    585 int
    586 ipf_sync_read(softc, uio)
    587 	ipf_main_softc_t *softc;
    588 	struct uio *uio;
    589 {
    590 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    591 	syncupdent_t *su;
    592 	synclogent_t *sl;
    593 	int err = 0;
    594 
    595 	if ((uio->uio_resid & 3) || (uio->uio_resid < 8)) {
    596 		IPFERROR(110008);
    597 		return EINVAL;
    598 	}
    599 
    600 #  if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__)
    601 	uio->uio_rw = UIO_READ;
    602 #  endif
    603 
    604 	MUTEX_ENTER(&softs->ipsl_mutex);
    605 	while ((softs->sl_tail == softs->sl_idx) &&
    606 	       (softs->su_tail == softs->su_idx)) {
    607 #  if defined(_KERNEL)
    608 #   if SOLARIS
    609 		if (!cv_wait_sig(&softs->ipslwait, &softs->ipsl_mutex.ipf_lk)) {
    610 			MUTEX_EXIT(&softs->ipsl_mutex);
    611 			IPFERROR(110009);
    612 			return EINTR;
    613 		}
    614 #   else
    615 #    ifdef __hpux
    616 		{
    617 		lock_t *l;
    618 
    619 		l = get_sleep_lock(&softs->sl_tail);
    620 		err = sleep(&softs->sl_tail, PZERO+1);
    621 		if (err) {
    622 			MUTEX_EXIT(&softs->ipsl_mutex);
    623 			IPFERROR(110010);
    624 			return EINTR;
    625 		}
    626 		spinunlock(l);
    627 		}
    628 #    else /* __hpux */
    629 #     ifdef __osf__
    630 		err = mpsleep(&softs->sl_tail, PSUSP|PCATCH,  "ipl sleep", 0,
    631 			      &softs->ipsl_mutex, MS_LOCK_SIMPLE);
    632 		if (err) {
    633 			IPFERROR(110011);
    634 			return EINTR;
    635 		}
    636 #     else
    637 		MUTEX_EXIT(&softs->ipsl_mutex);
    638 		err = SLEEP(&softs->sl_tail, "ipl sleep");
    639 		if (err) {
    640 			IPFERROR(110012);
    641 			return EINTR;
    642 		}
    643 		MUTEX_ENTER(&softs->ipsl_mutex);
    644 #     endif /* __osf__ */
    645 #    endif /* __hpux */
    646 #   endif /* SOLARIS */
    647 #  endif /* _KERNEL */
    648 	}
    649 
    650 	while ((softs->sl_tail < softs->sl_idx) &&
    651 	       (uio->uio_resid > sizeof(*sl))) {
    652 		sl = softs->synclog + softs->sl_tail++;
    653 		MUTEX_EXIT(&softs->ipsl_mutex);
    654 		err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
    655 		if (err != 0)
    656 			goto goterror;
    657 		MUTEX_ENTER(&softs->ipsl_mutex);
    658 	}
    659 
    660 	while ((softs->su_tail < softs->su_idx) &&
    661 	       (uio->uio_resid > sizeof(*su))) {
    662 		su = softs->syncupd + softs->su_tail;
    663 		softs->su_tail++;
    664 		MUTEX_EXIT(&softs->ipsl_mutex);
    665 		err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
    666 		if (err != 0)
    667 			goto goterror;
    668 		MUTEX_ENTER(&softs->ipsl_mutex);
    669 		if (su->sup_hdr.sm_sl != NULL)
    670 			su->sup_hdr.sm_sl->sl_idx = -1;
    671 	}
    672 	if (softs->sl_tail == softs->sl_idx)
    673 		softs->sl_tail = softs->sl_idx = 0;
    674 	if (softs->su_tail == softs->su_idx)
    675 		softs->su_tail = softs->su_idx = 0;
    676 	MUTEX_EXIT(&softs->ipsl_mutex);
    677 goterror:
    678 	return err;
    679 }
    680 
    681 
    682 /* ------------------------------------------------------------------------ */
    683 /* Function:    ipf_sync_state                                              */
    684 /* Returns:     int    - 0 == success, else error value.                    */
    685 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
    686 /*              uio(I) - pointer to user data for further information       */
    687 /*                                                                          */
    688 /* Updates the state table according to information passed in the sync      */
    689 /* header.  As required, more data is fetched from the uio structure but    */
    690 /* varies depending on the contents of the sync header.  This function can  */
    691 /* create a new state entry or update one.  Deletion is left to the state   */
    692 /* structures being timed out correctly.                                    */
    693 /* ------------------------------------------------------------------------ */
    694 static int
    695 ipf_sync_state(softc, sp, data)
    696 	ipf_main_softc_t *softc;
    697 	synchdr_t *sp;
    698 	void *data;
    699 {
    700 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    701 	synctcp_update_t su;
    702 	ipstate_t *is, sn;
    703 	synclist_t *sl;
    704 	frentry_t *fr;
    705 	u_int hv;
    706 	int err = 0;
    707 
    708 	hv = sp->sm_num & (softs->ipf_sync_state_tab_sz - 1);
    709 
    710 	switch (sp->sm_cmd)
    711 	{
    712 	case SMC_CREATE :
    713 
    714 		bcopy(data, &sn, sizeof(sn));
    715 		KMALLOC(is, ipstate_t *);
    716 		if (is == NULL) {
    717 			IPFERROR(110013);
    718 			err = ENOMEM;
    719 			break;
    720 		}
    721 
    722 		KMALLOC(sl, synclist_t *);
    723 		if (sl == NULL) {
    724 			IPFERROR(110014);
    725 			err = ENOMEM;
    726 			KFREE(is);
    727 			break;
    728 		}
    729 
    730 		bzero((char *)is, offsetof(ipstate_t, is_die));
    731 		bcopy((char *)&sn.is_die, (char *)&is->is_die,
    732 		      sizeof(*is) - offsetof(ipstate_t, is_die));
    733 		ipf_sync_storder(0, is);
    734 
    735 		/*
    736 		 * We need to find the same rule on the slave as was used on
    737 		 * the master to create this state entry.
    738 		 */
    739 		READ_ENTER(&softc->ipf_mutex);
    740 		fr = ipf_getrulen(softc, IPL_LOGIPF, sn.is_group, sn.is_rulen);
    741 		if (fr != NULL) {
    742 			MUTEX_ENTER(&fr->fr_lock);
    743 			fr->fr_ref++;
    744 			fr->fr_statecnt++;
    745 			MUTEX_EXIT(&fr->fr_lock);
    746 		}
    747 		RWLOCK_EXIT(&softc->ipf_mutex);
    748 
    749 		if (softs->ipf_sync_debug > 4)
    750 			printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
    751 
    752 		is->is_rule = fr;
    753 		is->is_sync = sl;
    754 
    755 		sl->sl_idx = -1;
    756 		sl->sl_ips = is;
    757 		bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
    758 
    759 		WRITE_ENTER(&softs->ipf_syncstate);
    760 		WRITE_ENTER(&softc->ipf_state);
    761 
    762 		sl->sl_pnext = softs->syncstatetab + hv;
    763 		sl->sl_next = softs->syncstatetab[hv];
    764 		if (softs->syncstatetab[hv] != NULL)
    765 			softs->syncstatetab[hv]->sl_pnext = &sl->sl_next;
    766 		softs->syncstatetab[hv] = sl;
    767 		MUTEX_DOWNGRADE(&softs->ipf_syncstate);
    768 		ipf_state_insert(softc, is, sp->sm_rev);
    769 		/*
    770 		 * Do not initialise the interface pointers for the state
    771 		 * entry as the full complement of interface names may not
    772 		 * be present.
    773 		 *
    774 		 * Put this state entry on its timeout queue.
    775 		 */
    776 		/*fr_setstatequeue(is, sp->sm_rev);*/
    777 		break;
    778 
    779 	case SMC_UPDATE :
    780 		bcopy(data, &su, sizeof(su));
    781 
    782 		if (softs->ipf_sync_debug > 4)
    783 			printf("[%d] Update age %lu state %d/%d \n",
    784 				sp->sm_num, su.stu_age, su.stu_state[0],
    785 				su.stu_state[1]);
    786 
    787 		READ_ENTER(&softs->ipf_syncstate);
    788 		for (sl = softs->syncstatetab[hv]; (sl != NULL);
    789 		     sl = sl->sl_next)
    790 			if (sl->sl_hdr.sm_num == sp->sm_num)
    791 				break;
    792 		if (sl == NULL) {
    793 			if (softs->ipf_sync_debug > 1)
    794 				printf("[%d] State not found - can't update\n",
    795 					sp->sm_num);
    796 			RWLOCK_EXIT(&softs->ipf_syncstate);
    797 			IPFERROR(110015);
    798 			err = ENOENT;
    799 			break;
    800 		}
    801 
    802 		READ_ENTER(&softc->ipf_state);
    803 
    804 		if (softs->ipf_sync_debug > 6)
    805 			printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
    806 				sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
    807 				sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
    808 				sl->sl_hdr.sm_rev);
    809 
    810 		is = sl->sl_ips;
    811 
    812 		MUTEX_ENTER(&is->is_lock);
    813 		switch (sp->sm_p)
    814 		{
    815 		case IPPROTO_TCP :
    816 			/* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
    817 			is->is_send = su.stu_data[0].td_end;
    818 			is->is_maxsend = su.stu_data[0].td_maxend;
    819 			is->is_maxswin = su.stu_data[0].td_maxwin;
    820 			is->is_state[0] = su.stu_state[0];
    821 			is->is_dend = su.stu_data[1].td_end;
    822 			is->is_maxdend = su.stu_data[1].td_maxend;
    823 			is->is_maxdwin = su.stu_data[1].td_maxwin;
    824 			is->is_state[1] = su.stu_state[1];
    825 			break;
    826 		default :
    827 			break;
    828 		}
    829 
    830 		if (softs->ipf_sync_debug > 6)
    831 			printf("[%d] Setting timers for state\n", sp->sm_num);
    832 
    833 		ipf_state_setqueue(softc, is, sp->sm_rev);
    834 
    835 		MUTEX_EXIT(&is->is_lock);
    836 		break;
    837 
    838 	default :
    839 		IPFERROR(110016);
    840 		err = EINVAL;
    841 		break;
    842 	}
    843 
    844 	if (err == 0) {
    845 		RWLOCK_EXIT(&softc->ipf_state);
    846 		RWLOCK_EXIT(&softs->ipf_syncstate);
    847 	}
    848 
    849 	if (softs->ipf_sync_debug > 6)
    850 		printf("[%d] Update completed with error %d\n",
    851 			sp->sm_num, err);
    852 
    853 	return err;
    854 }
    855 
    856 
    857 /* ------------------------------------------------------------------------ */
    858 /* Function:    ipf_sync_del                                                */
    859 /* Returns:     Nil                                                         */
    860 /* Parameters:  sl(I) - pointer to synclist object to delete                */
    861 /*                                                                          */
    862 /* Deletes an object from the synclist.                                     */
    863 /* ------------------------------------------------------------------------ */
    864 static void
    865 ipf_sync_del(softs, sl)
    866 	ipf_sync_softc_t *softs;
    867 	synclist_t *sl;
    868 {
    869 	*sl->sl_pnext = sl->sl_next;
    870 	if (sl->sl_next != NULL)
    871 		sl->sl_next->sl_pnext = sl->sl_pnext;
    872 	if (sl->sl_idx != -1)
    873 		softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
    874 }
    875 
    876 
    877 /* ------------------------------------------------------------------------ */
    878 /* Function:    ipf_sync_del_state                                          */
    879 /* Returns:     Nil                                                         */
    880 /* Parameters:  sl(I) - pointer to synclist object to delete                */
    881 /*                                                                          */
    882 /* Deletes an object from the synclist state table and free's its memory.   */
    883 /* ------------------------------------------------------------------------ */
    884 void
    885 ipf_sync_del_state(arg, sl)
    886 	void *arg;
    887 	synclist_t *sl;
    888 {
    889 	ipf_sync_softc_t *softs = arg;
    890 
    891 	WRITE_ENTER(&softs->ipf_syncstate);
    892 	ipf_sync_del(softs, sl);
    893 	RWLOCK_EXIT(&softs->ipf_syncstate);
    894 	KFREE(sl);
    895 }
    896 
    897 
    898 /* ------------------------------------------------------------------------ */
    899 /* Function:    ipf_sync_del_nat                                            */
    900 /* Returns:     Nil                                                         */
    901 /* Parameters:  sl(I) - pointer to synclist object to delete                */
    902 /*                                                                          */
    903 /* Deletes an object from the synclist nat table and free's its memory.     */
    904 /* ------------------------------------------------------------------------ */
    905 void
    906 ipf_sync_del_nat(arg, sl)
    907 	void *arg;
    908 	synclist_t *sl;
    909 {
    910 	ipf_sync_softc_t *softs = arg;
    911 
    912 	WRITE_ENTER(&softs->ipf_syncnat);
    913 	ipf_sync_del(softs, sl);
    914 	RWLOCK_EXIT(&softs->ipf_syncnat);
    915 	KFREE(sl);
    916 }
    917 
    918 
    919 /* ------------------------------------------------------------------------ */
    920 /* Function:    ipf_sync_nat                                                */
    921 /* Returns:     int    - 0 == success, else error value.                    */
    922 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
    923 /*              uio(I) - pointer to user data for further information       */
    924 /*                                                                          */
    925 /* Updates the NAT  table according to information passed in the sync       */
    926 /* header.  As required, more data is fetched from the uio structure but    */
    927 /* varies depending on the contents of the sync header.  This function can  */
    928 /* create a new NAT entry or update one.  Deletion is left to the NAT       */
    929 /* structures being timed out correctly.                                    */
    930 /* ------------------------------------------------------------------------ */
    931 static int
    932 ipf_sync_nat(softc, sp, data)
    933 	ipf_main_softc_t *softc;
    934 	synchdr_t *sp;
    935 	void *data;
    936 {
    937 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    938 	syncupdent_t su;
    939 	nat_t *n, *nat;
    940 	synclist_t *sl;
    941 	u_int hv = 0;
    942 	int err;
    943 
    944 	READ_ENTER(&softs->ipf_syncnat);
    945 
    946 	switch (sp->sm_cmd)
    947 	{
    948 	case SMC_CREATE :
    949 		KMALLOC(n, nat_t *);
    950 		if (n == NULL) {
    951 			IPFERROR(110017);
    952 			err = ENOMEM;
    953 			break;
    954 		}
    955 
    956 		KMALLOC(sl, synclist_t *);
    957 		if (sl == NULL) {
    958 			IPFERROR(110018);
    959 			err = ENOMEM;
    960 			KFREE(n);
    961 			break;
    962 		}
    963 
    964 		nat = (nat_t *)data;
    965 		bzero((char *)n, offsetof(nat_t, nat_age));
    966 		bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
    967 		      sizeof(*n) - offsetof(nat_t, nat_age));
    968 		ipf_sync_natorder(0, n);
    969 		n->nat_sync = sl;
    970 		n->nat_rev = sl->sl_rev;
    971 
    972 		sl->sl_idx = -1;
    973 		sl->sl_ipn = n;
    974 		sl->sl_num = ntohl(sp->sm_num);
    975 
    976 		WRITE_ENTER(&softc->ipf_nat);
    977 		sl->sl_pnext = softs->syncnattab + hv;
    978 		sl->sl_next = softs->syncnattab[hv];
    979 		if (softs->syncnattab[hv] != NULL)
    980 			softs->syncnattab[hv]->sl_pnext = &sl->sl_next;
    981 		softs->syncnattab[hv] = sl;
    982 		(void) ipf_nat_insert(softc, softc->ipf_nat_soft, n);
    983 		RWLOCK_EXIT(&softc->ipf_nat);
    984 		break;
    985 
    986 	case SMC_UPDATE :
    987 		bcopy(data, &su, sizeof(su));
    988 
    989 		for (sl = softs->syncnattab[hv]; (sl != NULL);
    990 		     sl = sl->sl_next)
    991 			if (sl->sl_hdr.sm_num == sp->sm_num)
    992 				break;
    993 		if (sl == NULL) {
    994 			IPFERROR(110019);
    995 			err = ENOENT;
    996 			break;
    997 		}
    998 
    999 		READ_ENTER(&softc->ipf_nat);
   1000 
   1001 		nat = sl->sl_ipn;
   1002 		nat->nat_rev = sl->sl_rev;
   1003 
   1004 		MUTEX_ENTER(&nat->nat_lock);
   1005 		ipf_nat_setqueue(softc, softc->ipf_nat_soft, nat);
   1006 		MUTEX_EXIT(&nat->nat_lock);
   1007 
   1008 		RWLOCK_EXIT(&softc->ipf_nat);
   1009 
   1010 		break;
   1011 
   1012 	default :
   1013 		IPFERROR(110020);
   1014 		err = EINVAL;
   1015 		break;
   1016 	}
   1017 
   1018 	RWLOCK_EXIT(&softs->ipf_syncnat);
   1019 	return 0;
   1020 }
   1021 
   1022 
   1023 /* ------------------------------------------------------------------------ */
   1024 /* Function:    ipf_sync_new                                                */
   1025 /* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
   1026 /*                            data structure.                               */
   1027 /* Parameters:  tab(I) - type of synclist_t to create                       */
   1028 /*              fin(I) - pointer to packet information                      */
   1029 /*              ptr(I) - pointer to owning object                           */
   1030 /*                                                                          */
   1031 /* Creates a new sync table entry and notifies any sleepers that it's there */
   1032 /* waiting to be processed.                                                 */
   1033 /* ------------------------------------------------------------------------ */
   1034 synclist_t *
   1035 ipf_sync_new(softc, tab, fin, ptr)
   1036 	ipf_main_softc_t *softc;
   1037 	int tab;
   1038 	fr_info_t *fin;
   1039 	void *ptr;
   1040 {
   1041 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1042 	synclist_t *sl, *ss;
   1043 	synclogent_t *sle;
   1044 	u_int hv, sz;
   1045 
   1046 	if (softs->sl_idx == softs->ipf_sync_log_sz)
   1047 		return NULL;
   1048 	KMALLOC(sl, synclist_t *);
   1049 	if (sl == NULL)
   1050 		return NULL;
   1051 
   1052 	MUTEX_ENTER(&softs->ipf_syncadd);
   1053 	/*
   1054 	 * Get a unique number for this synclist_t.  The number is only meant
   1055 	 * to be unique for the lifetime of the structure and may be reused
   1056 	 * later.
   1057 	 */
   1058 	softs->ipf_sync_num++;
   1059 	if (softs->ipf_sync_num == 0) {
   1060 		softs->ipf_sync_num = 1;
   1061 		softs->ipf_sync_wrap++;
   1062 	}
   1063 
   1064 	/*
   1065 	 * Use the synch number of the object as the hash key.  Should end up
   1066 	 * with relatively even distribution over time.
   1067 	 * XXX - an attacker could lunch an DoS attack, of sorts, if they are
   1068 	 * the only one causing new table entries by only keeping open every
   1069 	 * nth connection they make, where n is a value in the interval
   1070 	 * [0, SYNC_STATETABSZ-1].
   1071 	 */
   1072 	switch (tab)
   1073 	{
   1074 	case SMC_STATE :
   1075 		hv = softs->ipf_sync_num & (softs->ipf_sync_state_tab_sz - 1);
   1076 		while (softs->ipf_sync_wrap != 0) {
   1077 			for (ss = softs->syncstatetab[hv]; ss; ss = ss->sl_next)
   1078 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
   1079 					break;
   1080 			if (ss == NULL)
   1081 				break;
   1082 			softs->ipf_sync_num++;
   1083 			hv = softs->ipf_sync_num &
   1084 			     (softs->ipf_sync_state_tab_sz - 1);
   1085 		}
   1086 		sl->sl_pnext = softs->syncstatetab + hv;
   1087 		sl->sl_next = softs->syncstatetab[hv];
   1088 		softs->syncstatetab[hv] = sl;
   1089 		break;
   1090 
   1091 	case SMC_NAT :
   1092 		hv = softs->ipf_sync_num & (softs->ipf_sync_nat_tab_sz - 1);
   1093 		while (softs->ipf_sync_wrap != 0) {
   1094 			for (ss = softs->syncnattab[hv]; ss; ss = ss->sl_next)
   1095 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
   1096 					break;
   1097 			if (ss == NULL)
   1098 				break;
   1099 			softs->ipf_sync_num++;
   1100 			hv = softs->ipf_sync_num &
   1101 			     (softs->ipf_sync_nat_tab_sz - 1);
   1102 		}
   1103 		sl->sl_pnext = softs->syncnattab + hv;
   1104 		sl->sl_next = softs->syncnattab[hv];
   1105 		softs->syncnattab[hv] = sl;
   1106 		break;
   1107 
   1108 	default :
   1109 		break;
   1110 	}
   1111 
   1112 	sl->sl_num = softs->ipf_sync_num;
   1113 	MUTEX_EXIT(&softs->ipf_syncadd);
   1114 
   1115 	sl->sl_magic = htonl(SYNHDRMAGIC);
   1116 	sl->sl_v = fin->fin_v;
   1117 	sl->sl_p = fin->fin_p;
   1118 	sl->sl_cmd = SMC_CREATE;
   1119 	sl->sl_idx = -1;
   1120 	sl->sl_table = tab;
   1121 	sl->sl_rev = fin->fin_rev;
   1122 	if (tab == SMC_STATE) {
   1123 		sl->sl_ips = ptr;
   1124 		sz = sizeof(*sl->sl_ips);
   1125 	} else if (tab == SMC_NAT) {
   1126 		sl->sl_ipn = ptr;
   1127 		sz = sizeof(*sl->sl_ipn);
   1128 	} else {
   1129 		ptr = NULL;
   1130 		sz = 0;
   1131 	}
   1132 	sl->sl_len = sz;
   1133 
   1134 	/*
   1135 	 * Create the log entry to be read by a user daemon.  When it has been
   1136 	 * finished and put on the queue, send a signal to wakeup any waiters.
   1137 	 */
   1138 	MUTEX_ENTER(&softs->ipf_syncadd);
   1139 	sle = softs->synclog + softs->sl_idx++;
   1140 	bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
   1141 	      sizeof(sle->sle_hdr));
   1142 	sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
   1143 	sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
   1144 	if (ptr != NULL) {
   1145 		bcopy((char *)ptr, (char *)&sle->sle_un, sz);
   1146 		if (tab == SMC_STATE) {
   1147 			ipf_sync_storder(1, &sle->sle_un.sleu_ips);
   1148 		} else if (tab == SMC_NAT) {
   1149 			ipf_sync_natorder(1, &sle->sle_un.sleu_ipn);
   1150 		}
   1151 	}
   1152 	MUTEX_EXIT(&softs->ipf_syncadd);
   1153 
   1154 	ipf_sync_wakeup(softc);
   1155 	return sl;
   1156 }
   1157 
   1158 
   1159 /* ------------------------------------------------------------------------ */
   1160 /* Function:    ipf_sync_update                                             */
   1161 /* Returns:     Nil                                                         */
   1162 /* Parameters:  tab(I) - type of synclist_t to create                       */
   1163 /*              fin(I) - pointer to packet information                      */
   1164 /*              sl(I)  - pointer to synchronisation object                  */
   1165 /*                                                                          */
   1166 /* For outbound packets, only, create an sync update record for the user    */
   1167 /* process to read.                                                         */
   1168 /* ------------------------------------------------------------------------ */
   1169 void
   1170 ipf_sync_update(softc, tab, fin, sl)
   1171 	ipf_main_softc_t *softc;
   1172 	int tab;
   1173 	fr_info_t *fin;
   1174 	synclist_t *sl;
   1175 {
   1176 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1177 	synctcp_update_t *st;
   1178 	syncupdent_t *slu;
   1179 	ipstate_t *ips;
   1180 	nat_t *nat;
   1181 	ipfrwlock_t *lock;
   1182 
   1183 	if (fin->fin_out == 0 || sl == NULL)
   1184 		return;
   1185 
   1186 	if (tab == SMC_STATE) {
   1187 		lock = &softs->ipf_syncstate;
   1188 	} else {
   1189 		lock = &softs->ipf_syncnat;
   1190 	}
   1191 
   1192 	READ_ENTER(lock);
   1193 	if (sl->sl_idx == -1) {
   1194 		MUTEX_ENTER(&softs->ipf_syncadd);
   1195 		slu = softs->syncupd + softs->su_idx;
   1196 		sl->sl_idx = softs->su_idx++;
   1197 		MUTEX_EXIT(&softs->ipf_syncadd);
   1198 
   1199 		bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
   1200 		      sizeof(slu->sup_hdr));
   1201 		slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
   1202 		slu->sup_hdr.sm_sl = sl;
   1203 		slu->sup_hdr.sm_cmd = SMC_UPDATE;
   1204 		slu->sup_hdr.sm_table = tab;
   1205 		slu->sup_hdr.sm_num = htonl(sl->sl_num);
   1206 		slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
   1207 		slu->sup_hdr.sm_rev = fin->fin_rev;
   1208 # if 0
   1209 		if (fin->fin_p == IPPROTO_TCP) {
   1210 			st->stu_len[0] = 0;
   1211 			st->stu_len[1] = 0;
   1212 		}
   1213 # endif
   1214 	} else
   1215 		slu = softs->syncupd + sl->sl_idx;
   1216 
   1217 	/*
   1218 	 * Only TCP has complex timeouts, others just use default timeouts.
   1219 	 * For TCP, we only need to track the connection state and window.
   1220 	 */
   1221 	if (fin->fin_p == IPPROTO_TCP) {
   1222 		st = &slu->sup_tcp;
   1223 		if (tab == SMC_STATE) {
   1224 			ips = sl->sl_ips;
   1225 			st->stu_age = htonl(ips->is_die);
   1226 			st->stu_data[0].td_end = ips->is_send;
   1227 			st->stu_data[0].td_maxend = ips->is_maxsend;
   1228 			st->stu_data[0].td_maxwin = ips->is_maxswin;
   1229 			st->stu_state[0] = ips->is_state[0];
   1230 			st->stu_data[1].td_end = ips->is_dend;
   1231 			st->stu_data[1].td_maxend = ips->is_maxdend;
   1232 			st->stu_data[1].td_maxwin = ips->is_maxdwin;
   1233 			st->stu_state[1] = ips->is_state[1];
   1234 		} else if (tab == SMC_NAT) {
   1235 			nat = sl->sl_ipn;
   1236 			st->stu_age = htonl(nat->nat_age);
   1237 		}
   1238 	}
   1239 	RWLOCK_EXIT(lock);
   1240 
   1241 	ipf_sync_wakeup(softc);
   1242 }
   1243 
   1244 
   1245 /* ------------------------------------------------------------------------ */
   1246 /* Function:    ipf_sync_flush_table                                        */
   1247 /* Returns:     int - number of entries freed by flushing table             */
   1248 /* Parameters:  tabsize(I) - size of the array pointed to by table          */
   1249 /*              table(I)   - pointer to sync table to empty                 */
   1250 /*                                                                          */
   1251 /* Walk through a table of sync entries and free each one.  It is assumed   */
   1252 /* that some lock is held so that nobody else tries to access the table     */
   1253 /* during this cleanup.                                                     */
   1254 /* ------------------------------------------------------------------------ */
   1255 static int
   1256 ipf_sync_flush_table(softs, tabsize, table)
   1257 	ipf_sync_softc_t *softs;
   1258 	int tabsize;
   1259 	synclist_t **table;
   1260 {
   1261 	synclist_t *sl;
   1262 	int i, items;
   1263 
   1264 	items = 0;
   1265 
   1266 	for (i = 0; i < tabsize; i++) {
   1267 		while ((sl = table[i]) != NULL) {
   1268 			switch (sl->sl_table) {
   1269 			case SMC_STATE :
   1270 				if (sl->sl_ips != NULL)
   1271 					sl->sl_ips->is_sync = NULL;
   1272 				break;
   1273 			case SMC_NAT :
   1274 				if (sl->sl_ipn != NULL)
   1275 					sl->sl_ipn->nat_sync = NULL;
   1276 				break;
   1277 			}
   1278 			if (sl->sl_next != NULL)
   1279 				sl->sl_next->sl_pnext = sl->sl_pnext;
   1280 			table[i] = sl->sl_next;
   1281 			if (sl->sl_idx != -1)
   1282 				softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
   1283 			KFREE(sl);
   1284 			items++;
   1285 		}
   1286 	}
   1287 
   1288 	return items;
   1289 }
   1290 
   1291 
   1292 /* ------------------------------------------------------------------------ */
   1293 /* Function:    ipf_sync_ioctl                                              */
   1294 /* Returns:     int - 0 == success, != 0 == failure                         */
   1295 /* Parameters:  data(I) - pointer to ioctl data                             */
   1296 /*              cmd(I)  - ioctl command integer                             */
   1297 /*              mode(I) - file mode bits used with open                     */
   1298 /*                                                                          */
   1299 /* This function currently does not handle any ioctls and so just returns   */
   1300 /* EINVAL on all occasions.                                                 */
   1301 /* ------------------------------------------------------------------------ */
   1302 int
   1303 ipf_sync_ioctl(softc, data, cmd, mode, uid, ctx)
   1304 	ipf_main_softc_t *softc;
   1305 	caddr_t data;
   1306 	ioctlcmd_t cmd;
   1307 	int mode, uid;
   1308 	void *ctx;
   1309 {
   1310 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1311 	int error, i;
   1312 	SPL_INT(s);
   1313 
   1314 	switch (cmd)
   1315 	{
   1316         case SIOCIPFFL:
   1317 		error = BCOPYIN(data, &i, sizeof(i));
   1318 		if (error != 0) {
   1319 			IPFERROR(110023);
   1320 			error = EFAULT;
   1321 			break;
   1322 		}
   1323 
   1324 		switch (i)
   1325 		{
   1326 		case SMC_RLOG :
   1327 			SPL_NET(s);
   1328 			MUTEX_ENTER(&softs->ipsl_mutex);
   1329 			i = (softs->sl_tail - softs->sl_idx) +
   1330 			    (softs->su_tail - softs->su_idx);
   1331 			softs->sl_idx = 0;
   1332 			softs->su_idx = 0;
   1333 			softs->sl_tail = 0;
   1334 			softs->su_tail = 0;
   1335 			MUTEX_EXIT(&softs->ipsl_mutex);
   1336 			SPL_X(s);
   1337 			break;
   1338 
   1339 		case SMC_NAT :
   1340 			SPL_NET(s);
   1341 			WRITE_ENTER(&softs->ipf_syncnat);
   1342 			i = ipf_sync_flush_table(softs, SYNC_NATTABSZ,
   1343 						 softs->syncnattab);
   1344 			RWLOCK_EXIT(&softs->ipf_syncnat);
   1345 			SPL_X(s);
   1346 			break;
   1347 
   1348 		case SMC_STATE :
   1349 			SPL_NET(s);
   1350 			WRITE_ENTER(&softs->ipf_syncstate);
   1351 			i = ipf_sync_flush_table(softs, SYNC_STATETABSZ,
   1352 						 softs->syncstatetab);
   1353 			RWLOCK_EXIT(&softs->ipf_syncstate);
   1354 			SPL_X(s);
   1355 			break;
   1356 		}
   1357 
   1358 		error = BCOPYOUT(&i, data, sizeof(i));
   1359 		if (error != 0) {
   1360 			IPFERROR(110022);
   1361 			error = EFAULT;
   1362 		}
   1363 		break;
   1364 
   1365 	default :
   1366 		IPFERROR(110021);
   1367 		error = EINVAL;
   1368 		break;
   1369 	}
   1370 
   1371 	return error;
   1372 }
   1373 
   1374 
   1375 /* ------------------------------------------------------------------------ */
   1376 /* Function:    ipf_sync_canread                                            */
   1377 /* Returns:     int - 0 == success, != 0 == failure                         */
   1378 /* Parameters:  Nil                                                         */
   1379 /*                                                                          */
   1380 /* This function provides input to the poll handler about whether or not    */
   1381 /* there is data waiting to be read from the /dev/ipsync device.            */
   1382 /* ------------------------------------------------------------------------ */
   1383 int
   1384 ipf_sync_canread(arg)
   1385 	void *arg;
   1386 {
   1387 	ipf_sync_softc_t *softs = arg;
   1388 	return !((softs->sl_tail == softs->sl_idx) &&
   1389 		 (softs->su_tail == softs->su_idx));
   1390 }
   1391 
   1392 
/* ------------------------------------------------------------------------ */
/* Function:    ipf_sync_canwrite                                           */
/* Returns:     int - 1 == can always write                                 */
/* Parameters:  arg(I) - ignored                                            */
/*                                                                          */
/* Reports to the poll handler that writes are always accepted.             */
/* XXX Maybe this should return false if the sync table is full?            */
/* ------------------------------------------------------------------------ */
int
ipf_sync_canwrite(arg)
	void *arg;
{
	(void)arg;	/* no state is consulted */

	return 1;
}
   1408 
   1409 
   1410 /* ------------------------------------------------------------------------ */
   1411 /* Function:    ipf_sync_wakeup                                             */
   1412 /* Parameters:  Nil                                                         */
   1413 /* Returns:     Nil                                                         */
   1414 /*                                                                          */
   1415 /* This function implements the heuristics that decide how often to         */
   1416 /* generate a poll wakeup for programs that are waiting for information     */
   1417 /* about when they can do a read on /dev/ipsync.                            */
   1418 /*                                                                          */
   1419 /* There are three different considerations here:                           */
   1420 /* - do not keep a program waiting too long: ipf_sync_wake_interval is the  */
   1421 /*   maximum number of ipf ticks to let pass by;                            */
   1422 /* - do not let the queue of ouststanding things to generate notifies for   */
   1423 /*   get too full (ipf_sync_queue_high_wm is the high water mark);          */
   1424 /* - do not let too many events get collapsed in before deciding that the   */
   1425 /*   other host(s) need an update (ipf_sync_event_high_wm is the high water */
   1426 /*   mark for this counter.)                                                */
   1427 /* ------------------------------------------------------------------------ */
   1428 static void
   1429 ipf_sync_wakeup(softc)
   1430 	ipf_main_softc_t *softc;
   1431 {
   1432 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1433 
   1434 	softs->ipf_sync_events++;
   1435 	if ((softc->ipf_ticks >
   1436 	    softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval) ||
   1437 	    (softs->ipf_sync_events > softs->ipf_sync_event_high_wm) ||
   1438 	    ((softs->sl_tail - softs->sl_idx) >
   1439 	     softs->ipf_sync_queue_high_wm) ||
   1440 	    ((softs->su_tail - softs->su_idx) >
   1441 	     softs->ipf_sync_queue_high_wm)) {
   1442 
   1443 		ipf_sync_poll_wakeup(softc);
   1444 	}
   1445 }
   1446 
   1447 
   1448 /* ------------------------------------------------------------------------ */
   1449 /* Function:    ipf_sync_poll_wakeup                                        */
   1450 /* Parameters:  Nil                                                         */
   1451 /* Returns:     Nil                                                         */
   1452 /*                                                                          */
   1453 /* Deliver a poll wakeup and reset counters for two of the three heuristics */
   1454 /* ------------------------------------------------------------------------ */
   1455 static void
   1456 ipf_sync_poll_wakeup(softc)
   1457 	ipf_main_softc_t *softc;
   1458 {
   1459 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1460 
   1461 	softs->ipf_sync_events = 0;
   1462 	softs->ipf_sync_lastwakeup = softc->ipf_ticks;
   1463 
   1464 # ifdef _KERNEL
   1465 #  if SOLARIS
   1466 	MUTEX_ENTER(&softs->ipsl_mutex);
   1467 	cv_signal(&softs->ipslwait);
   1468 	MUTEX_EXIT(&softs->ipsl_mutex);
   1469 	pollwakeup(&softc->ipf_poll_head[IPL_LOGSYNC], POLLIN|POLLRDNORM);
   1470 #  else
   1471 	WAKEUP(&softs->sl_tail, 0);
   1472 	POLLWAKEUP(IPL_LOGSYNC);
   1473 #  endif
   1474 # endif
   1475 }
   1476 
   1477 
   1478 /* ------------------------------------------------------------------------ */
   1479 /* Function:    ipf_sync_expire                                             */
   1480 /* Parameters:  Nil                                                         */
   1481 /* Returns:     Nil                                                         */
   1482 /*                                                                          */
   1483 /* This is the function called even ipf_tick.  It implements one of the     */
   1484 /* three heuristics above *IF* there are events waiting.                    */
   1485 /* ------------------------------------------------------------------------ */
   1486 void
   1487 ipf_sync_expire(softc)
   1488 	ipf_main_softc_t *softc;
   1489 {
   1490 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1491 
   1492 	if ((softs->ipf_sync_events > 0) &&
   1493 	    (softc->ipf_ticks >
   1494 	     softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval)) {
   1495 		ipf_sync_poll_wakeup(softc);
   1496 	}
   1497 }
   1498