Home | History | Annotate | Line # | Download | only in netinet
      1 /*	$NetBSD: ip_sync.c,v 1.7 2026/01/11 15:47:19 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 2012 by Darren Reed.
      5  *
      6  * See the IPFILTER.LICENCE file for details on licencing.
      7  */
      8 #if defined(KERNEL) || defined(_KERNEL)
      9 # undef KERNEL
     10 # undef _KERNEL
     11 # define        KERNEL	1
     12 # define        _KERNEL	1
     13 #endif
     14 #include <sys/errno.h>
     15 #include <sys/types.h>
     16 #include <sys/param.h>
     17 #include <sys/file.h>
     18 #if !defined(_KERNEL) && !defined(__KERNEL__)
     19 # include <stdio.h>
     20 # include <stdlib.h>
     21 # include <string.h>
     22 # define _KERNEL
     23 # define KERNEL
     24 # ifdef __OpenBSD__
     25 struct file;
     26 # endif
     27 # include <sys/uio.h>
     28 # undef _KERNEL
     29 # undef KERNEL
     30 #else
     31 # include <sys/systm.h>
     32 # if !defined(__SVR4) && !defined(__svr4__)
     33 #  include <sys/mbuf.h>
     34 # endif
     35 # include <sys/select.h>
     36 # if __FreeBSD_version >= 500000
     37 #  include <sys/selinfo.h>
     38 # endif
     39 #endif
     40 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
     41 # include <sys/proc.h>
     42 #endif
     43 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
     44 # include <sys/filio.h>
     45 # include <sys/fcntl.h>
     46 #else
     47 # include <sys/ioctl.h>
     48 #endif
     49 #include <sys/time.h>
     50 #if !defined(linux)
     51 # include <sys/protosw.h>
     52 #endif
     53 #include <sys/socket.h>
     54 #if defined(__SVR4) || defined(__svr4__)
     55 # include <sys/filio.h>
     56 # include <sys/byteorder.h>
     57 # ifdef _KERNEL
     58 #  include <sys/dditypes.h>
     59 # endif
     60 # include <sys/stream.h>
     61 # include <sys/kmem.h>
     62 #endif
     63 
     64 #include <net/if.h>
     65 #ifdef sun
     66 # include <net/af.h>
     67 #endif
     68 #include <netinet/in.h>
     69 #include <netinet/in_systm.h>
     70 #include <netinet/ip.h>
     71 #include <netinet/tcp.h>
     72 #if !defined(linux)
     73 # include <netinet/ip_var.h>
     74 #endif
     75 #if !defined(__hpux) && !defined(linux)
     76 # include <netinet/tcp_fsm.h>
     77 #endif
     78 #include <netinet/udp.h>
     79 #include <netinet/ip_icmp.h>
     80 #include "netinet/ip_compat.h"
     81 #include "netinet/ip_fil.h"
     82 #include "netinet/ip_nat.h"
     83 #include "netinet/ip_frag.h"
     84 #include "netinet/ip_state.h"
     85 #include "netinet/ip_proxy.h"
     86 #include "netinet/ip_sync.h"
     87 #ifdef  USE_INET6
     88 #include <netinet/icmp6.h>
     89 #endif
     90 #if (__FreeBSD_version >= 300000)
     91 # include <sys/malloc.h>
     92 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
     93 #  include <sys/libkern.h>
     94 #  include <sys/systm.h>
     95 # endif
     96 #endif
     97 /* END OF INCLUDES */
     98 
     99 #if !defined(lint)
    100 #if defined(__NetBSD__)
    101 #include <sys/cdefs.h>
    102 __KERNEL_RCSID(0, "$NetBSD: ip_sync.c,v 1.7 2026/01/11 15:47:19 christos Exp $");
    103 #else
    104 static const char rcsid[] = "@(#)Id: ip_sync.c,v 1.1.1.2 2012/07/22 13:45:38 darrenr Exp";
    105 #endif
    106 #endif
    107 
    108 #define	SYNC_STATETABSZ	256
    109 #define	SYNC_NATTABSZ	256
    110 
    111 typedef struct ipf_sync_softc_s {
    112 	ipfmutex_t	ipf_syncadd;
    113 	ipfmutex_t	ipsl_mutex;
    114 	ipfrwlock_t	ipf_syncstate;
    115 	ipfrwlock_t	ipf_syncnat;
    116 #if SOLARIS && defined(_KERNEL)
    117 	kcondvar_t	ipslwait;
    118 #endif
    119 #if defined(linux) && defined(_KERNEL)
    120 	wait_queue_head_t	sl_tail_linux;
    121 #endif
    122 	synclist_t	**syncstatetab;
    123 	synclist_t	**syncnattab;
    124 	synclogent_t	*synclog;
    125 	syncupdent_t	*syncupd;
    126 	u_int		ipf_sync_num;
    127 	u_int		ipf_sync_wrap;
    128 	u_int		sl_idx;		/* next available sync log entry */
    129 	u_int		su_idx;		/* next available sync update entry */
    130 	u_int		sl_tail;	/* next sync log entry to read */
    131 	u_int		su_tail;	/* next sync update entry to read */
    132 	int		ipf_sync_log_sz;
    133 	int		ipf_sync_nat_tab_sz;
    134 	int		ipf_sync_state_tab_sz;
    135 	int		ipf_sync_debug;
    136 	int		ipf_sync_events;
    137 	u_32_t		ipf_sync_lastwakeup;
    138 	int		ipf_sync_wake_interval;
    139 	int		ipf_sync_event_high_wm;
    140 	int		ipf_sync_queue_high_wm;
    141 	int		ipf_sync_inited;
    142 } ipf_sync_softc_t;
    143 
    144 static int ipf_sync_flush_table(ipf_sync_softc_t *, int, synclist_t **);
    145 static void ipf_sync_wakeup(ipf_main_softc_t *);
    146 static void ipf_sync_del(ipf_sync_softc_t *, synclist_t *);
    147 static void ipf_sync_poll_wakeup(ipf_main_softc_t *);
    148 static int ipf_sync_nat(ipf_main_softc_t *, synchdr_t *, void *);
    149 static int ipf_sync_state(ipf_main_softc_t *, synchdr_t *, void *);
    150 
    151 # if !defined(sparc) && !defined(__hppa)
    152 void ipf_sync_tcporder(int, struct tcpdata *);
    153 void ipf_sync_natorder(int, struct nat *);
    154 void ipf_sync_storder(int, struct ipstate *);
    155 # endif
    156 
    157 
    158 void *
    159 ipf_sync_soft_create(ipf_main_softc_t *softc)
    160 {
    161 	ipf_sync_softc_t *softs;
    162 
    163 	KMALLOC(softs, ipf_sync_softc_t *);
    164 	if (softs == NULL) {
    165 		IPFERROR(110024);
    166 		return NULL;
    167 	}
    168 
    169 	bzero((char *)softs, sizeof(*softs));
    170 
    171 	softs->ipf_sync_log_sz = SYNCLOG_SZ;
    172 	softs->ipf_sync_nat_tab_sz = SYNC_STATETABSZ;
    173 	softs->ipf_sync_state_tab_sz = SYNC_STATETABSZ;
    174 	softs->ipf_sync_event_high_wm = SYNCLOG_SZ * 100 / 90;	/* 90% */
    175 	softs->ipf_sync_queue_high_wm = SYNCLOG_SZ * 100 / 90;	/* 90% */
    176 
    177 	return softs;
    178 }
    179 
    180 
    181 /* ------------------------------------------------------------------------ */
    182 /* Function:    ipf_sync_init                                               */
    183 /* Returns:     int - 0 == success, -1 == failure                           */
    184 /* Parameters:  Nil                                                         */
    185 /*                                                                          */
    186 /* Initialise all of the locks required for the sync code and initialise    */
    187 /* any data structures, as required.                                        */
    188 /* ------------------------------------------------------------------------ */
    189 int
    190 ipf_sync_soft_init(ipf_main_softc_t *softc, void *arg)
    191 {
    192 	ipf_sync_softc_t *softs = arg;
    193 
    194 	KMALLOCS(softs->synclog, synclogent_t *,
    195 		 softs->ipf_sync_log_sz * sizeof(*softs->synclog));
    196 	if (softs->synclog == NULL)
    197 		return -1;
    198 	bzero((char *)softs->synclog,
    199 	      softs->ipf_sync_log_sz * sizeof(*softs->synclog));
    200 
    201 	KMALLOCS(softs->syncupd, syncupdent_t *,
    202 		 softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
    203 	if (softs->syncupd == NULL)
    204 		return -2;
    205 	bzero((char *)softs->syncupd,
    206 	      softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
    207 
    208 	KMALLOCS(softs->syncstatetab, synclist_t **,
    209 		 softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
    210 	if (softs->syncstatetab == NULL)
    211 		return -3;
    212 	bzero((char *)softs->syncstatetab,
    213 	      softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
    214 
    215 	KMALLOCS(softs->syncnattab, synclist_t **,
    216 		 softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
    217 	if (softs->syncnattab == NULL)
    218 		return -3;
    219 	bzero((char *)softs->syncnattab,
    220 	      softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
    221 
    222 	softs->ipf_sync_num = 1;
    223 	softs->ipf_sync_wrap = 0;
    224 	softs->sl_idx = 0;
    225 	softs->su_idx = 0;
    226 	softs->sl_tail = 0;
    227 	softs->su_tail = 0;
    228 	softs->ipf_sync_events = 0;
    229 	softs->ipf_sync_lastwakeup = 0;
    230 
    231 
    232 # if SOLARIS && defined(_KERNEL)
    233 	cv_init(&softs->ipslwait, "ipsl condvar", CV_DRIVER, NULL);
    234 # endif
    235 	RWLOCK_INIT(&softs->ipf_syncstate, "add things to state sync table");
    236 	RWLOCK_INIT(&softs->ipf_syncnat, "add things to nat sync table");
    237 	MUTEX_INIT(&softs->ipf_syncadd, "add things to sync table");
    238 	MUTEX_INIT(&softs->ipsl_mutex, "read ring lock");
    239 
    240 	softs->ipf_sync_inited = 1;
    241 
    242 	return 0;
    243 }
    244 
    245 
    246 /* ------------------------------------------------------------------------ */
    247 /* Function:    ipf_sync_unload                                             */
    248 /* Returns:     int - 0 == success, -1 == failure                           */
    249 /* Parameters:  Nil                                                         */
    250 /*                                                                          */
    251 /* Destroy the locks created when initialising and free any memory in use   */
    252 /* with the synchronisation tables.                                         */
    253 /* ------------------------------------------------------------------------ */
    254 int
    255 ipf_sync_soft_fini(ipf_main_softc_t *softc, void *arg)
    256 {
    257 	ipf_sync_softc_t *softs = arg;
    258 
    259 	if (softs->syncnattab != NULL) {
    260 		ipf_sync_flush_table(softs, softs->ipf_sync_nat_tab_sz,
    261 				     softs->syncnattab);
    262 		KFREES(softs->syncnattab,
    263 		       softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
    264 		softs->syncnattab = NULL;
    265 	}
    266 
    267 	if (softs->syncstatetab != NULL) {
    268 		ipf_sync_flush_table(softs, softs->ipf_sync_state_tab_sz,
    269 				     softs->syncstatetab);
    270 		KFREES(softs->syncstatetab,
    271 		       softs->ipf_sync_state_tab_sz *
    272 		       sizeof(*softs->syncstatetab));
    273 		softs->syncstatetab = NULL;
    274 	}
    275 
    276 	if (softs->syncupd != NULL) {
    277 		KFREES(softs->syncupd,
    278 		       softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
    279 		softs->syncupd = NULL;
    280 	}
    281 
    282 	if (softs->synclog != NULL) {
    283 		KFREES(softs->synclog,
    284 		       softs->ipf_sync_log_sz * sizeof(*softs->synclog));
    285 		softs->synclog = NULL;
    286 	}
    287 
    288 	if (softs->ipf_sync_inited == 1) {
    289 		MUTEX_DESTROY(&softs->ipsl_mutex);
    290 		MUTEX_DESTROY(&softs->ipf_syncadd);
    291 		RW_DESTROY(&softs->ipf_syncnat);
    292 		RW_DESTROY(&softs->ipf_syncstate);
    293 		softs->ipf_sync_inited = 0;
    294 	}
    295 
    296 	return 0;
    297 }
    298 
    299 void
    300 ipf_sync_soft_destroy(ipf_main_softc_t *softc, void *arg)
    301 {
    302 	ipf_sync_softc_t *softs = arg;
    303 
    304 	KFREE(softs);
    305 }
    306 
    307 
    308 # if !defined(sparc) && !defined(__hppa)
    309 /* ------------------------------------------------------------------------ */
    310 /* Function:    ipf_sync_tcporder                                           */
    311 /* Returns:     Nil                                                         */
    312 /* Parameters:  way(I) - direction of byte order conversion.                */
    313 /*              td(IO) - pointer to data to be converted.                   */
    314 /*                                                                          */
    315 /* Do byte swapping on values in the TCP state information structure that   */
    316 /* need to be used at both ends by the host in their native byte order.     */
    317 /* ------------------------------------------------------------------------ */
    318 void
    319 ipf_sync_tcporder(int way, tcpdata_t *td)
    320 {
    321 	if (way) {
    322 		td->td_maxwin = htons(td->td_maxwin);
    323 		td->td_end = htonl(td->td_end);
    324 		td->td_maxend = htonl(td->td_maxend);
    325 	} else {
    326 		td->td_maxwin = ntohs(td->td_maxwin);
    327 		td->td_end = ntohl(td->td_end);
    328 		td->td_maxend = ntohl(td->td_maxend);
    329 	}
    330 }
    331 
    332 
    333 /* ------------------------------------------------------------------------ */
    334 /* Function:    ipf_sync_natorder                                           */
    335 /* Returns:     Nil                                                         */
    336 /* Parameters:  way(I)  - direction of byte order conversion.               */
    337 /*              nat(IO) - pointer to data to be converted.                  */
    338 /*                                                                          */
    339 /* Do byte swapping on values in the NAT data structure that need to be     */
    340 /* used at both ends by the host in their native byte order.                */
    341 /* ------------------------------------------------------------------------ */
    342 void
    343 ipf_sync_natorder(int way, nat_t *n)
    344 {
    345 	if (way) {
    346 		n->nat_age = htonl(n->nat_age);
    347 		n->nat_flags = htonl(n->nat_flags);
    348 		n->nat_ipsumd = htonl(n->nat_ipsumd);
    349 		n->nat_use = htonl(n->nat_use);
    350 		n->nat_dir = htonl(n->nat_dir);
    351 	} else {
    352 		n->nat_age = ntohl(n->nat_age);
    353 		n->nat_flags = ntohl(n->nat_flags);
    354 		n->nat_ipsumd = ntohl(n->nat_ipsumd);
    355 		n->nat_use = ntohl(n->nat_use);
    356 		n->nat_dir = ntohl(n->nat_dir);
    357 	}
    358 }
    359 
    360 
    361 /* ------------------------------------------------------------------------ */
    362 /* Function:    ipf_sync_storder                                            */
    363 /* Returns:     Nil                                                         */
    364 /* Parameters:  way(I)  - direction of byte order conversion.               */
    365 /*              ips(IO) - pointer to data to be converted.                  */
    366 /*                                                                          */
    367 /* Do byte swapping on values in the IP state data structure that need to   */
    368 /* be used at both ends by the host in their native byte order.             */
    369 /* ------------------------------------------------------------------------ */
    370 void
    371 ipf_sync_storder(int way, ipstate_t *ips)
    372 {
    373 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[0]);
    374 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[1]);
    375 
    376 	if (way) {
    377 		ips->is_hv = htonl(ips->is_hv);
    378 		ips->is_die = htonl(ips->is_die);
    379 		ips->is_pass = htonl(ips->is_pass);
    380 		ips->is_flags = htonl(ips->is_flags);
    381 		ips->is_opt[0] = htonl(ips->is_opt[0]);
    382 		ips->is_opt[1] = htonl(ips->is_opt[1]);
    383 		ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
    384 		ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
    385 		ips->is_sec = htons(ips->is_sec);
    386 		ips->is_secmsk = htons(ips->is_secmsk);
    387 		ips->is_auth = htons(ips->is_auth);
    388 		ips->is_authmsk = htons(ips->is_authmsk);
    389 		ips->is_s0[0] = htonl(ips->is_s0[0]);
    390 		ips->is_s0[1] = htonl(ips->is_s0[1]);
    391 		ips->is_smsk[0] = htons(ips->is_smsk[0]);
    392 		ips->is_smsk[1] = htons(ips->is_smsk[1]);
    393 	} else {
    394 		ips->is_hv = ntohl(ips->is_hv);
    395 		ips->is_die = ntohl(ips->is_die);
    396 		ips->is_pass = ntohl(ips->is_pass);
    397 		ips->is_flags = ntohl(ips->is_flags);
    398 		ips->is_opt[0] = ntohl(ips->is_opt[0]);
    399 		ips->is_opt[1] = ntohl(ips->is_opt[1]);
    400 		ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
    401 		ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
    402 		ips->is_sec = ntohs(ips->is_sec);
    403 		ips->is_secmsk = ntohs(ips->is_secmsk);
    404 		ips->is_auth = ntohs(ips->is_auth);
    405 		ips->is_authmsk = ntohs(ips->is_authmsk);
    406 		ips->is_s0[0] = ntohl(ips->is_s0[0]);
    407 		ips->is_s0[1] = ntohl(ips->is_s0[1]);
    408 		ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
    409 		ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
    410 	}
    411 }
    412 # else /* !defined(sparc) && !defined(__hppa) */
    413 #  define	ipf_sync_tcporder(x,y)
    414 #  define	ipf_sync_natorder(x,y)
    415 #  define	ipf_sync_storder(x,y)
    416 # endif /* !defined(sparc) && !defined(__hppa) */
    417 
    418 
    419 /* ------------------------------------------------------------------------ */
    420 /* Function:    ipf_sync_write                                              */
    421 /* Returns:     int    - 0 == success, else error value.                    */
    422 /* Parameters:  uio(I) - pointer to information about data to write         */
    423 /*                                                                          */
    424 /* Moves data from user space into the kernel and uses it for updating data */
    425 /* structures in the state/NAT tables.                                      */
    426 /* ------------------------------------------------------------------------ */
    427 int
    428 ipf_sync_write(ipf_main_softc_t *softc, struct uio *uio)
    429 {
    430 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    431 	synchdr_t sh;
    432 
    433 	/*
    434 	 * THIS MUST BE SUFFICIENT LARGE TO STORE
    435 	 * ANY POSSIBLE DATA TYPE
    436 	 */
    437 	char data[2048];
    438 
    439 	int err = 0;
    440 
    441 #  if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__)
    442 	uio->uio_rw = UIO_WRITE;
    443 #  endif
    444 
    445 	/* Try to get bytes */
    446 	while (uio->uio_resid > 0) {
    447 
    448 		if (uio->uio_resid >= sizeof(sh)) {
    449 
    450 			err = UIOMOVE((void *)&sh, sizeof(sh), UIO_WRITE, uio);
    451 
    452 			if (err) {
    453 				if (softs->ipf_sync_debug > 2)
    454 					printf("uiomove(header) failed: %d\n",
    455 						err);
    456 				return err;
    457 			}
    458 
    459 			/* convert to host order */
    460 			sh.sm_magic = ntohl(sh.sm_magic);
    461 			sh.sm_len = ntohl(sh.sm_len);
    462 			sh.sm_num = ntohl(sh.sm_num);
    463 
    464 			if (softs->ipf_sync_debug > 8)
    465 				printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
    466 					sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
    467 					sh.sm_table, sh.sm_rev, sh.sm_len,
    468 					sh.sm_magic);
    469 
    470 			if (sh.sm_magic != SYNHDRMAGIC) {
    471 				if (softs->ipf_sync_debug > 2)
    472 					printf("uiomove(header) invalid %s\n",
    473 						"magic");
    474 				IPFERROR(110001);
    475 				return EINVAL;
    476 			}
    477 
    478 			if (sh.sm_v != 4 && sh.sm_v != 6) {
    479 				if (softs->ipf_sync_debug > 2)
    480 					printf("uiomove(header) invalid %s\n",
    481 						"protocol");
    482 				IPFERROR(110002);
    483 				return EINVAL;
    484 			}
    485 
    486 			if (sh.sm_cmd > SMC_MAXCMD) {
    487 				if (softs->ipf_sync_debug > 2)
    488 					printf("uiomove(header) invalid %s\n",
    489 						"command");
    490 				IPFERROR(110003);
    491 				return EINVAL;
    492 			}
    493 
    494 
    495 			if (sh.sm_table > SMC_MAXTBL) {
    496 				if (softs->ipf_sync_debug > 2)
    497 					printf("uiomove(header) invalid %s\n",
    498 						"table");
    499 				IPFERROR(110004);
    500 				return EINVAL;
    501 			}
    502 
    503 		} else {
    504 			/* unsufficient data, wait until next call */
    505 			if (softs->ipf_sync_debug > 2)
    506 				printf("uiomove(header) insufficient data");
    507 			IPFERROR(110005);
    508 			return EAGAIN;
    509 	 	}
    510 
    511 
    512 		/*
    513 		 * We have a header, so try to read the amount of data
    514 		 * needed for the request
    515 		 */
    516 
    517 		/* not supported */
    518 		if (sh.sm_len == 0) {
    519 			if (softs->ipf_sync_debug > 2)
    520 				printf("uiomove(data zero length %s\n",
    521 					"not supported");
    522 			IPFERROR(110006);
    523 			return EINVAL;
    524 		}
    525 
    526 		if (uio->uio_resid >= sh.sm_len) {
    527 			if (sh.sm_len > sizeof(data)) {
    528 				printf("uiomove(data) size too big: %d\n",
    529 				       sh.sm_len);
    530 				IPFERROR(110008);
    531 				return ENOSPC;
    532 			}
    533 
    534 			err = UIOMOVE((void *)data, sh.sm_len, UIO_WRITE, uio);
    535 
    536 			if (err) {
    537 				if (softs->ipf_sync_debug > 2)
    538 					printf("uiomove(data) failed: %d\n",
    539 						err);
    540 				return err;
    541 			}
    542 
    543 			if (softs->ipf_sync_debug > 7)
    544 				printf("uiomove(data) %d bytes read\n",
    545 					sh.sm_len);
    546 
    547 			if (sh.sm_table == SMC_STATE)
    548 				err = ipf_sync_state(softc, &sh, data);
    549 			else if (sh.sm_table == SMC_NAT)
    550 				err = ipf_sync_nat(softc, &sh, data);
    551 			if (softs->ipf_sync_debug > 7)
    552 				printf("[%d] Finished with error %d\n",
    553 					sh.sm_num, err);
    554 
    555 		} else {
    556 			/* insufficient data, wait until next call */
    557 			if (softs->ipf_sync_debug > 2)
    558 				printf("uiomove(data) %s %d bytes, got %d\n",
    559 					"insufficient data, need",
    560 					sh.sm_len, (int)uio->uio_resid);
    561 			IPFERROR(110007);
    562 			return EAGAIN;
    563 		}
    564 	}
    565 
    566 	/* no more data */
    567 	return 0;
    568 }
    569 
    570 
    571 /* ------------------------------------------------------------------------ */
    572 /* Function:    ipf_sync_read                                               */
    573 /* Returns:     int    - 0 == success, else error value.                    */
    574 /* Parameters:  uio(O) - pointer to information about where to store data   */
    575 /*                                                                          */
    576 /* This function is called when a user program wants to read some data      */
    577 /* for pending state/NAT updates.  If no data is available, the caller is   */
    578 /* put to sleep, pending a wakeup from the "lower half" of this code.       */
    579 /* ------------------------------------------------------------------------ */
    580 int
    581 ipf_sync_read(ipf_main_softc_t *softc, struct uio *uio)
    582 {
    583 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    584 	syncupdent_t *su;
    585 	synclogent_t *sl;
    586 	int err = 0;
    587 
    588 	if ((uio->uio_resid & 3) || (uio->uio_resid < 8)) {
    589 		IPFERROR(110008);
    590 		return EINVAL;
    591 	}
    592 
    593 #  if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__)
    594 	uio->uio_rw = UIO_READ;
    595 #  endif
    596 
    597 	MUTEX_ENTER(&softs->ipsl_mutex);
    598 	while ((softs->sl_tail == softs->sl_idx) &&
    599 	       (softs->su_tail == softs->su_idx)) {
    600 #  if defined(_KERNEL)
    601 #   if SOLARIS
    602 		if (!cv_wait_sig(&softs->ipslwait, &softs->ipsl_mutex.ipf_lk)) {
    603 			MUTEX_EXIT(&softs->ipsl_mutex);
    604 			IPFERROR(110009);
    605 			return EINTR;
    606 		}
    607 #   else
    608 #    ifdef __hpux
    609 		{
    610 		lock_t *l;
    611 
    612 		l = get_sleep_lock(&softs->sl_tail);
    613 		err = sleep(&softs->sl_tail, PZERO+1);
    614 		if (err) {
    615 			MUTEX_EXIT(&softs->ipsl_mutex);
    616 			IPFERROR(110010);
    617 			return EINTR;
    618 		}
    619 		spinunlock(l);
    620 		}
    621 #    else /* __hpux */
    622 #     ifdef __osf__
    623 		err = mpsleep(&softs->sl_tail, PSUSP|PCATCH,  "ipl sleep", 0,
    624 			      &softs->ipsl_mutex, MS_LOCK_SIMPLE);
    625 		if (err) {
    626 			IPFERROR(110011);
    627 			return EINTR;
    628 		}
    629 #     else
    630 		MUTEX_EXIT(&softs->ipsl_mutex);
    631 		err = SLEEP(&softs->sl_tail, "ipl sleep");
    632 		if (err) {
    633 			IPFERROR(110012);
    634 			return EINTR;
    635 		}
    636 		MUTEX_ENTER(&softs->ipsl_mutex);
    637 #     endif /* __osf__ */
    638 #    endif /* __hpux */
    639 #   endif /* SOLARIS */
    640 #  endif /* _KERNEL */
    641 	}
    642 
    643 	while ((softs->sl_tail < softs->sl_idx) &&
    644 	       (uio->uio_resid > sizeof(*sl))) {
    645 		sl = softs->synclog + softs->sl_tail++;
    646 		MUTEX_EXIT(&softs->ipsl_mutex);
    647 		err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
    648 		if (err != 0)
    649 			goto goterror;
    650 		MUTEX_ENTER(&softs->ipsl_mutex);
    651 	}
    652 
    653 	while ((softs->su_tail < softs->su_idx) &&
    654 	       (uio->uio_resid > sizeof(*su))) {
    655 		su = softs->syncupd + softs->su_tail;
    656 		softs->su_tail++;
    657 		MUTEX_EXIT(&softs->ipsl_mutex);
    658 		err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
    659 		if (err != 0)
    660 			goto goterror;
    661 		MUTEX_ENTER(&softs->ipsl_mutex);
    662 		if (su->sup_hdr.sm_sl != NULL)
    663 			su->sup_hdr.sm_sl->sl_idx = -1;
    664 	}
    665 	if (softs->sl_tail == softs->sl_idx)
    666 		softs->sl_tail = softs->sl_idx = 0;
    667 	if (softs->su_tail == softs->su_idx)
    668 		softs->su_tail = softs->su_idx = 0;
    669 	MUTEX_EXIT(&softs->ipsl_mutex);
    670 goterror:
    671 	return err;
    672 }
    673 
    674 
    675 /* ------------------------------------------------------------------------ */
    676 /* Function:    ipf_sync_state                                              */
    677 /* Returns:     int    - 0 == success, else error value.                    */
    678 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
    679 /*              uio(I) - pointer to user data for further information       */
    680 /*                                                                          */
    681 /* Updates the state table according to information passed in the sync      */
    682 /* header.  As required, more data is fetched from the uio structure but    */
    683 /* varies depending on the contents of the sync header.  This function can  */
    684 /* create a new state entry or update one.  Deletion is left to the state   */
    685 /* structures being timed out correctly.                                    */
    686 /* ------------------------------------------------------------------------ */
    687 static int
    688 ipf_sync_state(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
    689 {
    690 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    691 	synctcp_update_t su;
    692 	ipstate_t *is, sn;
    693 	synclist_t *sl;
    694 	frentry_t *fr;
    695 	u_int hv;
    696 	int err = 0;
    697 
    698 	hv = sp->sm_num & (softs->ipf_sync_state_tab_sz - 1);
    699 
    700 	switch (sp->sm_cmd)
    701 	{
    702 	case SMC_CREATE :
    703 
    704 		bcopy(data, &sn, sizeof(sn));
    705 		KMALLOC(is, ipstate_t *);
    706 		if (is == NULL) {
    707 			IPFERROR(110013);
    708 			err = ENOMEM;
    709 			break;
    710 		}
    711 
    712 		KMALLOC(sl, synclist_t *);
    713 		if (sl == NULL) {
    714 			IPFERROR(110014);
    715 			err = ENOMEM;
    716 			KFREE(is);
    717 			break;
    718 		}
    719 
    720 		bzero((char *)is, offsetof(ipstate_t, is_die));
    721 		bcopy((char *)&sn.is_die, (char *)&is->is_die,
    722 		      sizeof(*is) - offsetof(ipstate_t, is_die));
    723 		ipf_sync_storder(0, is);
    724 
    725 		/*
    726 		 * We need to find the same rule on the slave as was used on
    727 		 * the master to create this state entry.
    728 		 */
    729 		READ_ENTER(&softc->ipf_mutex);
    730 		fr = ipf_getrulen(softc, IPL_LOGIPF, sn.is_group, sn.is_rulen);
    731 		if (fr != NULL) {
    732 			MUTEX_ENTER(&fr->fr_lock);
    733 			fr->fr_ref++;
    734 			fr->fr_statecnt++;
    735 			MUTEX_EXIT(&fr->fr_lock);
    736 		}
    737 		RWLOCK_EXIT(&softc->ipf_mutex);
    738 
    739 		if (softs->ipf_sync_debug > 4)
    740 			printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
    741 
    742 		is->is_rule = fr;
    743 		is->is_sync = sl;
    744 
    745 		sl->sl_idx = -1;
    746 		sl->sl_ips = is;
    747 		bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
    748 
    749 		WRITE_ENTER(&softs->ipf_syncstate);
    750 		WRITE_ENTER(&softc->ipf_state);
    751 
    752 		sl->sl_pnext = softs->syncstatetab + hv;
    753 		sl->sl_next = softs->syncstatetab[hv];
    754 		if (softs->syncstatetab[hv] != NULL)
    755 			softs->syncstatetab[hv]->sl_pnext = &sl->sl_next;
    756 		softs->syncstatetab[hv] = sl;
    757 		MUTEX_DOWNGRADE(&softs->ipf_syncstate);
    758 		ipf_state_insert(softc, is, sp->sm_rev);
    759 		/*
    760 		 * Do not initialise the interface pointers for the state
    761 		 * entry as the full complement of interface names may not
    762 		 * be present.
    763 		 *
    764 		 * Put this state entry on its timeout queue.
    765 		 */
    766 		/*fr_setstatequeue(is, sp->sm_rev);*/
    767 		break;
    768 
    769 	case SMC_UPDATE :
    770 		bcopy(data, &su, sizeof(su));
    771 
    772 		if (softs->ipf_sync_debug > 4)
    773 			printf("[%d] Update age %lu state %d/%d \n",
    774 				sp->sm_num, su.stu_age, su.stu_state[0],
    775 				su.stu_state[1]);
    776 
    777 		READ_ENTER(&softs->ipf_syncstate);
    778 		for (sl = softs->syncstatetab[hv]; (sl != NULL);
    779 		     sl = sl->sl_next)
    780 			if (sl->sl_hdr.sm_num == sp->sm_num)
    781 				break;
    782 		if (sl == NULL) {
    783 			if (softs->ipf_sync_debug > 1)
    784 				printf("[%d] State not found - can't update\n",
    785 					sp->sm_num);
    786 			RWLOCK_EXIT(&softs->ipf_syncstate);
    787 			IPFERROR(110015);
    788 			err = ENOENT;
    789 			break;
    790 		}
    791 
    792 		READ_ENTER(&softc->ipf_state);
    793 
    794 		if (softs->ipf_sync_debug > 6)
    795 			printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
    796 				sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
    797 				sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
    798 				sl->sl_hdr.sm_rev);
    799 
    800 		is = sl->sl_ips;
    801 
    802 		MUTEX_ENTER(&is->is_lock);
    803 		switch (sp->sm_p)
    804 		{
    805 		case IPPROTO_TCP :
    806 			/* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
    807 			is->is_send = su.stu_data[0].td_end;
    808 			is->is_maxsend = su.stu_data[0].td_maxend;
    809 			is->is_maxswin = su.stu_data[0].td_maxwin;
    810 			is->is_state[0] = su.stu_state[0];
    811 			is->is_dend = su.stu_data[1].td_end;
    812 			is->is_maxdend = su.stu_data[1].td_maxend;
    813 			is->is_maxdwin = su.stu_data[1].td_maxwin;
    814 			is->is_state[1] = su.stu_state[1];
    815 			break;
    816 		default :
    817 			break;
    818 		}
    819 
    820 		if (softs->ipf_sync_debug > 6)
    821 			printf("[%d] Setting timers for state\n", sp->sm_num);
    822 
    823 		ipf_state_setqueue(softc, is, sp->sm_rev);
    824 
    825 		MUTEX_EXIT(&is->is_lock);
    826 		break;
    827 
    828 	default :
    829 		IPFERROR(110016);
    830 		err = EINVAL;
    831 		break;
    832 	}
    833 
    834 	if (err == 0) {
    835 		RWLOCK_EXIT(&softc->ipf_state);
    836 		RWLOCK_EXIT(&softs->ipf_syncstate);
    837 	}
    838 
    839 	if (softs->ipf_sync_debug > 6)
    840 		printf("[%d] Update completed with error %d\n",
    841 			sp->sm_num, err);
    842 
    843 	return err;
    844 }
    845 
    846 
    847 /* ------------------------------------------------------------------------ */
    848 /* Function:    ipf_sync_del                                                */
    849 /* Returns:     Nil                                                         */
    850 /* Parameters:  sl(I) - pointer to synclist object to delete                */
    851 /*                                                                          */
    852 /* Deletes an object from the synclist.                                     */
    853 /* ------------------------------------------------------------------------ */
    854 static void
    855 ipf_sync_del(ipf_sync_softc_t *softs, synclist_t *sl)
    856 {
    857 	*sl->sl_pnext = sl->sl_next;
    858 	if (sl->sl_next != NULL)
    859 		sl->sl_next->sl_pnext = sl->sl_pnext;
    860 	if (sl->sl_idx != -1)
    861 		softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
    862 }
    863 
    864 
    865 /* ------------------------------------------------------------------------ */
    866 /* Function:    ipf_sync_del_state                                          */
    867 /* Returns:     Nil                                                         */
    868 /* Parameters:  sl(I) - pointer to synclist object to delete                */
    869 /*                                                                          */
    870 /* Deletes an object from the synclist state table and free's its memory.   */
    871 /* ------------------------------------------------------------------------ */
    872 void
    873 ipf_sync_del_state(void *arg, synclist_t *sl)
    874 {
    875 	ipf_sync_softc_t *softs = arg;
    876 
    877 	WRITE_ENTER(&softs->ipf_syncstate);
    878 	ipf_sync_del(softs, sl);
    879 	RWLOCK_EXIT(&softs->ipf_syncstate);
    880 	KFREE(sl);
    881 }
    882 
    883 
    884 /* ------------------------------------------------------------------------ */
    885 /* Function:    ipf_sync_del_nat                                            */
    886 /* Returns:     Nil                                                         */
    887 /* Parameters:  sl(I) - pointer to synclist object to delete                */
    888 /*                                                                          */
    889 /* Deletes an object from the synclist nat table and free's its memory.     */
    890 /* ------------------------------------------------------------------------ */
    891 void
    892 ipf_sync_del_nat(void *arg, synclist_t *sl)
    893 {
    894 	ipf_sync_softc_t *softs = arg;
    895 
    896 	WRITE_ENTER(&softs->ipf_syncnat);
    897 	ipf_sync_del(softs, sl);
    898 	RWLOCK_EXIT(&softs->ipf_syncnat);
    899 	KFREE(sl);
    900 }
    901 
    902 
    903 /* ------------------------------------------------------------------------ */
    904 /* Function:    ipf_sync_nat                                                */
    905 /* Returns:     int    - 0 == success, else error value.                    */
    906 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
    907 /*              uio(I) - pointer to user data for further information       */
    908 /*                                                                          */
    909 /* Updates the NAT  table according to information passed in the sync       */
    910 /* header.  As required, more data is fetched from the uio structure but    */
    911 /* varies depending on the contents of the sync header.  This function can  */
    912 /* create a new NAT entry or update one.  Deletion is left to the NAT       */
    913 /* structures being timed out correctly.                                    */
    914 /* ------------------------------------------------------------------------ */
    915 static int
    916 ipf_sync_nat(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
    917 {
    918 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
    919 	syncupdent_t su;
    920 	nat_t *n, *nat;
    921 	synclist_t *sl;
    922 	u_int hv = 0;
    923 	int err = 0;
    924 
    925 	READ_ENTER(&softs->ipf_syncnat);
    926 
    927 	switch (sp->sm_cmd)
    928 	{
    929 	case SMC_CREATE :
    930 		KMALLOC(n, nat_t *);
    931 		if (n == NULL) {
    932 			IPFERROR(110017);
    933 			err = ENOMEM;
    934 			break;
    935 		}
    936 
    937 		KMALLOC(sl, synclist_t *);
    938 		if (sl == NULL) {
    939 			IPFERROR(110018);
    940 			err = ENOMEM;
    941 			KFREE(n);
    942 			break;
    943 		}
    944 
    945 		nat = (nat_t *)data;
    946 		bzero((char *)n, offsetof(nat_t, nat_age));
    947 		bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
    948 		      sizeof(*n) - offsetof(nat_t, nat_age));
    949 		ipf_sync_natorder(0, n);
    950 		n->nat_sync = sl;
    951 		n->nat_rev = sl->sl_rev;
    952 
    953 		sl->sl_idx = -1;
    954 		sl->sl_ipn = n;
    955 		sl->sl_num = ntohl(sp->sm_num);
    956 
    957 		WRITE_ENTER(&softc->ipf_nat);
    958 		sl->sl_pnext = softs->syncnattab + hv;
    959 		sl->sl_next = softs->syncnattab[hv];
    960 		if (softs->syncnattab[hv] != NULL)
    961 			softs->syncnattab[hv]->sl_pnext = &sl->sl_next;
    962 		softs->syncnattab[hv] = sl;
    963 		(void) ipf_nat_insert(softc, softc->ipf_nat_soft, n);
    964 		RWLOCK_EXIT(&softc->ipf_nat);
    965 		break;
    966 
    967 	case SMC_UPDATE :
    968 		bcopy(data, &su, sizeof(su));
    969 
    970 		for (sl = softs->syncnattab[hv]; (sl != NULL);
    971 		     sl = sl->sl_next)
    972 			if (sl->sl_hdr.sm_num == sp->sm_num)
    973 				break;
    974 		if (sl == NULL) {
    975 			IPFERROR(110019);
    976 			err = ENOENT;
    977 			break;
    978 		}
    979 
    980 		READ_ENTER(&softc->ipf_nat);
    981 
    982 		nat = sl->sl_ipn;
    983 		nat->nat_rev = sl->sl_rev;
    984 
    985 		MUTEX_ENTER(&nat->nat_lock);
    986 		ipf_nat_setqueue(softc, softc->ipf_nat_soft, nat);
    987 		MUTEX_EXIT(&nat->nat_lock);
    988 
    989 		RWLOCK_EXIT(&softc->ipf_nat);
    990 
    991 		break;
    992 
    993 	default :
    994 		IPFERROR(110020);
    995 		err = EINVAL;
    996 		break;
    997 	}
    998 
    999 	RWLOCK_EXIT(&softs->ipf_syncnat);
   1000 	return err;
   1001 }
   1002 
   1003 
   1004 /* ------------------------------------------------------------------------ */
   1005 /* Function:    ipf_sync_new                                                */
   1006 /* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
   1007 /*                            data structure.                               */
   1008 /* Parameters:  tab(I) - type of synclist_t to create                       */
   1009 /*              fin(I) - pointer to packet information                      */
   1010 /*              ptr(I) - pointer to owning object                           */
   1011 /*                                                                          */
   1012 /* Creates a new sync table entry and notifies any sleepers that it's there */
   1013 /* waiting to be processed.                                                 */
   1014 /* ------------------------------------------------------------------------ */
   1015 synclist_t *
   1016 ipf_sync_new(ipf_main_softc_t *softc, int tab, fr_info_t *fin, void *ptr)
   1017 {
   1018 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1019 	synclist_t *sl, *ss;
   1020 	synclogent_t *sle;
   1021 	u_int hv, sz;
   1022 
   1023 	if (softs->sl_idx == softs->ipf_sync_log_sz)
   1024 		return NULL;
   1025 	KMALLOC(sl, synclist_t *);
   1026 	if (sl == NULL)
   1027 		return NULL;
   1028 
   1029 	MUTEX_ENTER(&softs->ipf_syncadd);
   1030 	/*
   1031 	 * Get a unique number for this synclist_t.  The number is only meant
   1032 	 * to be unique for the lifetime of the structure and may be reused
   1033 	 * later.
   1034 	 */
   1035 	softs->ipf_sync_num++;
   1036 	if (softs->ipf_sync_num == 0) {
   1037 		softs->ipf_sync_num = 1;
   1038 		softs->ipf_sync_wrap++;
   1039 	}
   1040 
   1041 	/*
   1042 	 * Use the synch number of the object as the hash key.  Should end up
   1043 	 * with relatively even distribution over time.
   1044 	 * XXX - an attacker could lunch an DoS attack, of sorts, if they are
   1045 	 * the only one causing new table entries by only keeping open every
   1046 	 * nth connection they make, where n is a value in the interval
   1047 	 * [0, SYNC_STATETABSZ-1].
   1048 	 */
   1049 	switch (tab)
   1050 	{
   1051 	case SMC_STATE :
   1052 		hv = softs->ipf_sync_num & (softs->ipf_sync_state_tab_sz - 1);
   1053 		while (softs->ipf_sync_wrap != 0) {
   1054 			for (ss = softs->syncstatetab[hv]; ss; ss = ss->sl_next)
   1055 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
   1056 					break;
   1057 			if (ss == NULL)
   1058 				break;
   1059 			softs->ipf_sync_num++;
   1060 			hv = softs->ipf_sync_num &
   1061 			     (softs->ipf_sync_state_tab_sz - 1);
   1062 		}
   1063 		sl->sl_pnext = softs->syncstatetab + hv;
   1064 		sl->sl_next = softs->syncstatetab[hv];
   1065 		softs->syncstatetab[hv] = sl;
   1066 		break;
   1067 
   1068 	case SMC_NAT :
   1069 		hv = softs->ipf_sync_num & (softs->ipf_sync_nat_tab_sz - 1);
   1070 		while (softs->ipf_sync_wrap != 0) {
   1071 			for (ss = softs->syncnattab[hv]; ss; ss = ss->sl_next)
   1072 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
   1073 					break;
   1074 			if (ss == NULL)
   1075 				break;
   1076 			softs->ipf_sync_num++;
   1077 			hv = softs->ipf_sync_num &
   1078 			     (softs->ipf_sync_nat_tab_sz - 1);
   1079 		}
   1080 		sl->sl_pnext = softs->syncnattab + hv;
   1081 		sl->sl_next = softs->syncnattab[hv];
   1082 		softs->syncnattab[hv] = sl;
   1083 		break;
   1084 
   1085 	default :
   1086 		break;
   1087 	}
   1088 
   1089 	sl->sl_num = softs->ipf_sync_num;
   1090 	MUTEX_EXIT(&softs->ipf_syncadd);
   1091 
   1092 	sl->sl_magic = htonl(SYNHDRMAGIC);
   1093 	sl->sl_v = fin->fin_v;
   1094 	sl->sl_p = fin->fin_p;
   1095 	sl->sl_cmd = SMC_CREATE;
   1096 	sl->sl_idx = -1;
   1097 	sl->sl_table = tab;
   1098 	sl->sl_rev = fin->fin_rev;
   1099 	if (tab == SMC_STATE) {
   1100 		sl->sl_ips = ptr;
   1101 		sz = sizeof(*sl->sl_ips);
   1102 	} else if (tab == SMC_NAT) {
   1103 		sl->sl_ipn = ptr;
   1104 		sz = sizeof(*sl->sl_ipn);
   1105 	} else {
   1106 		ptr = NULL;
   1107 		sz = 0;
   1108 	}
   1109 	sl->sl_len = sz;
   1110 
   1111 	/*
   1112 	 * Create the log entry to be read by a user daemon.  When it has been
   1113 	 * finished and put on the queue, send a signal to wakeup any waiters.
   1114 	 */
   1115 	MUTEX_ENTER(&softs->ipf_syncadd);
   1116 	sle = softs->synclog + softs->sl_idx++;
   1117 	bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
   1118 	      sizeof(sle->sle_hdr));
   1119 	sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
   1120 	sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
   1121 	if (ptr != NULL) {
   1122 		bcopy((char *)ptr, (char *)&sle->sle_un, sz);
   1123 		if (tab == SMC_STATE) {
   1124 			ipf_sync_storder(1, &sle->sle_un.sleu_ips);
   1125 		} else if (tab == SMC_NAT) {
   1126 			ipf_sync_natorder(1, &sle->sle_un.sleu_ipn);
   1127 		}
   1128 	}
   1129 	MUTEX_EXIT(&softs->ipf_syncadd);
   1130 
   1131 	ipf_sync_wakeup(softc);
   1132 	return sl;
   1133 }
   1134 
   1135 
   1136 /* ------------------------------------------------------------------------ */
   1137 /* Function:    ipf_sync_update                                             */
   1138 /* Returns:     Nil                                                         */
   1139 /* Parameters:  tab(I) - type of synclist_t to create                       */
   1140 /*              fin(I) - pointer to packet information                      */
   1141 /*              sl(I)  - pointer to synchronisation object                  */
   1142 /*                                                                          */
   1143 /* For outbound packets, only, create an sync update record for the user    */
   1144 /* process to read.                                                         */
   1145 /* ------------------------------------------------------------------------ */
   1146 void
   1147 ipf_sync_update(ipf_main_softc_t *softc, int tab, fr_info_t *fin,
   1148     synclist_t *sl)
   1149 {
   1150 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1151 	synctcp_update_t *st;
   1152 	syncupdent_t *slu;
   1153 	ipstate_t *ips;
   1154 	nat_t *nat;
   1155 	ipfrwlock_t *lock;
   1156 
   1157 	if (fin->fin_out == 0 || sl == NULL)
   1158 		return;
   1159 
   1160 	if (tab == SMC_STATE) {
   1161 		lock = &softs->ipf_syncstate;
   1162 	} else {
   1163 		lock = &softs->ipf_syncnat;
   1164 	}
   1165 
   1166 	READ_ENTER(lock);
   1167 	if (sl->sl_idx == -1) {
   1168 		MUTEX_ENTER(&softs->ipf_syncadd);
   1169 		slu = softs->syncupd + softs->su_idx;
   1170 		sl->sl_idx = softs->su_idx++;
   1171 		MUTEX_EXIT(&softs->ipf_syncadd);
   1172 
   1173 		bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
   1174 		      sizeof(slu->sup_hdr));
   1175 		slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
   1176 		slu->sup_hdr.sm_sl = sl;
   1177 		slu->sup_hdr.sm_cmd = SMC_UPDATE;
   1178 		slu->sup_hdr.sm_table = tab;
   1179 		slu->sup_hdr.sm_num = htonl(sl->sl_num);
   1180 		slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
   1181 		slu->sup_hdr.sm_rev = fin->fin_rev;
   1182 # if 0
   1183 		if (fin->fin_p == IPPROTO_TCP) {
   1184 			st->stu_len[0] = 0;
   1185 			st->stu_len[1] = 0;
   1186 		}
   1187 # endif
   1188 	} else
   1189 		slu = softs->syncupd + sl->sl_idx;
   1190 
   1191 	/*
   1192 	 * Only TCP has complex timeouts, others just use default timeouts.
   1193 	 * For TCP, we only need to track the connection state and window.
   1194 	 */
   1195 	if (fin->fin_p == IPPROTO_TCP) {
   1196 		st = &slu->sup_tcp;
   1197 		if (tab == SMC_STATE) {
   1198 			ips = sl->sl_ips;
   1199 			st->stu_age = htonl(ips->is_die);
   1200 			st->stu_data[0].td_end = ips->is_send;
   1201 			st->stu_data[0].td_maxend = ips->is_maxsend;
   1202 			st->stu_data[0].td_maxwin = ips->is_maxswin;
   1203 			st->stu_state[0] = ips->is_state[0];
   1204 			st->stu_data[1].td_end = ips->is_dend;
   1205 			st->stu_data[1].td_maxend = ips->is_maxdend;
   1206 			st->stu_data[1].td_maxwin = ips->is_maxdwin;
   1207 			st->stu_state[1] = ips->is_state[1];
   1208 		} else if (tab == SMC_NAT) {
   1209 			nat = sl->sl_ipn;
   1210 			st->stu_age = htonl(nat->nat_age);
   1211 		}
   1212 	}
   1213 	RWLOCK_EXIT(lock);
   1214 
   1215 	ipf_sync_wakeup(softc);
   1216 }
   1217 
   1218 
   1219 /* ------------------------------------------------------------------------ */
   1220 /* Function:    ipf_sync_flush_table                                        */
   1221 /* Returns:     int - number of entries freed by flushing table             */
   1222 /* Parameters:  tabsize(I) - size of the array pointed to by table          */
   1223 /*              table(I)   - pointer to sync table to empty                 */
   1224 /*                                                                          */
   1225 /* Walk through a table of sync entries and free each one.  It is assumed   */
   1226 /* that some lock is held so that nobody else tries to access the table     */
   1227 /* during this cleanup.                                                     */
   1228 /* ------------------------------------------------------------------------ */
   1229 static int
   1230 ipf_sync_flush_table(ipf_sync_softc_t *softs, int tabsize, synclist_t **table)
   1231 {
   1232 	synclist_t *sl;
   1233 	int i, items;
   1234 
   1235 	items = 0;
   1236 
   1237 	for (i = 0; i < tabsize; i++) {
   1238 		while ((sl = table[i]) != NULL) {
   1239 			switch (sl->sl_table) {
   1240 			case SMC_STATE :
   1241 				if (sl->sl_ips != NULL)
   1242 					sl->sl_ips->is_sync = NULL;
   1243 				break;
   1244 			case SMC_NAT :
   1245 				if (sl->sl_ipn != NULL)
   1246 					sl->sl_ipn->nat_sync = NULL;
   1247 				break;
   1248 			}
   1249 			if (sl->sl_next != NULL)
   1250 				sl->sl_next->sl_pnext = sl->sl_pnext;
   1251 			table[i] = sl->sl_next;
   1252 			if (sl->sl_idx != -1)
   1253 				softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
   1254 			KFREE(sl);
   1255 			items++;
   1256 		}
   1257 	}
   1258 
   1259 	return items;
   1260 }
   1261 
   1262 
   1263 /* ------------------------------------------------------------------------ */
   1264 /* Function:    ipf_sync_ioctl                                              */
   1265 /* Returns:     int - 0 == success, != 0 == failure                         */
   1266 /* Parameters:  data(I) - pointer to ioctl data                             */
   1267 /*              cmd(I)  - ioctl command integer                             */
   1268 /*              mode(I) - file mode bits used with open                     */
   1269 /*                                                                          */
   1270 /* This function currently does not handle any ioctls and so just returns   */
   1271 /* EINVAL on all occasions.                                                 */
   1272 /* ------------------------------------------------------------------------ */
   1273 int
   1274 ipf_sync_ioctl(ipf_main_softc_t *softc, void *data, ioctlcmd_t cmd, int mode,
   1275     int uid, void *ctx)
   1276 {
   1277 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1278 	int error, i;
   1279 	SPL_INT(s);
   1280 
   1281 	switch (cmd)
   1282 	{
   1283         case SIOCIPFFL:
   1284 		error = BCOPYIN(data, &i, sizeof(i));
   1285 		if (error != 0) {
   1286 			IPFERROR(110023);
   1287 			error = EFAULT;
   1288 			break;
   1289 		}
   1290 
   1291 		switch (i)
   1292 		{
   1293 		case SMC_RLOG :
   1294 			SPL_NET(s);
   1295 			MUTEX_ENTER(&softs->ipsl_mutex);
   1296 			i = (softs->sl_tail - softs->sl_idx) +
   1297 			    (softs->su_tail - softs->su_idx);
   1298 			softs->sl_idx = 0;
   1299 			softs->su_idx = 0;
   1300 			softs->sl_tail = 0;
   1301 			softs->su_tail = 0;
   1302 			MUTEX_EXIT(&softs->ipsl_mutex);
   1303 			SPL_X(s);
   1304 			break;
   1305 
   1306 		case SMC_NAT :
   1307 			SPL_NET(s);
   1308 			WRITE_ENTER(&softs->ipf_syncnat);
   1309 			i = ipf_sync_flush_table(softs, SYNC_NATTABSZ,
   1310 						 softs->syncnattab);
   1311 			RWLOCK_EXIT(&softs->ipf_syncnat);
   1312 			SPL_X(s);
   1313 			break;
   1314 
   1315 		case SMC_STATE :
   1316 			SPL_NET(s);
   1317 			WRITE_ENTER(&softs->ipf_syncstate);
   1318 			i = ipf_sync_flush_table(softs, SYNC_STATETABSZ,
   1319 						 softs->syncstatetab);
   1320 			RWLOCK_EXIT(&softs->ipf_syncstate);
   1321 			SPL_X(s);
   1322 			break;
   1323 		}
   1324 
   1325 		error = BCOPYOUT(&i, data, sizeof(i));
   1326 		if (error != 0) {
   1327 			IPFERROR(110022);
   1328 			error = EFAULT;
   1329 		}
   1330 		break;
   1331 
   1332 	default :
   1333 		IPFERROR(110021);
   1334 		error = EINVAL;
   1335 		break;
   1336 	}
   1337 
   1338 	return error;
   1339 }
   1340 
   1341 
   1342 /* ------------------------------------------------------------------------ */
   1343 /* Function:    ipf_sync_canread                                            */
   1344 /* Returns:     int - 0 == success, != 0 == failure                         */
   1345 /* Parameters:  Nil                                                         */
   1346 /*                                                                          */
   1347 /* This function provides input to the poll handler about whether or not    */
   1348 /* there is data waiting to be read from the /dev/ipsync device.            */
   1349 /* ------------------------------------------------------------------------ */
   1350 int
   1351 ipf_sync_canread(void *arg)
   1352 {
   1353 	ipf_sync_softc_t *softs = arg;
   1354 	return !((softs->sl_tail == softs->sl_idx) &&
   1355 		 (softs->su_tail == softs->su_idx));
   1356 }
   1357 
   1358 
/* ------------------------------------------------------------------------ */
/* Function:    ipf_sync_canwrite                                           */
/* Returns:     int - 1 == can always write                                 */
/* Parameters:  arg(I) - not used                                           */
/*                                                                          */
/* Tells the poll handler that the sync device is always ready to accept    */
/* write events.                                                            */
/* XXX Maybe this should return false if the sync table is full?            */
/* ------------------------------------------------------------------------ */
int
ipf_sync_canwrite(void *arg)
{
	(void)arg;	/* no state is consulted */

	return 1;
}
   1373 
   1374 
   1375 /* ------------------------------------------------------------------------ */
   1376 /* Function:    ipf_sync_wakeup                                             */
   1377 /* Parameters:  Nil                                                         */
   1378 /* Returns:     Nil                                                         */
   1379 /*                                                                          */
   1380 /* This function implements the heuristics that decide how often to         */
   1381 /* generate a poll wakeup for programs that are waiting for information     */
   1382 /* about when they can do a read on /dev/ipsync.                            */
   1383 /*                                                                          */
   1384 /* There are three different considerations here:                           */
   1385 /* - do not keep a program waiting too long: ipf_sync_wake_interval is the  */
   1386 /*   maximum number of ipf ticks to let pass by;                            */
   1387 /* - do not let the queue of ouststanding things to generate notifies for   */
   1388 /*   get too full (ipf_sync_queue_high_wm is the high water mark);          */
   1389 /* - do not let too many events get collapsed in before deciding that the   */
   1390 /*   other host(s) need an update (ipf_sync_event_high_wm is the high water */
   1391 /*   mark for this counter.)                                                */
   1392 /* ------------------------------------------------------------------------ */
   1393 static void
   1394 ipf_sync_wakeup(ipf_main_softc_t *softc)
   1395 {
   1396 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1397 
   1398 	softs->ipf_sync_events++;
   1399 	if ((softc->ipf_ticks >
   1400 	    softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval) ||
   1401 	    (softs->ipf_sync_events > softs->ipf_sync_event_high_wm) ||
   1402 	    ((softs->sl_tail - softs->sl_idx) >
   1403 	     softs->ipf_sync_queue_high_wm) ||
   1404 	    ((softs->su_tail - softs->su_idx) >
   1405 	     softs->ipf_sync_queue_high_wm)) {
   1406 
   1407 		ipf_sync_poll_wakeup(softc);
   1408 	}
   1409 }
   1410 
   1411 
   1412 /* ------------------------------------------------------------------------ */
   1413 /* Function:    ipf_sync_poll_wakeup                                        */
   1414 /* Parameters:  Nil                                                         */
   1415 /* Returns:     Nil                                                         */
   1416 /*                                                                          */
   1417 /* Deliver a poll wakeup and reset counters for two of the three heuristics */
   1418 /* ------------------------------------------------------------------------ */
   1419 static void
   1420 ipf_sync_poll_wakeup(ipf_main_softc_t *softc)
   1421 {
   1422 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1423 
   1424 	softs->ipf_sync_events = 0;
   1425 	softs->ipf_sync_lastwakeup = softc->ipf_ticks;
   1426 
   1427 # ifdef _KERNEL
   1428 #  if SOLARIS
   1429 	MUTEX_ENTER(&softs->ipsl_mutex);
   1430 	cv_signal(&softs->ipslwait);
   1431 	MUTEX_EXIT(&softs->ipsl_mutex);
   1432 	pollwakeup(&softc->ipf_poll_head[IPL_LOGSYNC], POLLIN|POLLRDNORM);
   1433 #  else
   1434 	WAKEUP(&softs->sl_tail, 0);
   1435 	POLLWAKEUP(IPL_LOGSYNC);
   1436 #  endif
   1437 # endif
   1438 }
   1439 
   1440 
   1441 /* ------------------------------------------------------------------------ */
   1442 /* Function:    ipf_sync_expire                                             */
   1443 /* Parameters:  Nil                                                         */
   1444 /* Returns:     Nil                                                         */
   1445 /*                                                                          */
   1446 /* This is the function called even ipf_tick.  It implements one of the     */
   1447 /* three heuristics above *IF* there are events waiting.                    */
   1448 /* ------------------------------------------------------------------------ */
   1449 void
   1450 ipf_sync_expire(ipf_main_softc_t *softc)
   1451 {
   1452 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
   1453 
   1454 	if ((softs->ipf_sync_events > 0) &&
   1455 	    (softc->ipf_ticks >
   1456 	     softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval)) {
   1457 		ipf_sync_poll_wakeup(softc);
   1458 	}
   1459 }
   1460