Home | History | Annotate | Line # | Download | only in npf
npf_state.c revision 1.1
      1 /*	$NetBSD: npf_state.c,v 1.1 2010/11/11 06:30:39 rmind Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2010 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This material is based upon work partially supported by The
      8  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * NPF state engine to track connections.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 __KERNEL_RCSID(0, "$NetBSD: npf_state.c,v 1.1 2010/11/11 06:30:39 rmind Exp $");
     38 
     39 #include <sys/param.h>
     40 #include <sys/systm.h>
     41 
     42 #include <sys/mutex.h>
     43 #include <netinet/in.h>
     44 #include <netinet/tcp.h>
     45 #include <netinet/tcp_seq.h>
     46 
     47 #include "npf_impl.h"
     48 
/*
 * Largest distance tolerated, in either direction, between the peer's
 * recorded sequence end and the acknowledgment number of a segment.
 */
#define	MAXACKWINDOW		66000

/*
 * Session expiration table, indexed by IP protocol number.  Values are
 * in seconds.  Only the protocols listed have an explicit entry; the
 * designated initializers leave every other slot up to the highest
 * listed index zero-filled.  XXX revisit later
 */
static const u_int expire_table[ ] = {
	[IPPROTO_TCP]		= 86400,	/* 24 hours */
	[IPPROTO_UDP]		= 120,		/* 2 min */
	[IPPROTO_ICMP]		= 30		/* 30 sec */
};
     57 
     58 static bool
     59 npf_tcp_inwindow(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst,
     60     const bool forw)
     61 {
     62 	const struct tcphdr *th = &npc->npc_l4.tcp;
     63 	const int tcpfl = th->th_flags;
     64 	npf_tcpstate_t *fstate, *tstate;
     65 	int tcpdlen, wscale, ackskew;
     66 	tcp_seq seq, ack, end;
     67 	uint32_t win;
     68 
     69 	KASSERT(npf_iscached(npc, NPC_TCP));
     70 	tcpdlen = npf_tcpsaw(__UNCONST(npc), &seq, &ack, &win);
     71 	end = seq + tcpdlen;
     72 	if (tcpfl & TH_SYN) {
     73 		end++;
     74 	}
     75 	if (tcpfl & TH_FIN) {
     76 		end++;
     77 	}
     78 
     79 	/*
     80 	 * Perform SEQ/ACK numbers check against boundaries.  Reference:
     81 	 *
     82 	 *	Rooij G., "Real stateful TCP packet filtering in IP Filter",
     83 	 *	10th USENIX Security Symposium invited talk, Aug. 2001.
     84 	 */
     85 
     86 	fstate = &nst->nst_tcpst[forw ? 0 : 1];
     87 	tstate = &nst->nst_tcpst[forw ? 1 : 0];
     88 	win = win ? (win << fstate->nst_wscale) : 1;
     89 
     90 	if (tcpfl == TH_SYN) {
     91 		/*
     92 		 * First SYN or re-transmission of SYN.  Initialize all
     93 		 * values.  State of other side will get set with a SYN-ACK
     94 		 * reply (see below).
     95 		 */
     96 		fstate->nst_seqend = end;
     97 		fstate->nst_ackend = end;
     98 		fstate->nst_maxwin = win;
     99 		tstate->nst_ackend = 0;
    100 		tstate->nst_ackend = 0;
    101 		tstate->nst_maxwin = 0;
    102 		/*
    103 		 * Handle TCP Window Scaling (RFC 1323).  Both sides may
    104 		 * send this option in their SYN packets.
    105 		 */
    106 		if (npf_fetch_tcpopts(npc, nbuf, NULL, &wscale)) {
    107 			fstate->nst_wscale = wscale;
    108 		} else {
    109 			fstate->nst_wscale = 0;
    110 		}
    111 		tstate->nst_wscale = 0;
    112 		/* Done. */
    113 		return true;
    114 	}
    115 	if (fstate->nst_seqend == 0) {
    116 		/*
    117 		 * Should be a SYN-ACK reply to SYN.  If SYN is not set,
    118 		 * then we are in the middle connection and lost tracking.
    119 		 */
    120 		fstate->nst_seqend = end;
    121 		fstate->nst_ackend = end + 1;
    122 		fstate->nst_maxwin = 1;
    123 
    124 		/* Handle TCP Window Scaling (must be ignored if no SYN). */
    125 		if (tcpfl & TH_SYN) {
    126 			fstate->nst_wscale =
    127 			    npf_fetch_tcpopts(npc, nbuf, NULL, &wscale) ?
    128 			    wscale : 0;
    129 		}
    130 	}
    131 	if ((tcpfl & TH_ACK) == 0) {
    132 		/* Pretend that an ACK was sent. */
    133 		ack = tstate->nst_seqend;
    134 	} else if ((tcpfl & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST) && ack == 0) {
    135 		/* Workaround for some TCP stacks. */
    136 		ack = tstate->nst_seqend;
    137 	}
    138 	if (seq == end) {
    139 		/* If packet contains no data - assume it is valid. */
    140 		end = fstate->nst_seqend;
    141 		seq = end;
    142 	}
    143 
    144 	/*
    145 	 * Determine whether the data is within previously noted window,
    146 	 * that is, upper boundary for valid data (I).
    147 	 */
    148 	if (!SEQ_GEQ(fstate->nst_ackend, end)) {
    149 		return false;
    150 	}
    151 	/* Lower boundary (II), which is no more than one window back. */
    152 	if (!SEQ_GEQ(seq, fstate->nst_seqend - tstate->nst_maxwin)) {
    153 		return false;
    154 	}
    155 	/*
    156 	 * Boundaries for valid acknowledgments (III, IV) - on predicted
    157 	 * window up or down, since packets may be fragmented.
    158 	 */
    159 	ackskew = tstate->nst_seqend - ack;
    160 	if (ackskew < -MAXACKWINDOW || ackskew > MAXACKWINDOW) {
    161 		return false;
    162 	}
    163 
    164 	/*
    165 	 * Negative ackskew might be due to fragmented packets.  Since the
    166 	 * total length of the packet is unknown - bump the boundary.
    167 	 */
    168 	if (ackskew < 0) {
    169 		tstate->nst_seqend = end;
    170 	}
    171 	/* Keep track of the maximum window seen. */
    172 	if (fstate->nst_maxwin < win) {
    173 		fstate->nst_maxwin = win;
    174 	}
    175 	if (SEQ_GT(end, fstate->nst_seqend)) {
    176 		fstate->nst_seqend = end;
    177 	}
    178 	/* Note the window for upper boundary. */
    179 	if (SEQ_GEQ(ack + win, tstate->nst_ackend)) {
    180 		tstate->nst_ackend = ack + win;
    181 	}
    182 	return true;
    183 }
    184 
    185 static inline bool
    186 npf_state_tcp(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst,
    187     const bool forw)
    188 {
    189 	const struct tcphdr *th = &npc->npc_l4.tcp;
    190 	const int tcpfl = th->th_flags;
    191 
    192 	/*
    193 	 * Handle 3-way handshake (SYN -> SYN,ACK -> ACK).
    194 	 */
    195 	switch (nst->nst_state) {
    196 	case ST_ESTABLISHED:
    197 		/* Common case - connection established. */
    198 		if (tcpfl & TH_ACK) {
    199 			/*
    200 			 * Data transmission.
    201 			 */
    202 		} else if (tcpfl & TH_FIN) {
    203 			/* XXX TODO */
    204 		}
    205 		break;
    206 	case ST_OPENING:
    207 		/* SYN has been sent, expecting SYN-ACK. */
    208 		if (tcpfl == (TH_SYN | TH_ACK) && !forw) {
    209 			/* Received backwards SYN-ACK. */
    210 			nst->nst_state = ST_ACKNOWLEDGE;
    211 		} else if (tcpfl == TH_SYN && forw) {
    212 			/* Re-transmission of SYN. */
    213 		} else {
    214 			return false;
    215 		}
    216 		break;
    217 	case ST_ACKNOWLEDGE:
    218 		/* SYN-ACK was seen, expecting ACK. */
    219 		if (tcpfl == TH_ACK && forw) {
    220 			nst->nst_state = ST_ESTABLISHED;
    221 		} else {
    222 			return false;
    223 		}
    224 		break;
    225 	case ST_CLOSING:
    226 		/* XXX TODO */
    227 		break;
    228 	default:
    229 		npf_state_dump(nst);
    230 		KASSERT(false);
    231 	}
    232 	return npf_tcp_inwindow(npc, nbuf, nst, forw);
    233 }
    234 
    235 bool
    236 npf_state_init(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst)
    237 {
    238 	const int proto = npf_cache_ipproto(npc);
    239 
    240 	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
    241 	if (proto == IPPROTO_TCP) {
    242 		const struct tcphdr *th = &npc->npc_l4.tcp;
    243 		/* TCP case: must be SYN. */
    244 		KASSERT(npf_iscached(npc, NPC_TCP));
    245 		if (th->th_flags != TH_SYN) {
    246 			return false;
    247 		}
    248 		/* Initial values for TCP window and sequence tracking. */
    249 		if (!npf_tcp_inwindow(npc, nbuf, nst, true)) {
    250 			return false;
    251 		}
    252 	}
    253 	mutex_init(&nst->nst_lock, MUTEX_DEFAULT, IPL_SOFTNET);
    254 	nst->nst_state = ST_OPENING;
    255 	return true;
    256 }
    257 
    258 void
    259 npf_state_destroy(npf_state_t *nst)
    260 {
    261 
    262 	KASSERT(nst->nst_state != 0);
    263 	mutex_destroy(&nst->nst_lock);
    264 }
    265 
    266 bool
    267 npf_state_inspect(const npf_cache_t *npc, nbuf_t *nbuf,
    268     npf_state_t *nst, const bool forw)
    269 {
    270 	const int proto = npf_cache_ipproto(npc);
    271 	bool ret;
    272 
    273 	mutex_enter(&nst->nst_lock);
    274 	switch (proto) {
    275 	case IPPROTO_TCP:
    276 		/* Handle TCP. */
    277 		ret = npf_state_tcp(npc, nbuf, nst, forw);
    278 		break;
    279 	default:
    280 		/* Handle UDP or ICMP response for opening session. */
    281 		if (nst->nst_state == ST_OPENING && !forw) {
    282 			nst->nst_state = ST_ESTABLISHED;
    283 		}
    284 		ret = true;
    285 	}
    286 	mutex_exit(&nst->nst_lock);
    287 	return ret;
    288 }
    289 
    290 int
    291 npf_state_etime(const npf_state_t *nst, const int proto)
    292 {
    293 
    294 	if (nst->nst_state == ST_ESTABLISHED) {
    295 		return expire_table[proto];
    296 	}
    297 	return 10;	/* XXX TODO */
    298 }
    299 
    300 #if defined(DDB) || defined(_NPF_TESTING)
    301 
    302 void
    303 npf_state_dump(npf_state_t *nst)
    304 {
    305 	npf_tcpstate_t *fst = &nst->nst_tcpst[0], *tst = &nst->nst_tcpst[1];
    306 
    307 	printf("\tstate (%p) %d:\n\t\t"
    308 	    "F { seqend %u ackend %u mwin %u wscale %u }\n\t\t"
    309 	    "T { seqend %u, ackend %u mwin %u wscale %u }\n",
    310 	    nst, nst->nst_state,
    311 	    fst->nst_seqend, fst->nst_ackend, fst->nst_maxwin, fst->nst_wscale,
    312 	    tst->nst_seqend, tst->nst_ackend, tst->nst_maxwin, tst->nst_wscale
    313 	);
    314 }
    315 
    316 #endif
    317