Home | History | Annotate | Line # | Download | only in netinet
ip_flow.c revision 1.1
      1  1.1  matt /*	$NetBSD: ip_flow.c,v 1.1 1998/04/29 21:37:55 matt Exp $	*/
      2  1.1  matt 
      3  1.1  matt /*-
      4  1.1  matt  * Copyright (c) 1998 The NetBSD Foundation, Inc.
      5  1.1  matt  * All rights reserved.
      6  1.1  matt  *
      7  1.1  matt  * This code is derived from software contributed to The NetBSD Foundation
      8  1.1  matt  * by the 3am Software Foundry ("3am").  It was developed by Matt Thomas.
      9  1.1  matt  *
     10  1.1  matt  * Redistribution and use in source and binary forms, with or without
     11  1.1  matt  * modification, are permitted provided that the following conditions
     12  1.1  matt  * are met:
     13  1.1  matt  * 1. Redistributions of source code must retain the above copyright
     14  1.1  matt  *    notice, this list of conditions and the following disclaimer.
     15  1.1  matt  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.1  matt  *    notice, this list of conditions and the following disclaimer in the
     17  1.1  matt  *    documentation and/or other materials provided with the distribution.
     18  1.1  matt  * 3. All advertising materials mentioning features or use of this software
     19  1.1  matt  *    must display the following acknowledgement:
     20  1.1  matt  *	This product includes software developed by the NetBSD
     21  1.1  matt  *	Foundation, Inc. and its contributors.
     22  1.1  matt  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  1.1  matt  *    contributors may be used to endorse or promote products derived
     24  1.1  matt  *    from this software without specific prior written permission.
     25  1.1  matt  *
     26  1.1  matt  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  1.1  matt  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  1.1  matt  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  1.1  matt  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  1.1  matt  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  1.1  matt  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  1.1  matt  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  1.1  matt  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  1.1  matt  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  1.1  matt  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  1.1  matt  * POSSIBILITY OF SUCH DAMAGE.
     37  1.1  matt  */
     38  1.1  matt 
     39  1.1  matt #include <sys/param.h>
     40  1.1  matt #include <sys/systm.h>
     41  1.1  matt #include <sys/malloc.h>
     42  1.1  matt #include <sys/mbuf.h>
     43  1.1  matt #include <sys/domain.h>
     44  1.1  matt #include <sys/protosw.h>
     45  1.1  matt #include <sys/socket.h>
     46  1.1  matt #include <sys/socketvar.h>
     47  1.1  matt #include <sys/errno.h>
     48  1.1  matt #include <sys/time.h>
     49  1.1  matt #include <sys/kernel.h>
     50  1.1  matt #include <sys/proc.h>
     51  1.1  matt 
     52  1.1  matt #include <vm/vm.h>
     53  1.1  matt #include <sys/sysctl.h>
     54  1.1  matt 
     55  1.1  matt #include <net/if.h>
     56  1.1  matt #include <net/if_dl.h>
     57  1.1  matt #include <net/route.h>
     58  1.1  matt #include <net/pfil.h>
     59  1.1  matt 
     60  1.1  matt #include <netinet/in.h>
     61  1.1  matt #include <netinet/in_systm.h>
     62  1.1  matt #include <netinet/ip.h>
     63  1.1  matt #include <netinet/in_pcb.h>
     64  1.1  matt #include <netinet/in_var.h>
     65  1.1  matt #include <netinet/ip_var.h>
     66  1.1  matt 
     67  1.1  matt #define	IPFLOW_TIMER		(5 * PR_SLOWHZ)
     68  1.1  matt #define	IPFLOW_HASHSIZE		(1 << IPFLOW_HASHBITS)
     69  1.1  matt static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE];
     70  1.1  matt static int ipflow_inuse;
     71  1.1  matt #define	IPFLOW_MAX		256
     72  1.1  matt 
     73  1.1  matt static int ipflow_active = 0;
     74  1.1  matt 
     75  1.1  matt static unsigned
     76  1.1  matt ipflow_hash(
     77  1.1  matt 	struct in_addr dst,
     78  1.1  matt 	struct in_addr src,
     79  1.1  matt 	unsigned tos)
     80  1.1  matt {
     81  1.1  matt 	unsigned hash = tos;
     82  1.1  matt 	int idx;
     83  1.1  matt 	for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS)
     84  1.1  matt 		hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx);
     85  1.1  matt 	return hash & (IPFLOW_HASHSIZE-1);
     86  1.1  matt }
     87  1.1  matt 
     88  1.1  matt static struct ipflow *
     89  1.1  matt ipflow_lookup(
     90  1.1  matt 	const struct ip *ip)
     91  1.1  matt {
     92  1.1  matt 	unsigned hash;
     93  1.1  matt 	struct ipflow *ipf;
     94  1.1  matt 
     95  1.1  matt 	hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
     96  1.1  matt 
     97  1.1  matt 	ipf = LIST_FIRST(&ipflows[hash]);
     98  1.1  matt 	while (ipf != NULL) {
     99  1.1  matt 		if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr
    100  1.1  matt 		    && ip->ip_src.s_addr == ipf->ipf_src.s_addr
    101  1.1  matt 		    && ip->ip_tos == ipf->ipf_tos)
    102  1.1  matt 			break;
    103  1.1  matt 		ipf = LIST_NEXT(ipf, ipf_next);
    104  1.1  matt 	}
    105  1.1  matt 	return ipf;
    106  1.1  matt }
    107  1.1  matt 
    108  1.1  matt int
    109  1.1  matt ipflow_fastforward(
    110  1.1  matt 	struct mbuf *m)
    111  1.1  matt {
    112  1.1  matt 	struct ip *ip;
    113  1.1  matt 	struct ipflow *ipf;
    114  1.1  matt 	struct rtentry *rt;
    115  1.1  matt 	u_int32_t sum;
    116  1.1  matt 	int error;
    117  1.1  matt 
    118  1.1  matt 	/*
    119  1.1  matt 	 * Are we forwarding packets?  Big enough for an IP packet?
    120  1.1  matt 	 */
    121  1.1  matt 	if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
    122  1.1  matt 		return 0;
    123  1.1  matt 	/*
    124  1.1  matt 	 * IP header with no option and valid version and length
    125  1.1  matt 	 */
    126  1.1  matt 	ip = mtod(m, struct ip *);
    127  1.1  matt 	if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
    128  1.1  matt 	    || ntohs(ip->ip_len) > m->m_pkthdr.len)
    129  1.1  matt 		return 0;
    130  1.1  matt 	/*
    131  1.1  matt 	 * Find a flow.
    132  1.1  matt 	 */
    133  1.1  matt 	if ((ipf = ipflow_lookup(ip)) == NULL)
    134  1.1  matt 		return 0;
    135  1.1  matt 
    136  1.1  matt 	/*
    137  1.1  matt 	 * Route and interface still up?
    138  1.1  matt 	 */
    139  1.1  matt 	rt = ipf->ipf_ro.ro_rt;
    140  1.1  matt 	if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0)
    141  1.1  matt 		return 0;
    142  1.1  matt 
    143  1.1  matt 	/*
    144  1.1  matt 	 * Packet size OK?  TTL?
    145  1.1  matt 	 */
    146  1.1  matt 	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
    147  1.1  matt 		return 0;
    148  1.1  matt 
    149  1.1  matt 	/*
    150  1.1  matt 	 * Everything checks out and so we can forward this packet.
    151  1.1  matt 	 * Modify the TTL and incrementally change the checksum.
    152  1.1  matt 	 * On little endian machine, the TTL is in LSB position
    153  1.1  matt 	 * (so we can simply add) while on big-endian it's in the
    154  1.1  matt 	 * MSB position (so we have to do two calculation; the first
    155  1.1  matt 	 * is the add and second is to wrap the results into 17 bits,
    156  1.1  matt 	 * 16 bits and a carry).
    157  1.1  matt 	 */
    158  1.1  matt 	ip->ip_ttl -= IPTTLDEC;
    159  1.1  matt #if BYTE_ORDER == LITTLE_ENDIAN
    160  1.1  matt 	sum = ip->ip_sum + IPTTLDEC;
    161  1.1  matt #endif
    162  1.1  matt #if BYTE_ORDER == BIG_ENDIAN
    163  1.1  matt 	sum = ip->ip_sum + (IPTTLDEC << 8);
    164  1.1  matt 	sum = (sum & 0xFFFF) + (sum >> 16);
    165  1.1  matt #endif
    166  1.1  matt 	if (sum > 0x10000)		/* add in carry if needed */
    167  1.1  matt 		sum++;
    168  1.1  matt 	ip->ip_sum = sum;		/* bit 16 is dropped */
    169  1.1  matt 
    170  1.1  matt 	/*
    171  1.1  matt 	 * Send the packet on it's way.  All we can get back is ENOBUFS
    172  1.1  matt 	 */
    173  1.1  matt 	ipf->ipf_uses++;
    174  1.1  matt 	ipf->ipf_timer = IPFLOW_TIMER;
    175  1.1  matt 	if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, &ipf->ipf_ro.ro_dst, rt)) != 0) {
    176  1.1  matt 		if (error == ENOBUFS)
    177  1.1  matt 			ipf->ipf_dropped++;
    178  1.1  matt 		else
    179  1.1  matt 			ipf->ipf_errors++;
    180  1.1  matt 	}
    181  1.1  matt 	return 1;
    182  1.1  matt }
    183  1.1  matt 
    184  1.1  matt static void
    186  1.1  matt ipflow_addstats(
    187  1.1  matt 	struct ipflow *ipf)
    188  1.1  matt {
    189  1.1  matt 	ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
    190  1.1  matt 	ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
    191  1.1  matt 	ipstat.ips_forward += ipf->ipf_uses;
    192  1.1  matt 	ipstat.ips_fastforward += ipf->ipf_uses;
    193  1.1  matt }
    194  1.1  matt 
    195  1.1  matt static void
    196  1.1  matt ipflow_free(
    197  1.1  matt 	struct ipflow *ipf)
    198  1.1  matt {
    199  1.1  matt 	int s;
    200  1.1  matt 	/*
    201  1.1  matt 	 * Remove the flow from the hash table (at elevated IPL).
    202  1.1  matt 	 * Once it's off the list, we can deal with it at normal
    203  1.1  matt 	 * network IPL.
    204  1.1  matt 	 */
    205  1.1  matt 	s = splimp();
    206  1.1  matt 	LIST_REMOVE(ipf, ipf_next);
    207  1.1  matt 	splx(s);
    208  1.1  matt 	ipflow_addstats(ipf);
    209  1.1  matt 	RTFREE(ipf->ipf_ro.ro_rt);
    210  1.1  matt 	ipflow_inuse--;
    211  1.1  matt 	FREE(ipf, M_IPFLOW);
    212  1.1  matt }
    213  1.1  matt 
    214  1.1  matt static struct ipflow *
    215  1.1  matt ipflow_reap(
    216  1.1  matt 	void)
    217  1.1  matt {
    218  1.1  matt 	struct ipflow *ipf, *maybe_ipf = NULL;
    219  1.1  matt 	int idx;
    220  1.1  matt 	int s;
    221  1.1  matt 
    222  1.1  matt 	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
    223  1.1  matt 		ipf = LIST_FIRST(&ipflows[idx]);
    224  1.1  matt 		while (ipf != NULL) {
    225  1.1  matt 			/*
    226  1.1  matt 			 * If this no longer points to a valid route
    227  1.1  matt 			 * reclaim it.
    228  1.1  matt 			 */
    229  1.1  matt 			if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
    230  1.1  matt 				goto done;
    231  1.1  matt 			/*
    232  1.1  matt 			 * choose the one that's been least recently used
    233  1.1  matt 			 * or has had the least uses in the last 1.5
    234  1.1  matt 			 * intervals.
    235  1.1  matt 			 */
    236  1.1  matt 			if (ipf == NULL
    237  1.1  matt 			    || ipf->ipf_timer < maybe_ipf->ipf_timer
    238  1.1  matt 			    || (ipf->ipf_timer == maybe_ipf->ipf_timer
    239  1.1  matt 				&& ipf->ipf_last_uses + ipf->ipf_uses <
    240  1.1  matt 				      maybe_ipf->ipf_last_uses +
    241  1.1  matt 					maybe_ipf->ipf_uses))
    242  1.1  matt 				maybe_ipf = ipf;
    243  1.1  matt 			ipf = LIST_NEXT(ipf, ipf_next);
    244  1.1  matt 		}
    245  1.1  matt 	}
    246  1.1  matt 	ipf = maybe_ipf;
    247  1.1  matt     done:
    248  1.1  matt 	/*
    249  1.1  matt 	 * Remove the entry from the flow table.
    250  1.1  matt 	 */
    251  1.1  matt 	s = splimp();
    252  1.1  matt 	LIST_REMOVE(ipf, ipf_next);
    253  1.1  matt 	splx(s);
    254  1.1  matt 	ipflow_addstats(ipf);
    255  1.1  matt 	RTFREE(ipf->ipf_ro.ro_rt);
    256  1.1  matt 	return ipf;
    257  1.1  matt }
    258  1.1  matt 
    259  1.1  matt void
    260  1.1  matt ipflow_slowtimo(
    261  1.1  matt 	void)
    262  1.1  matt {
    263  1.1  matt 	struct ipflow *ipf;
    264  1.1  matt 	int idx;
    265  1.1  matt 
    266  1.1  matt 	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
    267  1.1  matt 		ipf = LIST_FIRST(&ipflows[idx]);
    268  1.1  matt 		while (ipf != NULL) {
    269  1.1  matt 			struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next);
    270  1.1  matt 			if (--ipf->ipf_timer == 0) {
    271  1.1  matt 				ipflow_free(ipf);
    272  1.1  matt 			} else {
    273  1.1  matt 				ipf->ipf_last_uses = ipf->ipf_uses;
    274  1.1  matt 				ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
    275  1.1  matt 				ipstat.ips_forward += ipf->ipf_uses;
    276  1.1  matt 				ipstat.ips_fastforward += ipf->ipf_uses;
    277  1.1  matt 				ipf->ipf_uses = 0;
    278  1.1  matt 			}
    279  1.1  matt 			ipf = next_ipf;
    280  1.1  matt 		}
    281  1.1  matt 	}
    282  1.1  matt }
    283  1.1  matt 
    284  1.1  matt void
    285  1.1  matt ipflow_create(
    286  1.1  matt 	const struct route *ro,
    287  1.1  matt 	struct mbuf *m)
    288  1.1  matt {
    289  1.1  matt 	const struct ip *const ip = mtod(m, struct ip *);
    290  1.1  matt 	struct ipflow *ipf;
    291  1.1  matt 	unsigned hash;
    292  1.1  matt 	int s;
    293  1.1  matt 
    294  1.1  matt 	/*
    295  1.1  matt 	 * Don't create cache entries for ICMP messages.
    296  1.1  matt 	 */
    297  1.1  matt 	if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
    298  1.1  matt 		return;
    299  1.1  matt 	/*
    300  1.1  matt 	 * See if an existing flow struct exists.  If so remove it from it's
    301  1.1  matt 	 * list and free the old route.  If not, try to malloc a new one
    302  1.1  matt 	 * (if we aren't at our limit).
    303  1.1  matt 	 */
    304  1.1  matt 	ipf = ipflow_lookup(ip);
    305  1.1  matt 	if (ipf == NULL) {
    306  1.1  matt 		if (ipflow_inuse == IPFLOW_MAX) {
    307  1.1  matt 			ipf = ipflow_reap();
    308  1.1  matt 		} else {
    309  1.1  matt 			ipf = (struct ipflow *) malloc(sizeof(*ipf), M_IPFLOW,
    310  1.1  matt 						       M_NOWAIT);
    311  1.1  matt 			if (ipf == NULL)
    312  1.1  matt 				return;
    313  1.1  matt 			ipflow_inuse++;
    314  1.1  matt 		}
    315  1.1  matt 		bzero((caddr_t) ipf, sizeof(*ipf));
    316  1.1  matt 	} else {
    317  1.1  matt 		s = splimp();
    318  1.1  matt 		LIST_REMOVE(ipf, ipf_next);
    319  1.1  matt 		splx(s);
    320  1.1  matt 		ipflow_addstats(ipf);
    321  1.1  matt 		RTFREE(ipf->ipf_ro.ro_rt);
    322  1.1  matt 		ipf->ipf_uses = ipf->ipf_last_uses = 0;
    323  1.1  matt 		ipf->ipf_errors = ipf->ipf_dropped = 0;
    324  1.1  matt 	}
    325  1.1  matt 
    326  1.1  matt 	/*
    327  1.1  matt 	 * Fill in the updated information.
    328  1.1  matt 	 */
    329  1.1  matt 	ipf->ipf_ro = *ro;
    330  1.1  matt 	ro->ro_rt->rt_refcnt++;
    331  1.1  matt 	ipf->ipf_dst = ip->ip_dst;
    332  1.1  matt 	ipf->ipf_src = ip->ip_src;
    333  1.1  matt 	ipf->ipf_tos = ip->ip_tos;
    334  1.1  matt 	ipf->ipf_timer = IPFLOW_TIMER;
    335  1.1  matt 	ipf->ipf_start = time.tv_sec;
    336  1.1  matt 	/*
    337  1.1  matt 	 * Insert into the approriate bucket of the flow table.
    338  1.1  matt 	 */
    339  1.1  matt 	hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
    340  1.1  matt 	s = splimp();
    341  1.1  matt 	LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next);
    342  1.1  matt 	splx(s);
    343            }
    344