Home | History | Annotate | Line # | Download | only in netinet6
ip6_flow.c revision 1.40.16.1
      1  1.40.16.1   thorpej /*	$NetBSD: ip6_flow.c,v 1.40.16.1 2021/04/03 22:29:02 thorpej Exp $	*/
      2        1.1  liamjfoy 
      3       1.39      maxv /*
      4        1.1  liamjfoy  * Copyright (c) 2007 The NetBSD Foundation, Inc.
      5        1.1  liamjfoy  * All rights reserved.
      6        1.1  liamjfoy  *
      7        1.1  liamjfoy  * This code is derived from software contributed to The NetBSD Foundation
      8        1.1  liamjfoy  * by the 3am Software Foundry ("3am").  It was developed by Liam J. Foy
      9        1.1  liamjfoy  * <liamjfoy (at) netbsd.org> and Matt Thomas <matt (at) netbsd.org>.
     10        1.1  liamjfoy  *
     11        1.1  liamjfoy  * Redistribution and use in source and binary forms, with or without
     12        1.1  liamjfoy  * modification, are permitted provided that the following conditions
     13        1.1  liamjfoy  * are met:
     14        1.1  liamjfoy  * 1. Redistributions of source code must retain the above copyright
     15        1.1  liamjfoy  *    notice, this list of conditions and the following disclaimer.
     16        1.1  liamjfoy  * 2. Redistributions in binary form must reproduce the above copyright
     17        1.1  liamjfoy  *    notice, this list of conditions and the following disclaimer in the
     18        1.1  liamjfoy  *    documentation and/or other materials provided with the distribution.
     19        1.1  liamjfoy  *
     20        1.1  liamjfoy  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     21        1.1  liamjfoy  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     22        1.1  liamjfoy  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     23        1.1  liamjfoy  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     24        1.1  liamjfoy  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25        1.1  liamjfoy  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26        1.1  liamjfoy  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27        1.1  liamjfoy  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28        1.1  liamjfoy  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29        1.1  liamjfoy  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30        1.1  liamjfoy  * POSSIBILITY OF SUCH DAMAGE.
     31        1.1  liamjfoy  *
     32        1.1  liamjfoy  * IPv6 version was developed by Liam J. Foy. Original source existed in IPv4
     33        1.1  liamjfoy  * format developed by Matt Thomas. Thanks to Joerg Sonnenberger, Matt
     34       1.39      maxv  * Thomas and Christos Zoulas.
     35        1.1  liamjfoy  *
     36        1.1  liamjfoy  * Thanks to Liverpool John Moores University, especially Dr. David Llewellyn-Jones
     37        1.1  liamjfoy  * for providing resources (to test) and Professor Madjid Merabti.
     38        1.1  liamjfoy  */
     39        1.1  liamjfoy 
     40        1.1  liamjfoy #include <sys/cdefs.h>
     41  1.40.16.1   thorpej __KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.40.16.1 2021/04/03 22:29:02 thorpej Exp $");
     42       1.32     ozaki 
     43       1.32     ozaki #ifdef _KERNEL_OPT
     44       1.32     ozaki #include "opt_net_mpsafe.h"
     45       1.32     ozaki #endif
     46        1.1  liamjfoy 
     47        1.1  liamjfoy #include <sys/param.h>
     48        1.1  liamjfoy #include <sys/systm.h>
     49        1.1  liamjfoy #include <sys/malloc.h>
     50        1.1  liamjfoy #include <sys/mbuf.h>
     51        1.1  liamjfoy #include <sys/socketvar.h>
     52        1.1  liamjfoy #include <sys/time.h>
     53        1.1  liamjfoy #include <sys/kernel.h>
     54        1.1  liamjfoy #include <sys/pool.h>
     55        1.1  liamjfoy #include <sys/sysctl.h>
     56       1.28     ozaki #include <sys/workqueue.h>
     57       1.29     ozaki #include <sys/atomic.h>
     58        1.1  liamjfoy 
     59        1.1  liamjfoy #include <net/if.h>
     60        1.1  liamjfoy #include <net/if_dl.h>
     61        1.1  liamjfoy #include <net/route.h>
     62        1.1  liamjfoy #include <net/pfil.h>
     63        1.1  liamjfoy 
     64        1.1  liamjfoy #include <netinet/in.h>
     65        1.1  liamjfoy #include <netinet6/in6_var.h>
     66        1.1  liamjfoy #include <netinet/in_systm.h>
     67        1.1  liamjfoy #include <netinet/ip6.h>
     68        1.1  liamjfoy #include <netinet6/ip6_var.h>
     69       1.15   thorpej #include <netinet6/ip6_private.h>
     70        1.1  liamjfoy 
     71        1.1  liamjfoy /*
     72        1.1  liamjfoy  * IPv6 Fast Forward caches/hashes flows from one source to destination.
     73        1.1  liamjfoy  *
     74        1.1  liamjfoy  * Upon a successful forward IPv6FF caches and hashes details such as the
     75        1.1  liamjfoy  * route, source and destination. Once another packet is received matching
     76        1.1  liamjfoy  * the source and destination the packet is forwarded straight onto if_output
     77        1.1  liamjfoy  * using the cached details.
     78        1.1  liamjfoy  *
     79        1.1  liamjfoy  * Example:
     80       1.20  christos  * ether/fddi_input -> ip6flow_fastforward -> if_output
     81        1.1  liamjfoy  */
     82        1.1  liamjfoy 
     83       1.18  liamjfoy static struct pool ip6flow_pool;
     84        1.1  liamjfoy 
     85       1.31  knakahar TAILQ_HEAD(ip6flowhead, ip6flow);
     86        1.1  liamjfoy 
     87        1.1  liamjfoy /*
     88        1.1  liamjfoy  * We could use IPv4 defines (IPFLOW_HASHBITS) but we'll
     89        1.1  liamjfoy  * use our own (possibly for future expansion).
     90        1.1  liamjfoy  */
     91        1.1  liamjfoy #define	IP6FLOW_TIMER		(5 * PR_SLOWHZ)
     92       1.39      maxv #define	IP6FLOW_DEFAULT_HASHSIZE	(1 << IP6FLOW_HASHBITS)
     93        1.1  liamjfoy 
     94       1.25  knakahar /*
     95       1.25  knakahar  * ip6_flow.c internal lock.
     96       1.25  knakahar  * If we use softnet_lock, it would cause recursive lock.
     97       1.25  knakahar  *
     98       1.25  knakahar  * This is a tentative workaround.
     99       1.25  knakahar  * We should make it scalable somehow in the future.
    100       1.25  knakahar  */
    101       1.39      maxv static kmutex_t ip6flow_lock __cacheline_aligned;
    102        1.4  liamjfoy static struct ip6flowhead *ip6flowtable = NULL;
    103        1.1  liamjfoy static struct ip6flowhead ip6flowlist;
    104       1.39      maxv static int ip6flow_inuse __cacheline_aligned;
    105        1.1  liamjfoy 
    106       1.28     ozaki static void ip6flow_slowtimo_work(struct work *, void *);
    107       1.28     ozaki static struct workqueue	*ip6flow_slowtimo_wq;
    108       1.28     ozaki static struct work	ip6flow_slowtimo_wk;
    109       1.28     ozaki 
    110       1.30  knakahar static int sysctl_net_inet6_ip6_hashsize(SYSCTLFN_PROTO);
    111       1.30  knakahar static int sysctl_net_inet6_ip6_maxflows(SYSCTLFN_PROTO);
    112       1.30  knakahar static void ip6flow_sysctl_init(struct sysctllog **);
    113       1.30  knakahar 
/*
 * Link a flow into hash bucket `hashidx' and onto the head of the global
 * flow list.  The bucket index is stored in the flow so that it can be
 * unlinked later without rehashing.  Both arguments are evaluated more
 * than once.
 */
#define	IP6FLOW_INSERT(hashidx, ip6f)					\
do {									\
	(ip6f)->ip6f_hashidx = (hashidx);				\
	TAILQ_INSERT_HEAD(&ip6flowtable[(hashidx)], (ip6f), ip6f_hash);	\
	TAILQ_INSERT_HEAD(&ip6flowlist, (ip6f), ip6f_list);		\
} while (/*CONSTCOND*/ 0)

/*
 * Unlink a flow from hash bucket `hashidx' and from the global flow list.
 * Both arguments are evaluated more than once.
 */
#define	IP6FLOW_REMOVE(hashidx, ip6f)					\
do {									\
	TAILQ_REMOVE(&ip6flowtable[(hashidx)], (ip6f), ip6f_hash);	\
	TAILQ_REMOVE(&ip6flowlist, (ip6f), ip6f_list);			\
} while (/*CONSTCOND*/ 0)
    132        1.1  liamjfoy 
    133        1.1  liamjfoy #ifndef IP6FLOW_DEFAULT
    134        1.1  liamjfoy #define	IP6FLOW_DEFAULT		256
    135        1.1  liamjfoy #endif
    136        1.1  liamjfoy 
    137        1.1  liamjfoy int ip6_maxflows = IP6FLOW_DEFAULT;
    138        1.4  liamjfoy int ip6_hashsize = IP6FLOW_DEFAULT_HASHSIZE;
    139        1.1  liamjfoy 
    140        1.1  liamjfoy /*
    141        1.1  liamjfoy  * Calculate hash table position.
    142        1.1  liamjfoy  */
    143       1.39      maxv static size_t
    144       1.13    dyoung ip6flow_hash(const struct ip6_hdr *ip6)
    145        1.1  liamjfoy {
    146        1.1  liamjfoy 	size_t hash;
    147        1.1  liamjfoy 	uint32_t dst_sum, src_sum;
    148        1.6  liamjfoy 	size_t idx;
    149        1.1  liamjfoy 
    150        1.1  liamjfoy 	src_sum = ip6->ip6_src.s6_addr32[0] + ip6->ip6_src.s6_addr32[1]
    151        1.1  liamjfoy 	    + ip6->ip6_src.s6_addr32[2] + ip6->ip6_src.s6_addr32[3];
    152        1.1  liamjfoy 	dst_sum = ip6->ip6_dst.s6_addr32[0] + ip6->ip6_dst.s6_addr32[1]
    153        1.1  liamjfoy 	    + ip6->ip6_dst.s6_addr32[2] + ip6->ip6_dst.s6_addr32[3];
    154        1.1  liamjfoy 
    155        1.1  liamjfoy 	hash = ip6->ip6_flow;
    156        1.1  liamjfoy 
    157        1.1  liamjfoy 	for (idx = 0; idx < 32; idx += IP6FLOW_HASHBITS)
    158        1.1  liamjfoy 		hash += (dst_sum >> (32 - idx)) + (src_sum >> idx);
    159        1.1  liamjfoy 
    160        1.4  liamjfoy 	return hash & (ip6_hashsize-1);
    161        1.1  liamjfoy }
    162        1.1  liamjfoy 
    163        1.1  liamjfoy /*
    164        1.1  liamjfoy  * Check to see if a flow already exists - if so return it.
    165        1.1  liamjfoy  */
    166        1.1  liamjfoy static struct ip6flow *
    167       1.13    dyoung ip6flow_lookup(const struct ip6_hdr *ip6)
    168        1.1  liamjfoy {
    169        1.1  liamjfoy 	size_t hash;
    170        1.1  liamjfoy 	struct ip6flow *ip6f;
    171        1.1  liamjfoy 
    172       1.25  knakahar 	KASSERT(mutex_owned(&ip6flow_lock));
    173       1.25  knakahar 
    174        1.1  liamjfoy 	hash = ip6flow_hash(ip6);
    175        1.1  liamjfoy 
    176       1.31  knakahar 	TAILQ_FOREACH(ip6f, &ip6flowtable[hash], ip6f_hash) {
    177        1.1  liamjfoy 		if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6f->ip6f_dst)
    178        1.1  liamjfoy 		    && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &ip6f->ip6f_src)
    179        1.1  liamjfoy 		    && ip6f->ip6f_flow == ip6->ip6_flow) {
    180       1.39      maxv 			/* A cached flow has been found. */
    181        1.1  liamjfoy 			return ip6f;
    182        1.1  liamjfoy 		}
    183        1.1  liamjfoy 	}
    184        1.1  liamjfoy 
    185        1.1  liamjfoy 	return NULL;
    186        1.1  liamjfoy }
    187        1.1  liamjfoy 
    188       1.18  liamjfoy void
    189       1.18  liamjfoy ip6flow_poolinit(void)
    190       1.18  liamjfoy {
    191       1.18  liamjfoy 
    192       1.18  liamjfoy 	pool_init(&ip6flow_pool, sizeof(struct ip6flow), 0, 0, 0, "ip6flowpl",
    193       1.18  liamjfoy 			NULL, IPL_NET);
    194       1.18  liamjfoy }
    195       1.18  liamjfoy 
    196        1.1  liamjfoy /*
    197        1.4  liamjfoy  * Allocate memory and initialise lists. This function is called
    198        1.4  liamjfoy  * from ip6_init and called there after to resize the hash table.
    199        1.4  liamjfoy  * If a newly sized table cannot be malloc'ed we just continue
    200        1.4  liamjfoy  * to use the old one.
    201        1.1  liamjfoy  */
    202       1.25  knakahar static int
    203       1.25  knakahar ip6flow_init_locked(int table_size)
    204        1.1  liamjfoy {
    205        1.4  liamjfoy 	struct ip6flowhead *new_table;
    206        1.1  liamjfoy 	size_t i;
    207        1.1  liamjfoy 
    208       1.25  knakahar 	KASSERT(mutex_owned(&ip6flow_lock));
    209       1.25  knakahar 
    210        1.4  liamjfoy 	new_table = (struct ip6flowhead *)malloc(sizeof(struct ip6flowhead) *
    211        1.4  liamjfoy 	    table_size, M_RTABLE, M_NOWAIT);
    212        1.4  liamjfoy 
    213        1.4  liamjfoy 	if (new_table == NULL)
    214        1.4  liamjfoy 		return 1;
    215        1.4  liamjfoy 
    216        1.4  liamjfoy 	if (ip6flowtable != NULL)
    217        1.4  liamjfoy 		free(ip6flowtable, M_RTABLE);
    218        1.4  liamjfoy 
    219        1.4  liamjfoy 	ip6flowtable = new_table;
    220        1.4  liamjfoy 	ip6_hashsize = table_size;
    221        1.4  liamjfoy 
    222       1.31  knakahar 	TAILQ_INIT(&ip6flowlist);
    223        1.4  liamjfoy 	for (i = 0; i < ip6_hashsize; i++)
    224       1.31  knakahar 		TAILQ_INIT(&ip6flowtable[i]);
    225        1.4  liamjfoy 
    226        1.4  liamjfoy 	return 0;
    227        1.1  liamjfoy }
    228        1.1  liamjfoy 
    229       1.25  knakahar int
    230       1.25  knakahar ip6flow_init(int table_size)
    231       1.25  knakahar {
    232       1.28     ozaki 	int ret, error;
    233       1.28     ozaki 
    234       1.40     ozaki 	error = workqueue_create(&ip6flow_slowtimo_wq, "ip6flow",
    235       1.28     ozaki 	    ip6flow_slowtimo_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
    236       1.28     ozaki 	if (error != 0)
    237       1.28     ozaki 		panic("%s: workqueue_create failed (%d)\n", __func__, error);
    238       1.25  knakahar 
    239       1.25  knakahar 	mutex_init(&ip6flow_lock, MUTEX_DEFAULT, IPL_NONE);
    240       1.25  knakahar 
    241       1.25  knakahar 	mutex_enter(&ip6flow_lock);
    242       1.25  knakahar 	ret = ip6flow_init_locked(table_size);
    243       1.25  knakahar 	mutex_exit(&ip6flow_lock);
    244       1.30  knakahar 	ip6flow_sysctl_init(NULL);
    245       1.25  knakahar 
    246       1.25  knakahar 	return ret;
    247       1.25  knakahar }
    248       1.25  knakahar 
    249        1.1  liamjfoy /*
    250        1.1  liamjfoy  * IPv6 Fast Forward routine. Attempt to forward the packet -
    251       1.39      maxv  * if any problems are found return to the main IPv6 input
    252        1.1  liamjfoy  * routine to deal with.
    253        1.1  liamjfoy  */
    254        1.1  liamjfoy int
    255       1.20  christos ip6flow_fastforward(struct mbuf **mp)
    256        1.1  liamjfoy {
    257        1.1  liamjfoy 	struct ip6flow *ip6f;
    258        1.1  liamjfoy 	struct ip6_hdr *ip6;
    259       1.33     ozaki 	struct rtentry *rt = NULL;
    260       1.20  christos 	struct mbuf *m;
    261        1.7    dyoung 	const struct sockaddr *dst;
    262        1.1  liamjfoy 	int error;
    263       1.25  knakahar 	int ret = 0;
    264       1.25  knakahar 
    265       1.25  knakahar 	mutex_enter(&ip6flow_lock);
    266        1.1  liamjfoy 
    267        1.1  liamjfoy 	/*
    268        1.1  liamjfoy 	 * Are we forwarding packets and have flows?
    269        1.1  liamjfoy 	 */
    270        1.1  liamjfoy 	if (!ip6_forwarding || ip6flow_inuse == 0)
    271       1.25  knakahar 		goto out;
    272        1.1  liamjfoy 
    273       1.20  christos 	m = *mp;
    274        1.1  liamjfoy 	/*
    275        1.1  liamjfoy 	 * At least size of IPv6 Header?
    276        1.1  liamjfoy 	 */
    277        1.1  liamjfoy 	if (m->m_len < sizeof(struct ip6_hdr))
    278       1.25  knakahar 		goto out;
    279        1.1  liamjfoy 	/*
    280        1.1  liamjfoy 	 * Was packet received as a link-level multicast or broadcast?
    281        1.1  liamjfoy 	 * If so, don't try to fast forward.
    282        1.1  liamjfoy 	 */
    283        1.1  liamjfoy 	if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
    284       1.25  knakahar 		goto out;
    285        1.1  liamjfoy 
    286  1.40.16.1   thorpej 	if (ACCESSIBLE_POINTER(mtod(m, const void *), struct ip6_hdr) == 0) {
    287        1.1  liamjfoy 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
    288       1.36      maxv 		    (max_linkhdr + 3) & ~3)) == NULL) {
    289       1.36      maxv 			ret = 1;
    290       1.25  knakahar 			goto out;
    291        1.1  liamjfoy 		}
    292       1.20  christos 		*mp = m;
    293        1.1  liamjfoy 	}
    294        1.1  liamjfoy 
    295        1.1  liamjfoy 	ip6 = mtod(m, struct ip6_hdr *);
    296        1.1  liamjfoy 
    297        1.1  liamjfoy 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
    298        1.1  liamjfoy 		/* Bad version. */
    299       1.25  knakahar 		goto out;
    300        1.1  liamjfoy 	}
    301        1.1  liamjfoy 
    302        1.1  liamjfoy 	/*
    303        1.1  liamjfoy 	 * If we have a hop-by-hop extension we must process it.
    304       1.39      maxv 	 * We just leave this up to ip6_input to deal with.
    305        1.1  liamjfoy 	 */
    306        1.1  liamjfoy 	if (ip6->ip6_nxt == IPPROTO_HOPOPTS)
    307       1.25  knakahar 		goto out;
    308        1.1  liamjfoy 
    309        1.1  liamjfoy 	/*
    310        1.1  liamjfoy 	 * Attempt to find a flow.
    311        1.1  liamjfoy 	 */
    312        1.1  liamjfoy 	if ((ip6f = ip6flow_lookup(ip6)) == NULL) {
    313        1.1  liamjfoy 		/* No flow found. */
    314       1.25  knakahar 		goto out;
    315        1.1  liamjfoy 	}
    316        1.1  liamjfoy 
    317        1.1  liamjfoy 	/*
    318        1.1  liamjfoy 	 * Route and interface still up?
    319        1.1  liamjfoy 	 */
    320       1.12    dyoung 	if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL ||
    321       1.24       roy 	    (rt->rt_ifp->if_flags & IFF_UP) == 0 ||
    322       1.24       roy 	    (rt->rt_flags & RTF_BLACKHOLE) != 0)
    323       1.33     ozaki 		goto out_unref;
    324        1.1  liamjfoy 
    325        1.1  liamjfoy 	/*
    326        1.1  liamjfoy 	 * Packet size greater than MTU?
    327        1.1  liamjfoy 	 */
    328        1.1  liamjfoy 	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) {
    329        1.1  liamjfoy 		/* Return to main IPv6 input function. */
    330       1.33     ozaki 		goto out_unref;
    331        1.1  liamjfoy 	}
    332        1.1  liamjfoy 
    333       1.21   msaitoh 	/*
    334       1.21   msaitoh 	 * Clear any in-bound checksum flags for this packet.
    335       1.21   msaitoh 	 */
    336       1.21   msaitoh 	m->m_pkthdr.csum_flags = 0;
    337       1.21   msaitoh 
    338        1.1  liamjfoy 	if (ip6->ip6_hlim <= IPV6_HLIMDEC)
    339       1.33     ozaki 		goto out_unref;
    340        1.1  liamjfoy 
    341        1.1  liamjfoy 	/* Decrement hop limit (same as TTL) */
    342        1.1  liamjfoy 	ip6->ip6_hlim -= IPV6_HLIMDEC;
    343        1.1  liamjfoy 
    344        1.1  liamjfoy 	if (rt->rt_flags & RTF_GATEWAY)
    345        1.7    dyoung 		dst = rt->rt_gateway;
    346        1.1  liamjfoy 	else
    347        1.7    dyoung 		dst = rtcache_getdst(&ip6f->ip6f_ro);
    348        1.1  liamjfoy 
    349        1.1  liamjfoy 	PRT_SLOW_ARM(ip6f->ip6f_timer, IP6FLOW_TIMER);
    350        1.1  liamjfoy 
    351        1.1  liamjfoy 	ip6f->ip6f_uses++;
    352        1.1  liamjfoy 
    353       1.31  knakahar #if 0
    354       1.31  knakahar 	/*
    355       1.31  knakahar 	 * We use FIFO cache replacement instead of LRU the same ip_flow.c.
    356       1.31  knakahar 	 */
    357       1.31  knakahar 	/* move to head (LRU) for ip6flowlist. ip6flowtable does not care LRU. */
    358       1.31  knakahar 	TAILQ_REMOVE(&ip6flowlist, ip6f, ip6f_list);
    359       1.31  knakahar 	TAILQ_INSERT_HEAD(&ip6flowlist, ip6f, ip6f_list);
    360       1.31  knakahar #endif
    361       1.31  knakahar 
    362        1.1  liamjfoy 	/* Send on its way - straight to the interface output routine. */
    363       1.27  knakahar 	if ((error = if_output_lock(rt->rt_ifp, rt->rt_ifp, m, dst, rt)) != 0) {
    364        1.1  liamjfoy 		ip6f->ip6f_dropped++;
    365        1.1  liamjfoy 	} else {
    366        1.1  liamjfoy 		ip6f->ip6f_forwarded++;
    367        1.1  liamjfoy 	}
    368       1.25  knakahar 	ret = 1;
    369       1.33     ozaki out_unref:
    370       1.33     ozaki 	rtcache_unref(rt, &ip6f->ip6f_ro);
    371       1.33     ozaki out:
    372       1.25  knakahar 	mutex_exit(&ip6flow_lock);
    373       1.25  knakahar 	return ret;
    374        1.1  liamjfoy }
    375        1.1  liamjfoy 
    376        1.1  liamjfoy /*
    377        1.1  liamjfoy  * Add the IPv6 flow statistics to the main IPv6 statistics.
    378        1.1  liamjfoy  */
    379        1.1  liamjfoy static void
    380       1.33     ozaki ip6flow_addstats_rt(struct rtentry *rt, struct ip6flow *ip6f)
    381        1.1  liamjfoy {
    382       1.15   thorpej 	uint64_t *ip6s;
    383       1.11    dyoung 
    384       1.33     ozaki 	if (rt != NULL)
    385       1.11    dyoung 		rt->rt_use += ip6f->ip6f_uses;
    386       1.15   thorpej 	ip6s = IP6_STAT_GETREF();
    387       1.15   thorpej 	ip6s[IP6_STAT_FASTFORWARDFLOWS] = ip6flow_inuse;
    388       1.15   thorpej 	ip6s[IP6_STAT_CANTFORWARD] += ip6f->ip6f_dropped;
    389       1.15   thorpej 	ip6s[IP6_STAT_ODROPPED] += ip6f->ip6f_dropped;
    390       1.15   thorpej 	ip6s[IP6_STAT_TOTAL] += ip6f->ip6f_uses;
    391       1.15   thorpej 	ip6s[IP6_STAT_FORWARD] += ip6f->ip6f_forwarded;
    392       1.15   thorpej 	ip6s[IP6_STAT_FASTFORWARD] += ip6f->ip6f_forwarded;
    393       1.15   thorpej 	IP6_STAT_PUTREF();
    394        1.1  liamjfoy }
    395        1.1  liamjfoy 
    396       1.33     ozaki static void
    397       1.33     ozaki ip6flow_addstats(struct ip6flow *ip6f)
    398       1.33     ozaki {
    399       1.33     ozaki 	struct rtentry *rt;
    400       1.33     ozaki 
    401       1.33     ozaki 	rt = rtcache_validate(&ip6f->ip6f_ro);
    402       1.33     ozaki 	ip6flow_addstats_rt(rt, ip6f);
    403       1.33     ozaki 	rtcache_unref(rt, &ip6f->ip6f_ro);
    404       1.33     ozaki }
    405       1.33     ozaki 
    406        1.1  liamjfoy /*
    407        1.1  liamjfoy  * Add statistics and free the flow.
    408        1.1  liamjfoy  */
    409        1.1  liamjfoy static void
    410        1.1  liamjfoy ip6flow_free(struct ip6flow *ip6f)
    411        1.1  liamjfoy {
    412        1.1  liamjfoy 
    413       1.25  knakahar 	KASSERT(mutex_owned(&ip6flow_lock));
    414       1.25  knakahar 
    415        1.1  liamjfoy 	/*
    416        1.1  liamjfoy 	 * Remove the flow from the hash table (at elevated IPL).
    417        1.1  liamjfoy 	 * Once it's off the list, we can deal with it at normal
    418        1.1  liamjfoy 	 * network IPL.
    419        1.1  liamjfoy 	 */
    420       1.31  knakahar 	IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
    421       1.26  knakahar 
    422        1.1  liamjfoy 	ip6flow_inuse--;
    423        1.1  liamjfoy 	ip6flow_addstats(ip6f);
    424        1.7    dyoung 	rtcache_free(&ip6f->ip6f_ro);
    425        1.1  liamjfoy 	pool_put(&ip6flow_pool, ip6f);
    426        1.1  liamjfoy }
    427        1.1  liamjfoy 
    428       1.25  knakahar static struct ip6flow *
    429       1.25  knakahar ip6flow_reap_locked(int just_one)
    430        1.1  liamjfoy {
    431       1.31  knakahar 	struct ip6flow *ip6f;
    432       1.25  knakahar 
    433       1.25  knakahar 	KASSERT(mutex_owned(&ip6flow_lock));
    434       1.25  knakahar 
    435       1.31  knakahar 	/*
    436       1.31  knakahar 	 * This case must remove one ip6flow. Furthermore, this case is used in
    437       1.31  knakahar 	 * fast path(packet processing path). So, simply remove TAILQ_LAST one.
    438       1.31  knakahar 	 */
    439       1.31  knakahar 	if (just_one) {
    440       1.31  knakahar 		ip6f = TAILQ_LAST(&ip6flowlist, ip6flowhead);
    441       1.31  knakahar 		KASSERT(ip6f != NULL);
    442       1.31  knakahar 
    443       1.31  knakahar 		IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
    444       1.31  knakahar 
    445       1.31  knakahar 		ip6flow_addstats(ip6f);
    446       1.31  knakahar 		rtcache_free(&ip6f->ip6f_ro);
    447       1.31  knakahar 		return ip6f;
    448       1.31  knakahar 	}
    449        1.1  liamjfoy 
    450       1.31  knakahar 	/*
    451       1.31  knakahar 	 * This case is used in slow path(sysctl).
    452       1.31  knakahar 	 * At first, remove invalid rtcache ip6flow, and then remove TAILQ_LAST
    453       1.31  knakahar 	 * ip6flow if it is ensured least recently used by comparing last_uses.
    454       1.31  knakahar 	 */
    455       1.31  knakahar 	while (ip6flow_inuse > ip6_maxflows) {
    456       1.31  knakahar 		struct ip6flow *maybe_ip6f = TAILQ_LAST(&ip6flowlist, ip6flowhead);
    457       1.31  knakahar 
    458       1.31  knakahar 		TAILQ_FOREACH(ip6f, &ip6flowlist, ip6f_list) {
    459       1.33     ozaki 			struct rtentry *rt;
    460        1.1  liamjfoy 			/*
    461        1.1  liamjfoy 			 * If this no longer points to a valid route -
    462        1.1  liamjfoy 			 * reclaim it.
    463        1.1  liamjfoy 			 */
    464       1.33     ozaki 			if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL)
    465        1.1  liamjfoy 				goto done;
    466       1.33     ozaki 			rtcache_unref(rt, &ip6f->ip6f_ro);
    467        1.1  liamjfoy 			/*
    468        1.1  liamjfoy 			 * choose the one that's been least recently
    469        1.1  liamjfoy 			 * used or has had the least uses in the
    470        1.1  liamjfoy 			 * last 1.5 intervals.
    471        1.1  liamjfoy 			 */
    472       1.31  knakahar 			if (ip6f->ip6f_timer < maybe_ip6f->ip6f_timer
    473       1.31  knakahar 			    || ((ip6f->ip6f_timer == maybe_ip6f->ip6f_timer)
    474       1.31  knakahar 				&& (ip6f->ip6f_last_uses + ip6f->ip6f_uses
    475       1.31  knakahar 				    < maybe_ip6f->ip6f_last_uses + maybe_ip6f->ip6f_uses)))
    476        1.1  liamjfoy 				maybe_ip6f = ip6f;
    477        1.1  liamjfoy 		}
    478        1.1  liamjfoy 		ip6f = maybe_ip6f;
    479        1.1  liamjfoy 	    done:
    480        1.1  liamjfoy 		/*
    481        1.1  liamjfoy 		 * Remove the entry from the flow table
    482        1.1  liamjfoy 		 */
    483       1.31  knakahar 		IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
    484       1.26  knakahar 
    485        1.7    dyoung 		rtcache_free(&ip6f->ip6f_ro);
    486        1.1  liamjfoy 		ip6flow_inuse--;
    487        1.1  liamjfoy 		ip6flow_addstats(ip6f);
    488        1.1  liamjfoy 		pool_put(&ip6flow_pool, ip6f);
    489        1.1  liamjfoy 	}
    490        1.1  liamjfoy 	return NULL;
    491        1.1  liamjfoy }
    492        1.1  liamjfoy 
    493       1.25  knakahar /*
    494       1.25  knakahar  * Reap one or more flows - ip6flow_reap may remove
    495       1.39      maxv  * multiple flows if net.inet6.ip6.maxflows is reduced.
    496       1.25  knakahar  */
    497       1.25  knakahar struct ip6flow *
    498       1.25  knakahar ip6flow_reap(int just_one)
    499       1.25  knakahar {
    500       1.25  knakahar 	struct ip6flow *ip6f;
    501       1.25  knakahar 
    502       1.25  knakahar 	mutex_enter(&ip6flow_lock);
    503       1.25  knakahar 	ip6f = ip6flow_reap_locked(just_one);
    504       1.25  knakahar 	mutex_exit(&ip6flow_lock);
    505       1.25  knakahar 	return ip6f;
    506       1.25  knakahar }
    507       1.25  knakahar 
    508       1.29     ozaki static unsigned int ip6flow_work_enqueued = 0;
    509       1.28     ozaki 
    510        1.1  liamjfoy void
    511       1.28     ozaki ip6flow_slowtimo_work(struct work *wk, void *arg)
    512        1.1  liamjfoy {
    513        1.1  liamjfoy 	struct ip6flow *ip6f, *next_ip6f;
    514        1.1  liamjfoy 
    515       1.29     ozaki 	/* We can allow enqueuing another work at this point */
    516       1.29     ozaki 	atomic_swap_uint(&ip6flow_work_enqueued, 0);
    517       1.29     ozaki 
    518       1.35     ozaki 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
    519       1.25  knakahar 	mutex_enter(&ip6flow_lock);
    520       1.16        ad 
    521       1.31  knakahar 	for (ip6f = TAILQ_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
    522       1.33     ozaki 		struct rtentry *rt = NULL;
    523       1.31  knakahar 		next_ip6f = TAILQ_NEXT(ip6f, ip6f_list);
    524        1.1  liamjfoy 		if (PRT_SLOW_ISEXPIRED(ip6f->ip6f_timer) ||
    525       1.33     ozaki 		    (rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL) {
    526        1.1  liamjfoy 			ip6flow_free(ip6f);
    527        1.1  liamjfoy 		} else {
    528        1.1  liamjfoy 			ip6f->ip6f_last_uses = ip6f->ip6f_uses;
    529       1.33     ozaki 			ip6flow_addstats_rt(rt, ip6f);
    530        1.1  liamjfoy 			ip6f->ip6f_uses = 0;
    531        1.1  liamjfoy 			ip6f->ip6f_dropped = 0;
    532        1.1  liamjfoy 			ip6f->ip6f_forwarded = 0;
    533        1.1  liamjfoy 		}
    534       1.33     ozaki 		rtcache_unref(rt, &ip6f->ip6f_ro);
    535        1.1  liamjfoy 	}
    536       1.16        ad 
    537       1.32     ozaki 	mutex_exit(&ip6flow_lock);
    538       1.35     ozaki 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
    539        1.1  liamjfoy }
    540        1.1  liamjfoy 
    541       1.28     ozaki void
    542       1.28     ozaki ip6flow_slowtimo(void)
    543       1.28     ozaki {
    544       1.28     ozaki 
    545       1.28     ozaki 	/* Avoid enqueuing another work when one is already enqueued */
    546       1.29     ozaki 	if (atomic_swap_uint(&ip6flow_work_enqueued, 1) == 1)
    547       1.28     ozaki 		return;
    548       1.28     ozaki 
    549       1.28     ozaki 	workqueue_enqueue(ip6flow_slowtimo_wq, &ip6flow_slowtimo_wk, NULL);
    550       1.28     ozaki }
    551       1.28     ozaki 
    552        1.1  liamjfoy /*
    553        1.1  liamjfoy  * We have successfully forwarded a packet using the normal
    554        1.1  liamjfoy  * IPv6 stack. Now create/update a flow.
    555        1.1  liamjfoy  */
    556        1.1  liamjfoy void
    557       1.33     ozaki ip6flow_create(struct route *ro, struct mbuf *m)
    558        1.1  liamjfoy {
    559       1.13    dyoung 	const struct ip6_hdr *ip6;
    560        1.1  liamjfoy 	struct ip6flow *ip6f;
    561        1.1  liamjfoy 	size_t hash;
    562        1.1  liamjfoy 
    563       1.32     ozaki 	ip6 = mtod(m, const struct ip6_hdr *);
    564       1.32     ozaki 
    565       1.35     ozaki 	KERNEL_LOCK_UNLESS_NET_MPSAFE();
    566       1.25  knakahar 	mutex_enter(&ip6flow_lock);
    567       1.25  knakahar 
    568        1.1  liamjfoy 	/*
    569        1.1  liamjfoy 	 * If IPv6 Fast Forward is disabled, don't create a flow.
    570        1.1  liamjfoy 	 * It can be disabled by setting net.inet6.ip6.maxflows to 0.
    571        1.1  liamjfoy 	 *
    572        1.1  liamjfoy 	 * Don't create a flow for ICMPv6 messages.
    573        1.1  liamjfoy 	 */
    574       1.38  knakahar 	if (ip6_maxflows == 0 || ip6->ip6_nxt == IPPROTO_IPV6_ICMP)
    575       1.32     ozaki 		goto out;
    576       1.22     pooka 
    577        1.1  liamjfoy 	/*
    578        1.1  liamjfoy 	 * See if an existing flow exists.  If so:
    579        1.1  liamjfoy 	 *	- Remove the flow
    580        1.1  liamjfoy 	 *	- Add flow statistics
    581        1.1  liamjfoy 	 *	- Free the route
    582        1.1  liamjfoy 	 *	- Reset statistics
    583        1.1  liamjfoy 	 *
    584        1.1  liamjfoy 	 * If a flow doesn't exist allocate a new one if
    585        1.1  liamjfoy 	 * ip6_maxflows hasn't reached its limit. If it has
    586        1.1  liamjfoy 	 * been reached, reap some flows.
    587        1.1  liamjfoy 	 */
    588        1.1  liamjfoy 	ip6f = ip6flow_lookup(ip6);
    589        1.1  liamjfoy 	if (ip6f == NULL) {
    590        1.1  liamjfoy 		if (ip6flow_inuse >= ip6_maxflows) {
    591       1.25  knakahar 			ip6f = ip6flow_reap_locked(1);
    592        1.1  liamjfoy 		} else {
    593        1.1  liamjfoy 			ip6f = pool_get(&ip6flow_pool, PR_NOWAIT);
    594        1.1  liamjfoy 			if (ip6f == NULL)
    595       1.22     pooka 				goto out;
    596        1.1  liamjfoy 			ip6flow_inuse++;
    597        1.1  liamjfoy 		}
    598        1.1  liamjfoy 		memset(ip6f, 0, sizeof(*ip6f));
    599        1.1  liamjfoy 	} else {
    600       1.31  knakahar 		IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
    601       1.26  knakahar 
    602        1.1  liamjfoy 		ip6flow_addstats(ip6f);
    603        1.7    dyoung 		rtcache_free(&ip6f->ip6f_ro);
    604        1.1  liamjfoy 		ip6f->ip6f_uses = 0;
    605        1.1  liamjfoy 		ip6f->ip6f_last_uses = 0;
    606        1.1  liamjfoy 		ip6f->ip6f_dropped = 0;
    607        1.1  liamjfoy 		ip6f->ip6f_forwarded = 0;
    608        1.1  liamjfoy 	}
    609        1.1  liamjfoy 
    610        1.1  liamjfoy 	/*
    611        1.1  liamjfoy 	 * Fill in the updated/new details.
    612        1.1  liamjfoy 	 */
    613        1.7    dyoung 	rtcache_copy(&ip6f->ip6f_ro, ro);
    614        1.1  liamjfoy 	ip6f->ip6f_dst = ip6->ip6_dst;
    615        1.1  liamjfoy 	ip6f->ip6f_src = ip6->ip6_src;
    616        1.1  liamjfoy 	ip6f->ip6f_flow = ip6->ip6_flow;
    617        1.1  liamjfoy 	PRT_SLOW_ARM(ip6f->ip6f_timer, IP6FLOW_TIMER);
    618        1.1  liamjfoy 
    619        1.1  liamjfoy 	/*
    620       1.39      maxv 	 * Insert into the appropriate bucket of the flow table.
    621        1.1  liamjfoy 	 */
    622        1.1  liamjfoy 	hash = ip6flow_hash(ip6);
    623       1.31  knakahar 	IP6FLOW_INSERT(hash, ip6f);
    624       1.22     pooka 
    625       1.22     pooka  out:
    626       1.32     ozaki 	mutex_exit(&ip6flow_lock);
    627       1.35     ozaki 	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
    628        1.1  liamjfoy }
    629        1.1  liamjfoy 
    630        1.1  liamjfoy /*
    631        1.4  liamjfoy  * Invalidate/remove all flows - if new_size is positive we
    632        1.4  liamjfoy  * resize the hash table.
    633        1.1  liamjfoy  */
    634        1.4  liamjfoy int
    635        1.4  liamjfoy ip6flow_invalidate_all(int new_size)
    636        1.1  liamjfoy {
    637        1.1  liamjfoy 	struct ip6flow *ip6f, *next_ip6f;
    638       1.26  knakahar 	int error;
    639        1.1  liamjfoy 
    640        1.4  liamjfoy 	error = 0;
    641       1.25  knakahar 
    642       1.25  knakahar 	mutex_enter(&ip6flow_lock);
    643       1.25  knakahar 
    644       1.31  knakahar 	for (ip6f = TAILQ_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
    645       1.31  knakahar 		next_ip6f = TAILQ_NEXT(ip6f, ip6f_list);
    646        1.1  liamjfoy 		ip6flow_free(ip6f);
    647        1.1  liamjfoy 	}
    648        1.4  liamjfoy 
    649       1.39      maxv 	if (new_size)
    650       1.25  knakahar 		error = ip6flow_init_locked(new_size);
    651        1.4  liamjfoy 
    652       1.25  knakahar 	mutex_exit(&ip6flow_lock);
    653       1.25  knakahar 
    654        1.4  liamjfoy 	return error;
    655        1.1  liamjfoy }
    656       1.30  knakahar 
    657       1.30  knakahar /*
    658       1.30  knakahar  * sysctl helper routine for net.inet.ip6.maxflows. Since
    659       1.30  knakahar  * we could reduce this value, call ip6flow_reap();
    660       1.30  knakahar  */
    661       1.30  knakahar static int
    662       1.30  knakahar sysctl_net_inet6_ip6_maxflows(SYSCTLFN_ARGS)
    663       1.30  knakahar {
    664       1.30  knakahar 	int error;
    665       1.30  knakahar 
    666       1.30  knakahar 	error = sysctl_lookup(SYSCTLFN_CALL(rnode));
    667       1.30  knakahar 	if (error || newp == NULL)
    668       1.30  knakahar 		return (error);
    669       1.30  knakahar 
    670       1.35     ozaki 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
    671       1.30  knakahar 
    672       1.30  knakahar 	ip6flow_reap(0);
    673       1.30  knakahar 
    674       1.35     ozaki 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
    675       1.30  knakahar 
    676       1.30  knakahar 	return (0);
    677       1.30  knakahar }
    678       1.30  knakahar 
    679       1.30  knakahar static int
    680       1.30  knakahar sysctl_net_inet6_ip6_hashsize(SYSCTLFN_ARGS)
    681       1.30  knakahar {
    682       1.30  knakahar 	int error, tmp;
    683       1.30  knakahar 	struct sysctlnode node;
    684       1.30  knakahar 
    685       1.30  knakahar 	node = *rnode;
    686       1.30  knakahar 	tmp = ip6_hashsize;
    687       1.30  knakahar 	node.sysctl_data = &tmp;
    688       1.30  knakahar 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    689       1.30  knakahar 	if (error || newp == NULL)
    690       1.30  knakahar 		return (error);
    691       1.30  knakahar 
    692       1.30  knakahar 	if ((tmp & (tmp - 1)) == 0 && tmp != 0) {
    693       1.30  knakahar 		/*
    694       1.30  knakahar 		 * Can only fail due to malloc()
    695       1.30  knakahar 		 */
    696       1.35     ozaki 		SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
    697       1.30  knakahar 		error = ip6flow_invalidate_all(tmp);
    698       1.35     ozaki 		SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
    699       1.30  knakahar 	} else {
    700       1.30  knakahar 		/*
    701       1.30  knakahar 		 * EINVAL if not a power of 2
    702       1.30  knakahar 		 */
    703       1.30  knakahar 		error = EINVAL;
    704       1.30  knakahar 	}
    705       1.30  knakahar 
    706       1.30  knakahar 	return error;
    707       1.30  knakahar }
    708       1.30  knakahar 
    709       1.30  knakahar static void
    710       1.30  knakahar ip6flow_sysctl_init(struct sysctllog **clog)
    711       1.30  knakahar {
    712       1.30  knakahar 
    713       1.30  knakahar 	sysctl_createv(clog, 0, NULL, NULL,
    714       1.30  knakahar 		       CTLFLAG_PERMANENT,
    715       1.30  knakahar 		       CTLTYPE_NODE, "inet6",
    716       1.30  knakahar 		       SYSCTL_DESCR("PF_INET6 related settings"),
    717       1.30  knakahar 		       NULL, 0, NULL, 0,
    718       1.30  knakahar 		       CTL_NET, PF_INET6, CTL_EOL);
    719       1.30  knakahar 	sysctl_createv(clog, 0, NULL, NULL,
    720       1.30  knakahar 		       CTLFLAG_PERMANENT,
    721       1.30  knakahar 		       CTLTYPE_NODE, "ip6",
    722       1.30  knakahar 		       SYSCTL_DESCR("IPv6 related settings"),
    723       1.30  knakahar 		       NULL, 0, NULL, 0,
    724       1.30  knakahar 		       CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
    725       1.30  knakahar 
    726       1.30  knakahar 	sysctl_createv(clog, 0, NULL, NULL,
    727       1.39      maxv 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
    728       1.39      maxv 		       CTLTYPE_INT, "maxflows",
    729       1.39      maxv 		       SYSCTL_DESCR("Number of flows for fast forwarding (IPv6)"),
    730       1.39      maxv 		       sysctl_net_inet6_ip6_maxflows, 0, &ip6_maxflows, 0,
    731       1.39      maxv 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
    732       1.39      maxv 		       CTL_CREATE, CTL_EOL);
    733       1.30  knakahar 	sysctl_createv(clog, 0, NULL, NULL,
    734       1.39      maxv 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
    735       1.39      maxv 		       CTLTYPE_INT, "hashsize",
    736       1.39      maxv 		       SYSCTL_DESCR("Size of hash table for fast forwarding (IPv6)"),
    737       1.39      maxv 		       sysctl_net_inet6_ip6_hashsize, 0, &ip6_hashsize, 0,
    738       1.39      maxv 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
    739       1.39      maxv 		       CTL_CREATE, CTL_EOL);
    740       1.30  knakahar }
    741