Home | History | Annotate | Line # | Download | only in netinet
      1 /*	$NetBSD: wqinput.c,v 1.8 2020/02/07 12:35:33 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2017 Internet Initiative Japan Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #ifdef _KERNEL_OPT
     30 #include "opt_net_mpsafe.h"
     31 #endif
     32 
     33 #include <sys/param.h>
     34 #include <sys/kmem.h>
     35 #include <sys/mbuf.h>
     36 #include <sys/protosw.h>
     37 #include <sys/socketvar.h>
     38 #include <sys/syslog.h>
     39 #include <sys/workqueue.h>
     40 #include <sys/atomic.h>
     41 #include <sys/queue.h>
     42 #include <sys/percpu.h>
     43 #include <sys/sysctl.h>
     44 #include <sys/xcall.h>
     45 
     46 #include <net/if.h>
     47 #include <netinet/wqinput.h>
     48 
/*
 * Maximum number of pending work items on a single worklist.
 * wqinput_input() drops (and counts) a packet instead of queueing it
 * once a worklist's wwl_len has reached this limit.
 */
#define WQINPUT_LIST_MAXLEN	IFQ_MAXLEN
     50 
/*
 * One queued packet: the three arguments later handed to the wqi_input
 * routine, linked into a singly-linked worklist.
 */
struct wqinput_work {
	struct mbuf	*ww_mbuf;	/* first argument to wqi_input */
	int		ww_off;		/* second argument to wqi_input */
	int		ww_proto;	/* third argument to wqi_input */
	struct wqinput_work *ww_next;	/* next item; NULL for the last one */
};
     57 
/*
 * FIFO of pending work items plus its bookkeeping.  One instance is
 * allocated for each CPU by wqinput_percpu_init_cpu().
 */
struct wqinput_worklist {
	/*
	 * XXX: TAILQ cannot be used because TAILQ_INIT remembers the address
	 * of percpu data while percpu(9) may move percpu data during bootup.
	 */
	struct wqinput_work *wwl_head;	/* oldest item; NULL when empty */
	struct wqinput_work *wwl_tail;	/* newest item; NULL when empty */
	unsigned int	wwl_len;	/* number of items on the list */
	uint64_t	wwl_dropped;	/* packets dropped by wqinput_input() */
	struct work	wwl_work;	/* handle given to workqueue_enqueue() */
	/* true from enqueueing wwl_work until wqinput_work() starts running */
	bool		wwl_wq_is_active;
};
     70 
/*
 * State of one workqueue-based input path, as set up by wqinput_create().
 */
struct wqinput {
	struct workqueue *wqi_wq;	/* workqueue that runs wqinput_work() */
	struct pool	wqi_work_pool;	/* pool of struct wqinput_work */
	/* each per-CPU slot holds a pointer to a struct wqinput_worklist */
	struct percpu	*wqi_worklists; /* struct wqinput_worklist */
	/* routine invoked by wqinput_work() for every queued item */
	void    	(*wqi_input)(struct mbuf *, int, int);
};
     77 
/* Forward declarations of static functions defined later in this file. */
static void wqinput_work(struct work *, void *);
static void wqinput_sysctl_setup(const char *, struct wqinput *);
     80 
     81 static void
     82 wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
     83 {
     84 	struct wqinput_worklist **const wwlp = p;
     85 	struct wqinput_worklist *const wwl = *wwlp;
     86 	uint64_t *sum = arg;
     87 
     88 	*sum += wwl->wwl_dropped;
     89 }
     90 
     91 static int
     92 wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
     93 {
     94 	struct sysctlnode node;
     95 	struct wqinput *wqi;
     96 	uint64_t sum = 0;
     97 	int error;
     98 
     99 	node = *rnode;
    100 	wqi = node.sysctl_data;
    101 
    102 	percpu_foreach_xcall(wqi->wqi_worklists, XC_HIGHPRI_IPL(IPL_SOFTNET),
    103 	    wqinput_drops, &sum);
    104 
    105 	node.sysctl_data = &sum;
    106 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    107 	if (error != 0 || newp == NULL)
    108 		return error;
    109 
    110 	return 0;
    111 }
    112 
    113 static void
    114 wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
    115 {
    116 	const struct sysctlnode *cnode, *rnode;
    117 	int error;
    118 
    119 	error = sysctl_createv(NULL, 0, NULL, &rnode,
    120 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
    121 	    SYSCTL_DESCR("workqueue-based pr_input controls"),
    122 	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
    123 	if (error != 0)
    124 		goto bad;
    125 
    126 	error = sysctl_createv(NULL, 0, &rnode, &rnode,
    127 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
    128 	    SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
    129 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
    130 	if (error != 0)
    131 		goto bad;
    132 
    133 	error = sysctl_createv(NULL, 0, &rnode, &rnode,
    134 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
    135 	    SYSCTL_DESCR("wqinput input queue controls"),
    136 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
    137 	if (error != 0)
    138 		goto bad;
    139 
    140 	error = sysctl_createv(NULL, 0, &rnode, &cnode,
    141 	    CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops",
    142 	    SYSCTL_DESCR("Total packets dropped due to full input queue"),
    143 	    wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL);
    144 	if (error != 0)
    145 		goto bad;
    146 
    147 	return;
    148 bad:
    149 	log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
    150 	    __func__, name);
    151 	return;
    152 }
    153 
    154 static struct wqinput_worklist *
    155 wqinput_percpu_getref(percpu_t *pc)
    156 {
    157 
    158 	return *(struct wqinput_worklist **)percpu_getref(pc);
    159 }
    160 
    161 static void
    162 wqinput_percpu_putref(percpu_t *pc)
    163 {
    164 
    165 	percpu_putref(pc);
    166 }
    167 
    168 static void
    169 wqinput_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
    170 {
    171 	struct wqinput_worklist **wwlp = p;
    172 
    173 	*wwlp = kmem_zalloc(sizeof(**wwlp), KM_SLEEP);
    174 }
    175 
    176 struct wqinput *
    177 wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
    178 {
    179 	struct wqinput *wqi;
    180 	int error;
    181 	char namebuf[32];
    182 
    183 	snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);
    184 
    185 	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);
    186 
    187 	error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
    188 	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
    189 	if (error != 0)
    190 		panic("%s: workqueue_create failed (%d)\n", __func__, error);
    191 	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
    192 	    name, NULL, IPL_SOFTNET);
    193 	wqi->wqi_worklists = percpu_create(sizeof(struct wqinput_worklist *),
    194 	    wqinput_percpu_init_cpu, NULL, NULL);
    195 	wqi->wqi_input = func;
    196 
    197 	wqinput_sysctl_setup(name, wqi);
    198 
    199 	return wqi;
    200 }
    201 
    202 static struct wqinput_work *
    203 wqinput_work_get(struct wqinput_worklist *wwl)
    204 {
    205 	struct wqinput_work *work;
    206 
    207 	/* Must be called at IPL_SOFTNET */
    208 
    209 	work = wwl->wwl_head;
    210 	if (work != NULL) {
    211 		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len);
    212 		wwl->wwl_len--;
    213 		wwl->wwl_head = work->ww_next;
    214 		work->ww_next = NULL;
    215 
    216 		if (wwl->wwl_head == NULL)
    217 			wwl->wwl_tail = NULL;
    218 	} else {
    219 		KASSERT(wwl->wwl_len == 0);
    220 	}
    221 
    222 	return work;
    223 }
    224 
    225 static void
    226 wqinput_work(struct work *wk, void *arg)
    227 {
    228 	struct wqinput *wqi = arg;
    229 	struct wqinput_work *work;
    230 	struct wqinput_worklist *wwl;
    231 	int s;
    232 
    233 	/* Users expect to run at IPL_SOFTNET */
    234 	s = splsoftnet();
    235 	/* This also prevents LWP migrations between CPUs */
    236 	wwl = wqinput_percpu_getref(wqi->wqi_worklists);
    237 
    238 	/* We can allow enqueuing another work at this point */
    239 	wwl->wwl_wq_is_active = false;
    240 
    241 	while ((work = wqinput_work_get(wwl)) != NULL) {
    242 		mutex_enter(softnet_lock);
    243 		KERNEL_LOCK_UNLESS_NET_MPSAFE();
    244 		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
    245 		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
    246 		mutex_exit(softnet_lock);
    247 
    248 		pool_put(&wqi->wqi_work_pool, work);
    249 	}
    250 
    251 	wqinput_percpu_putref(wqi->wqi_worklists);
    252 	splx(s);
    253 }
    254 
    255 static void
    256 wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
    257 {
    258 
    259 	if (wwl->wwl_tail != NULL) {
    260 		wwl->wwl_tail->ww_next = work;
    261 	} else {
    262 		wwl->wwl_head = work;
    263 	}
    264 	wwl->wwl_tail = work;
    265 	wwl->wwl_len++;
    266 }
    267 
    268 void
    269 wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
    270 {
    271 	struct wqinput_work *work;
    272 	struct wqinput_worklist *wwl;
    273 
    274 	wwl = wqinput_percpu_getref(wqi->wqi_worklists);
    275 
    276 	/* Prevent too much work and mbuf from being queued */
    277 	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
    278 		wwl->wwl_dropped++;
    279 		m_freem(m);
    280 		goto out;
    281 	}
    282 
    283 	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
    284 	if (work == NULL) {
    285 		wwl->wwl_dropped++;
    286 		m_freem(m);
    287 		goto out;
    288 	}
    289 	work->ww_mbuf = m;
    290 	work->ww_off = off;
    291 	work->ww_proto = proto;
    292 	work->ww_next = NULL;
    293 
    294 	wqinput_work_put(wwl, work);
    295 
    296 	/* Avoid enqueuing another work when one is already enqueued */
    297 	if (wwl->wwl_wq_is_active)
    298 		goto out;
    299 	wwl->wwl_wq_is_active = true;
    300 
    301 	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
    302 out:
    303 	wqinput_percpu_putref(wqi->wqi_worklists);
    304 }
    305