Home | History | Annotate | Line # | Download | only in net
nd.c revision 1.4
      1 /*	$NetBSD: nd.c,v 1.4 2020/09/15 23:40:03 roy Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Roy Marples.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.4 2020/09/15 23:40:03 roy Exp $");
     32 
     33 #include <sys/callout.h>
     34 #include <sys/mbuf.h>
     35 #include <sys/socketvar.h> /* for softnet_lock */
     36 
     37 #include <net/if_llatbl.h>
     38 #include <net/nd.h>
     39 #include <net/route.h>
     40 
     41 #include <netinet/in.h>
     42 #include <netinet/ip6.h>
     43 
     44 static struct nd_domain *nd_domains[AF_MAX]; /* registered ND domains, indexed by address family */
     45 
     46 static int nd_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */
     47 
     48 static void nd_set_timertick(struct llentry *, time_t); /* arm an entry's callout, delay given in ticks */
     49 static struct nd_domain *nd_find_domain(int); /* look up nd_domains[] by address family */
     50 
     51 static void
     52 nd_timer(void *arg)
     53 {
     54 	struct llentry *ln = arg;
     55 	struct nd_domain *nd;
     56 	struct ifnet *ifp = NULL;
     57 	struct psref psref;
     58 	struct mbuf *m = NULL;
     59 	bool send_ns = false;
     60 	int16_t missed = ND_LLINFO_NOSTATE;
     61 	union l3addr taddr, *daddrp = NULL;
     62 
     63 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
     64 	LLE_WLOCK(ln);
     65 
     66 	if (!(ln->la_flags & LLE_LINKED))
     67 		goto out;
     68 	if (ln->ln_ntick > 0) {
     69 		nd_set_timer(ln, ND_TIMER_TICK);
     70 		goto out;
     71 	}
     72 
     73 	nd = nd_find_domain(ln->lle_tbl->llt_af);
     74 	ifp = ln->lle_tbl->llt_ifp;
     75 	KASSERT(ifp != NULL);
     76 	if_acquire(ifp, &psref);
     77 
     78 	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));
     79 
     80 	switch (ln->ln_state) {
     81 	case ND_LLINFO_WAITDELETE:
     82 		LLE_REMREF(ln);
     83 		nd->nd_free(ln, 0);
     84 		ln = NULL;
     85 		break;
     86 
     87 	case ND_LLINFO_INCOMPLETE:
     88 		send_ns = true;
     89 		if (ln->ln_asked++ < nd->nd_mmaxtries)
     90 			break;
     91 
     92 		if (ln->ln_hold) {
     93 			struct mbuf *m0, *mnxt;
     94 
     95 			/*
     96 			 * Assuming every packet in ln_hold
     97 			 * has the same IP header.
     98 			 */
     99 			m = ln->ln_hold;
    100 			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
    101 				mnxt = m0->m_nextpkt;
    102 				m0->m_nextpkt = NULL;
    103 				m_freem(m0);
    104 			}
    105 
    106 			m->m_nextpkt = NULL;
    107 			ln->ln_hold = NULL;
    108 		}
    109 
    110 		missed = ND_LLINFO_INCOMPLETE;
    111 		ln->ln_state = ND_LLINFO_WAITDELETE;
    112 		break;
    113 
    114 	case ND_LLINFO_REACHABLE:
    115 		if (!ND_IS_LLINFO_PERMANENT(ln)) {
    116 			ln->ln_state = ND_LLINFO_STALE;
    117 			nd_set_timer(ln, ND_TIMER_GC);
    118 		}
    119 		break;
    120 
    121 	case ND_LLINFO_PURGE: /* FALLTHROUGH */
    122 	case ND_LLINFO_STALE:
    123 		if (!ND_IS_LLINFO_PERMANENT(ln)) {
    124 			LLE_REMREF(ln);
    125 			nd->nd_free(ln, 1);
    126 			ln = NULL;
    127 		}
    128 		break;
    129 
    130 	case ND_LLINFO_DELAY:
    131 		if (nd->nd_nud_enabled(ifp)) {
    132 			ln->ln_asked = 1;
    133 			ln->ln_state = ND_LLINFO_PROBE;
    134 			send_ns = true;
    135 			daddrp = &taddr;
    136 		} else {
    137 			ln->ln_state = ND_LLINFO_STALE;
    138 			nd_set_timer(ln, ND_TIMER_GC);
    139 		}
    140 		break;
    141 
    142 	case ND_LLINFO_PROBE:
    143 		send_ns = true;
    144 		if (ln->ln_asked++ < nd->nd_umaxtries) {
    145 			daddrp = &taddr;
    146 		} else {
    147 			ln->ln_state = ND_LLINFO_UNREACHABLE;
    148 			ln->ln_asked = 1;
    149 			missed = ND_LLINFO_PROBE;
    150 			/* nd_missed() consumers can use missed to know if
    151 			 * they need to send ICMP UNREACHABLE or not. */
    152 		}
    153 		break;
    154 	case ND_LLINFO_UNREACHABLE:
    155 		/*
    156 		 * RFC 7048 Section 3 says in the UNREACHABLE state
    157 		 * packets continue to be sent to the link-layer address and
    158 		 * then backoff exponentially.
    159 		 * We adjust this slightly and move to the INCOMPLETE state
    160 		 * after nd_mmaxtries probes and then start backing off.
    161 		 *
    162 		 * This results in simpler code whilst providing a more robust
    163 		 * model which doubles the time to failure over what we did
    164 		 * before. We don't want to be back to the old ARP model where
    165 		 * no unreachability errors are returned because very
    166 		 * few applications would look at unreachability hints provided
    167 		 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
    168 		 */
    169 		send_ns = true;
    170 		if (ln->ln_asked++ < nd->nd_mmaxtries)
    171 			break;
    172 
    173 		missed = ND_LLINFO_UNREACHABLE;
    174 		ln->ln_state = ND_LLINFO_WAITDELETE;
    175 		ln->la_flags &= ~LLE_VALID;
    176 		break;
    177 	}
    178 
    179 	if (send_ns) {
    180 		uint8_t lladdr[255], *lladdrp;
    181 		union l3addr src, *psrc;
    182 
    183 		if (ln->ln_state == ND_LLINFO_WAITDELETE)
    184 			nd_set_timer(ln, ND_TIMER_RETRANS_BACKOFF);
    185 		else
    186 			nd_set_timer(ln, ND_TIMER_RETRANS);
    187 		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
    188 		    ln->la_flags & LLE_VALID)
    189 		{
    190 			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
    191 			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
    192 			lladdrp = lladdr;
    193 		} else
    194 			lladdrp = NULL;
    195 		psrc = nd->nd_holdsrc(ln, &src);
    196 		LLE_FREE_LOCKED(ln);
    197 		ln = NULL;
    198 		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
    199 	}
    200 
    201 out:
    202 	if (ln != NULL)
    203 		LLE_FREE_LOCKED(ln);
    204 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
    205 
    206 	if (missed != ND_LLINFO_NOSTATE)
    207 		nd->nd_missed(ifp, &taddr, missed, m);
    208 	if (ifp != NULL)
    209 		if_release(ifp, &psref);
    210 }
    211 
    212 static void
    213 nd_set_timertick(struct llentry *ln, time_t xtick)
    214 {
    215 
    216 	CTASSERT(sizeof(time_t) > sizeof(int));
    217 	KASSERT(xtick >= 0);
    218 
    219 	/*
    220 	 * We have to take care of a reference leak which occurs if
    221 	 * callout_reset overwrites a pending callout schedule.  Unfortunately
    222 	 * we don't have a mean to know the overwrite, so we need to know it
    223 	 * using callout_stop.  We need to call callout_pending first to exclude
    224 	 * the case that the callout has never been scheduled.
    225 	 */
    226 	if (callout_pending(&ln->la_timer)) {
    227 		bool expired;
    228 
    229 		expired = callout_stop(&ln->la_timer);
    230 		if (!expired)
    231 			LLE_REMREF(ln);
    232 	}
    233 
    234 	ln->ln_expire = time_uptime + xtick / hz;
    235 	LLE_ADDREF(ln);
    236 	if (xtick > INT_MAX) {
    237 		ln->ln_ntick = xtick - INT_MAX;
    238 		xtick = INT_MAX;
    239 	} else {
    240 		ln->ln_ntick = 0;
    241 	}
    242 	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
    243 }
    244 
    245 void
    246 nd_set_timer(struct llentry *ln, int type)
    247 {
    248 	time_t xtick;
    249 	struct ifnet *ifp;
    250 	struct nd_domain *nd;
    251 
    252 	LLE_WLOCK_ASSERT(ln);
    253 
    254 	ifp = ln->lle_tbl->llt_ifp;
    255 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    256 
    257 	switch (type) {
    258 	case ND_TIMER_IMMEDIATE:
    259 		xtick = 0;
    260 		break;
    261 	case ND_TIMER_TICK:
    262 		xtick = ln->ln_ntick;
    263 		break;
    264 	case ND_TIMER_RETRANS:
    265 		xtick = nd->nd_retrans(ifp) * hz / 1000;
    266 		break;
    267 	case ND_TIMER_RETRANS_BACKOFF:
    268 	{
    269 		unsigned int retrans = nd->nd_retrans(ifp);
    270 		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;
    271 
    272 		xtick = retrans;
    273 		while (attempts-- != 0) {
    274 			xtick *= nd->nd_retransmultiple;
    275 			if (xtick > nd->nd_maxretrans || xtick < retrans) {
    276 				xtick = nd->nd_maxretrans;
    277 				break;
    278 			}
    279 		}
    280 		xtick = xtick * hz / 1000;
    281 		break;
    282 	}
    283 	case ND_TIMER_REACHABLE:
    284 		xtick = nd->nd_reachable(ifp) * hz / 1000;
    285 		break;
    286 	case ND_TIMER_EXPIRE:
    287 		if (ln->ln_expire > time_uptime)
    288 			xtick = (ln->ln_expire - time_uptime) * hz;
    289 		else
    290 			xtick = nd_gctimer * hz;
    291 		break;
    292 	case ND_TIMER_DELAY:
    293 		xtick = nd->nd_delay * hz;
    294 		break;
    295 	case ND_TIMER_GC:
    296 		xtick = nd_gctimer * hz;
    297 		break;
    298 	default:
    299 		panic("%s: invalid timer type\n", __func__);
    300 	}
    301 
    302 	nd_set_timertick(ln, xtick);
    303 }
    304 
    305 int
    306 nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    307     uint8_t *lldst, size_t dstsize)
    308 {
    309 	struct ifnet *ifp;
    310 	struct nd_domain *nd;
    311 	int error;
    312 
    313 	LLE_WLOCK_ASSERT(ln);
    314 
    315 	ifp = ln->lle_tbl->llt_ifp;
    316 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    317 
    318 	/* We don't have to do link-layer address resolution on a p2p link. */
    319 	if (ifp->if_flags & IFF_POINTOPOINT &&
    320 	    ln->ln_state < ND_LLINFO_REACHABLE)
    321 	{
    322 		ln->ln_state = ND_LLINFO_STALE;
    323 		nd_set_timer(ln, ND_TIMER_GC);
    324 	}
    325 
    326 	/*
    327 	 * The first time we send a packet to a neighbor whose entry is
    328 	 * STALE, we have to change the state to DELAY and a sets a timer to
    329 	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
    330 	 * neighbor unreachability detection on expiration.
    331 	 * (RFC 2461 7.3.3)
    332 	 */
    333 	if (ln->ln_state == ND_LLINFO_STALE) {
    334 		ln->ln_asked = 0;
    335 		ln->ln_state = ND_LLINFO_DELAY;
    336 		nd_set_timer(ln, ND_TIMER_DELAY);
    337 	}
    338 
    339 	/*
    340 	 * If the neighbor cache entry has a state other than INCOMPLETE
    341 	 * (i.e. its link-layer address is already resolved), just
    342 	 * send the packet.
    343 	 */
    344 	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
    345 		KASSERT((ln->la_flags & LLE_VALID) != 0);
    346 		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
    347 		LLE_WUNLOCK(ln);
    348 		return 0;
    349 	}
    350 
    351 	/*
    352 	 * There is a neighbor cache entry, but no ethernet address
    353 	 * response yet.  Append this latest packet to the end of the
    354 	 * packet queue in the mbuf, unless the number of the packet
    355 	 * does not exceed maxqueuelen.  When it exceeds maxqueuelen,
    356 	 * the oldest packet in the queue will be removed.
    357 	 */
    358 	if (ln->ln_state == ND_LLINFO_NOSTATE ||
    359 	    ln->ln_state == ND_LLINFO_WAITDELETE)
    360 		ln->ln_state = ND_LLINFO_INCOMPLETE;
    361 
    362 	if (ln->ln_hold != NULL) {
    363 		struct mbuf *m_hold;
    364 		int i;
    365 
    366 		i = 0;
    367 		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
    368 			i++;
    369 			if (m_hold->m_nextpkt == NULL) {
    370 				m_hold->m_nextpkt = m;
    371 				break;
    372 			}
    373 		}
    374 		while (i >= nd->nd_maxqueuelen) {
    375 			m_hold = ln->ln_hold;
    376 			ln->ln_hold = ln->ln_hold->m_nextpkt;
    377 			m_freem(m_hold);
    378 			i--;
    379 		}
    380 	} else
    381 		ln->ln_hold = m;
    382 
    383 	if (ln->ln_asked >= nd->nd_mmaxtries)
    384 		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
    385 		    EHOSTUNREACH : EHOSTDOWN;
    386 	else
    387 		error = EWOULDBLOCK;
    388 
    389 	/*
    390 	 * If there has been no NS for the neighbor after entering the
    391 	 * INCOMPLETE state, send the first solicitation.
    392 	 */
    393 	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
    394 		struct psref psref;
    395 		union l3addr dst, src, *psrc;
    396 
    397 		ln->ln_asked++;
    398 		nd_set_timer(ln, ND_TIMER_RETRANS);
    399 		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
    400 		psrc = nd->nd_holdsrc(ln, &src);
    401 		if_acquire(ifp, &psref);
    402 		LLE_WUNLOCK(ln);
    403 
    404 		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
    405 		if_release(ifp, &psref);
    406 	} else
    407 		LLE_WUNLOCK(ln);
    408 
    409 	return error;
    410 }
    411 
    412 void
    413 nd_nud_hint(struct llentry *ln)
    414 {
    415 	struct nd_domain *nd;
    416 
    417 	if (ln == NULL)
    418 		return;
    419 
    420 	LLE_WLOCK_ASSERT(ln);
    421 
    422 	if (ln->ln_state < ND_LLINFO_REACHABLE)
    423 		goto done;
    424 
    425 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    426 
    427 	/*
    428 	 * if we get upper-layer reachability confirmation many times,
    429 	 * it is possible we have false information.
    430 	 */
    431 	ln->ln_byhint++;
    432 	if (ln->ln_byhint > nd->nd_maxnudhint)
    433 		goto done;
    434 
    435 	ln->ln_state = ND_LLINFO_REACHABLE;
    436 	if (!ND_IS_LLINFO_PERMANENT(ln))
    437 		nd_set_timer(ln, ND_TIMER_REACHABLE);
    438 
    439 done:
    440 	LLE_WUNLOCK(ln);
    441 
    442 	return;
    443 }
    444 
    445 static struct nd_domain *
    446 nd_find_domain(int af)
    447 {
    448 
    449 	KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
    450 	return nd_domains[af];
    451 }
    452 
    453 void
    454 nd_attach_domain(struct nd_domain *nd)
    455 {
    456 
    457 	KASSERT(nd->nd_family < __arraycount(nd_domains));
    458 	nd_domains[nd->nd_family] = nd;
    459 }
    460