Home | History | Annotate | Line # | Download | only in net
nd.c revision 1.5.2.1
      1 /*	$NetBSD: nd.c,v 1.5.2.1 2024/09/11 16:18:36 martin Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Roy Marples.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.5.2.1 2024/09/11 16:18:36 martin Exp $");
     32 
     33 #include <sys/callout.h>
     34 #include <sys/mbuf.h>
     35 #include <sys/socketvar.h> /* for softnet_lock */
     36 
     37 #include <net/if_llatbl.h>
     38 #include <net/nd.h>
     39 #include <net/route.h>
     40 
     41 #include <netinet/in.h>
     42 #include <netinet/ip6.h>
     43 
     44 static struct nd_domain *nd_domains[AF_MAX];
     45 
     46 static int nd_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */
     47 
     48 static void nd_set_timertick(struct llentry *, time_t);
     49 static struct nd_domain *nd_find_domain(int);
     50 
     51 static void
     52 nd_timer(void *arg)
     53 {
     54 	struct llentry *ln = arg;
     55 	struct nd_domain *nd;
     56 	struct ifnet *ifp = NULL;
     57 	struct psref psref;
     58 	struct mbuf *m = NULL;
     59 	bool send_ns = false;
     60 	int16_t missed = ND_LLINFO_NOSTATE;
     61 	union l3addr taddr, *daddrp = NULL;
     62 
     63 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
     64 	LLE_WLOCK(ln);
     65 
     66 	if (!(ln->la_flags & LLE_LINKED))
     67 		goto out;
     68 	if (ln->ln_ntick > 0) {
     69 		nd_set_timer(ln, ND_TIMER_TICK);
     70 		goto out;
     71 	}
     72 
     73 	nd = nd_find_domain(ln->lle_tbl->llt_af);
     74 	ifp = ln->lle_tbl->llt_ifp;
     75 	KASSERT(ifp != NULL);
     76 	if_acquire(ifp, &psref);
     77 
     78 	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));
     79 
     80 	switch (ln->ln_state) {
     81 	case ND_LLINFO_WAITDELETE:
     82 		LLE_REMREF(ln);
     83 		nd->nd_free(ln, 0);
     84 		ln = NULL;
     85 		break;
     86 
     87 	case ND_LLINFO_INCOMPLETE:
     88 		send_ns = true;
     89 		if (ln->ln_asked++ < nd->nd_mmaxtries)
     90 			break;
     91 
     92 		if (ln->ln_hold) {
     93 			struct mbuf *m0, *mnxt;
     94 
     95 			/*
     96 			 * Assuming every packet in ln_hold
     97 			 * has the same IP header.
     98 			 */
     99 			m = ln->ln_hold;
    100 			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
    101 				mnxt = m0->m_nextpkt;
    102 				m0->m_nextpkt = NULL;
    103 				m_freem(m0);
    104 			}
    105 
    106 			m->m_nextpkt = NULL;
    107 			ln->ln_hold = NULL;
    108 			ln->la_numheld = 0;
    109 		}
    110 
    111 		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
    112 		    ln->la_numheld);
    113 
    114 		missed = ND_LLINFO_INCOMPLETE;
    115 		ln->ln_state = ND_LLINFO_WAITDELETE;
    116 		break;
    117 
    118 	case ND_LLINFO_REACHABLE:
    119 		if (!ND_IS_LLINFO_PERMANENT(ln)) {
    120 			ln->ln_state = ND_LLINFO_STALE;
    121 			nd_set_timer(ln, ND_TIMER_GC);
    122 		}
    123 		break;
    124 
    125 	case ND_LLINFO_PURGE: /* FALLTHROUGH */
    126 	case ND_LLINFO_STALE:
    127 		if (!ND_IS_LLINFO_PERMANENT(ln)) {
    128 			LLE_REMREF(ln);
    129 			nd->nd_free(ln, 1);
    130 			ln = NULL;
    131 		}
    132 		break;
    133 
    134 	case ND_LLINFO_DELAY:
    135 		if (nd->nd_nud_enabled(ifp)) {
    136 			ln->ln_asked = 1;
    137 			ln->ln_state = ND_LLINFO_PROBE;
    138 			send_ns = true;
    139 			daddrp = &taddr;
    140 		} else {
    141 			ln->ln_state = ND_LLINFO_STALE;
    142 			nd_set_timer(ln, ND_TIMER_GC);
    143 		}
    144 		break;
    145 
    146 	case ND_LLINFO_PROBE:
    147 		send_ns = true;
    148 		if (ln->ln_asked++ < nd->nd_umaxtries) {
    149 			daddrp = &taddr;
    150 		} else {
    151 			ln->ln_state = ND_LLINFO_UNREACHABLE;
    152 			ln->ln_asked = 1;
    153 			missed = ND_LLINFO_PROBE;
    154 			/* nd_missed() consumers can use missed to know if
    155 			 * they need to send ICMP UNREACHABLE or not. */
    156 		}
    157 		break;
    158 	case ND_LLINFO_UNREACHABLE:
    159 		/*
    160 		 * RFC 7048 Section 3 says in the UNREACHABLE state
    161 		 * packets continue to be sent to the link-layer address and
    162 		 * then backoff exponentially.
    163 		 * We adjust this slightly and move to the INCOMPLETE state
    164 		 * after nd_mmaxtries probes and then start backing off.
    165 		 *
    166 		 * This results in simpler code whilst providing a more robust
    167 		 * model which doubles the time to failure over what we did
    168 		 * before. We don't want to be back to the old ARP model where
    169 		 * no unreachability errors are returned because very
    170 		 * few applications would look at unreachability hints provided
    171 		 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
    172 		 */
    173 		send_ns = true;
    174 		if (ln->ln_asked++ < nd->nd_mmaxtries)
    175 			break;
    176 
    177 		missed = ND_LLINFO_UNREACHABLE;
    178 		ln->ln_state = ND_LLINFO_WAITDELETE;
    179 		ln->la_flags &= ~LLE_VALID;
    180 		break;
    181 	}
    182 
    183 	if (send_ns) {
    184 		uint8_t lladdr[255], *lladdrp;
    185 		union l3addr src, *psrc;
    186 
    187 		if (ln->ln_state == ND_LLINFO_WAITDELETE)
    188 			nd_set_timer(ln, ND_TIMER_RETRANS_BACKOFF);
    189 		else
    190 			nd_set_timer(ln, ND_TIMER_RETRANS);
    191 		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
    192 		    ln->la_flags & LLE_VALID)
    193 		{
    194 			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
    195 			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
    196 			lladdrp = lladdr;
    197 		} else
    198 			lladdrp = NULL;
    199 		psrc = nd->nd_holdsrc(ln, &src);
    200 		LLE_FREE_LOCKED(ln);
    201 		ln = NULL;
    202 		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
    203 	}
    204 
    205 out:
    206 	if (ln != NULL)
    207 		LLE_FREE_LOCKED(ln);
    208 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
    209 
    210 	if (missed != ND_LLINFO_NOSTATE)
    211 		nd->nd_missed(ifp, &taddr, missed, m);
    212 	if (ifp != NULL)
    213 		if_release(ifp, &psref);
    214 }
    215 
    216 static void
    217 nd_set_timertick(struct llentry *ln, time_t xtick)
    218 {
    219 
    220 	CTASSERT(sizeof(time_t) > sizeof(int));
    221 	KASSERT(xtick >= 0);
    222 
    223 	/*
    224 	 * We have to take care of a reference leak which occurs if
    225 	 * callout_reset overwrites a pending callout schedule.  Unfortunately
    226 	 * we don't have a mean to know the overwrite, so we need to know it
    227 	 * using callout_stop.  We need to call callout_pending first to exclude
    228 	 * the case that the callout has never been scheduled.
    229 	 */
    230 	if (callout_pending(&ln->la_timer)) {
    231 		bool expired;
    232 
    233 		expired = callout_stop(&ln->la_timer);
    234 		if (!expired)
    235 			LLE_REMREF(ln);
    236 	}
    237 
    238 	ln->ln_expire = time_uptime + xtick / hz;
    239 	LLE_ADDREF(ln);
    240 	if (xtick > INT_MAX) {
    241 		ln->ln_ntick = xtick - INT_MAX;
    242 		xtick = INT_MAX;
    243 	} else {
    244 		ln->ln_ntick = 0;
    245 	}
    246 	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
    247 }
    248 
    249 void
    250 nd_set_timer(struct llentry *ln, int type)
    251 {
    252 	time_t xtick;
    253 	struct ifnet *ifp;
    254 	struct nd_domain *nd;
    255 
    256 	LLE_WLOCK_ASSERT(ln);
    257 
    258 	ifp = ln->lle_tbl->llt_ifp;
    259 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    260 
    261 	switch (type) {
    262 	case ND_TIMER_IMMEDIATE:
    263 		xtick = 0;
    264 		break;
    265 	case ND_TIMER_TICK:
    266 		xtick = ln->ln_ntick;
    267 		break;
    268 	case ND_TIMER_RETRANS:
    269 		xtick = nd->nd_retrans(ifp) * hz / 1000;
    270 		break;
    271 	case ND_TIMER_RETRANS_BACKOFF:
    272 	{
    273 		unsigned int retrans = nd->nd_retrans(ifp);
    274 		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;
    275 
    276 		xtick = retrans;
    277 		while (attempts-- != 0) {
    278 			xtick *= nd->nd_retransmultiple;
    279 			if (xtick > nd->nd_maxretrans || xtick < retrans) {
    280 				xtick = nd->nd_maxretrans;
    281 				break;
    282 			}
    283 		}
    284 		xtick = xtick * hz / 1000;
    285 		break;
    286 	}
    287 	case ND_TIMER_REACHABLE:
    288 		xtick = nd->nd_reachable(ifp) * hz / 1000;
    289 		break;
    290 	case ND_TIMER_EXPIRE:
    291 		if (ln->ln_expire > time_uptime)
    292 			xtick = (ln->ln_expire - time_uptime) * hz;
    293 		else
    294 			xtick = nd_gctimer * hz;
    295 		break;
    296 	case ND_TIMER_DELAY:
    297 		xtick = nd->nd_delay * hz;
    298 		break;
    299 	case ND_TIMER_GC:
    300 		xtick = nd_gctimer * hz;
    301 		break;
    302 	default:
    303 		panic("%s: invalid timer type\n", __func__);
    304 	}
    305 
    306 	nd_set_timertick(ln, xtick);
    307 }
    308 
    309 int
    310 nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    311     uint8_t *lldst, size_t dstsize)
    312 {
    313 	struct ifnet *ifp;
    314 	struct nd_domain *nd;
    315 	int error;
    316 
    317 	LLE_WLOCK_ASSERT(ln);
    318 
    319 	ifp = ln->lle_tbl->llt_ifp;
    320 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    321 
    322 	/* We don't have to do link-layer address resolution on a p2p link. */
    323 	if (ifp->if_flags & IFF_POINTOPOINT &&
    324 	    ln->ln_state < ND_LLINFO_REACHABLE)
    325 	{
    326 		ln->ln_state = ND_LLINFO_STALE;
    327 		nd_set_timer(ln, ND_TIMER_GC);
    328 	}
    329 
    330 	/*
    331 	 * The first time we send a packet to a neighbor whose entry is
    332 	 * STALE, we have to change the state to DELAY and a sets a timer to
    333 	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
    334 	 * neighbor unreachability detection on expiration.
    335 	 * (RFC 2461 7.3.3)
    336 	 */
    337 	if (ln->ln_state == ND_LLINFO_STALE) {
    338 		ln->ln_asked = 0;
    339 		ln->ln_state = ND_LLINFO_DELAY;
    340 		nd_set_timer(ln, ND_TIMER_DELAY);
    341 	}
    342 
    343 	/*
    344 	 * If the neighbor cache entry has a state other than INCOMPLETE
    345 	 * (i.e. its link-layer address is already resolved), just
    346 	 * send the packet.
    347 	 */
    348 	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
    349 		KASSERT((ln->la_flags & LLE_VALID) != 0);
    350 		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
    351 		LLE_WUNLOCK(ln);
    352 		return 0;
    353 	}
    354 
    355 	/*
    356 	 * There is a neighbor cache entry, but no ethernet address
    357 	 * response yet.  Append this latest packet to the end of the
    358 	 * packet queue in the mbuf, unless the number of the packet
    359 	 * does not exceed maxqueuelen.  When it exceeds maxqueuelen,
    360 	 * the oldest packet in the queue will be removed.
    361 	 */
    362 	if (ln->ln_state == ND_LLINFO_NOSTATE ||
    363 	    ln->ln_state == ND_LLINFO_WAITDELETE)
    364 		ln->ln_state = ND_LLINFO_INCOMPLETE;
    365 
    366 #ifdef MBUFTRACE
    367 	m_claimm(m, ln->lle_tbl->llt_mowner);
    368 #endif
    369 	if (ln->ln_hold != NULL) {
    370 		struct mbuf *m_hold;
    371 		int i;
    372 
    373 		i = 0;
    374 		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
    375 			i++;
    376 			if (m_hold->m_nextpkt == NULL) {
    377 				m_hold->m_nextpkt = m;
    378 				break;
    379 			}
    380 		}
    381 		KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d",
    382 		    ln->la_numheld, i);
    383 		while (i >= nd->nd_maxqueuelen) {
    384 			m_hold = ln->ln_hold;
    385 			ln->ln_hold = ln->ln_hold->m_nextpkt;
    386 			m_freem(m_hold);
    387 			i--;
    388 			ln->la_numheld--;
    389 		}
    390 	} else {
    391 		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
    392 		    ln->la_numheld);
    393 		ln->ln_hold = m;
    394 	}
    395 
    396 	KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen,
    397 	    "la_numheld=%d nd_maxqueuelen=%d",
    398 	    ln->la_numheld, nd->nd_maxqueuelen);
    399 	ln->la_numheld++;
    400 
    401 	if (ln->ln_asked >= nd->nd_mmaxtries)
    402 		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
    403 		    EHOSTUNREACH : EHOSTDOWN;
    404 	else
    405 		error = EWOULDBLOCK;
    406 
    407 	/*
    408 	 * If there has been no NS for the neighbor after entering the
    409 	 * INCOMPLETE state, send the first solicitation.
    410 	 */
    411 	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
    412 		struct psref psref;
    413 		union l3addr dst, src, *psrc;
    414 
    415 		ln->ln_asked++;
    416 		nd_set_timer(ln, ND_TIMER_RETRANS);
    417 		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
    418 		psrc = nd->nd_holdsrc(ln, &src);
    419 		if_acquire(ifp, &psref);
    420 		LLE_WUNLOCK(ln);
    421 
    422 		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
    423 		if_release(ifp, &psref);
    424 	} else
    425 		LLE_WUNLOCK(ln);
    426 
    427 	return error;
    428 }
    429 
    430 void
    431 nd_nud_hint(struct llentry *ln)
    432 {
    433 	struct nd_domain *nd;
    434 
    435 	if (ln == NULL)
    436 		return;
    437 
    438 	LLE_WLOCK_ASSERT(ln);
    439 
    440 	if (ln->ln_state < ND_LLINFO_REACHABLE)
    441 		goto done;
    442 
    443 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    444 
    445 	/*
    446 	 * if we get upper-layer reachability confirmation many times,
    447 	 * it is possible we have false information.
    448 	 */
    449 	ln->ln_byhint++;
    450 	if (ln->ln_byhint > nd->nd_maxnudhint)
    451 		goto done;
    452 
    453 	ln->ln_state = ND_LLINFO_REACHABLE;
    454 	if (!ND_IS_LLINFO_PERMANENT(ln))
    455 		nd_set_timer(ln, ND_TIMER_REACHABLE);
    456 
    457 done:
    458 	LLE_WUNLOCK(ln);
    459 
    460 	return;
    461 }
    462 
    463 static struct nd_domain *
    464 nd_find_domain(int af)
    465 {
    466 
    467 	KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
    468 	return nd_domains[af];
    469 }
    470 
    471 void
    472 nd_attach_domain(struct nd_domain *nd)
    473 {
    474 
    475 	KASSERT(nd->nd_family < __arraycount(nd_domains));
    476 	nd_domains[nd->nd_family] = nd;
    477 }
    478