Home | History | Annotate | Line # | Download | only in net
      1 /*	$NetBSD: nd.c,v 1.10 2026/03/26 04:35:17 ozaki-r Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Roy Marples.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.10 2026/03/26 04:35:17 ozaki-r Exp $");
     32 
     33 #include <sys/callout.h>
     34 #include <sys/mbuf.h>
     35 #include <sys/socketvar.h> /* for softnet_lock */
     36 
     37 #include <net/if_llatbl.h>
     38 #include <net/nd.h>
     39 #include <net/route.h>
     40 
     41 #include <netinet/in.h>
     42 #include <netinet/ip6.h>
     43 
/*
 * Per-address-family neighbour discovery domains.  A slot is filled in by
 * nd_attach_domain() and looked up by nd_find_domain(); the array is indexed
 * directly by the address family number.
 */
static struct nd_domain *nd_domains[AF_MAX];

/* Forward declarations for helpers that are defined further down. */
static void nd_set_timertick(struct llentry *, time_t);
static struct nd_domain *nd_find_domain(int);
     48 
/*
 * nd_timer --
 *	Callout handler for a neighbour cache entry.  nd_set_timertick()
 *	schedules it with the llentry as the callout argument.
 *
 *	The handler advances the entry's ln_state, optionally re-arms the
 *	timer, optionally sends a solicitation through the domain's
 *	nd_output hook, and optionally reports a resolution failure through
 *	the domain's nd_missed hook.
 *
 *	Locking: the softnet/kernel lock macro pair brackets the state
 *	handling, and the entry is write-locked for as long as it is touched.
 *	nd_output and nd_missed are both invoked after the entry has been
 *	released with LLE_FREE_LOCKED (presumably unlock plus dropping the
 *	reference taken by nd_set_timertick -- confirm against if_llatbl.h).
 */
static void
nd_timer(void *arg)
{
	struct llentry *ln = arg;
	struct nd_domain *nd;
	struct ifnet *ifp = NULL;
	struct psref psref;
	struct mbuf *m = NULL;		/* first held packet, handed to nd_missed */
	bool send_ns = false;		/* call nd_output once state is updated */
	int16_t missed = ND_LLINFO_NOSTATE; /* state to report to nd_missed */
	union l3addr taddr, *daddrp = NULL;

	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
	LLE_WLOCK(ln);

	/* The entry was unlinked from its table; nothing left to drive. */
	if (!(ln->la_flags & LLE_LINKED))
		goto out;
	/*
	 * The requested timeout did not fit into a single callout
	 * (see nd_set_timertick); schedule the remaining ticks.
	 */
	if (ln->ln_ntick > 0) {
		nd_set_timer(ln, ND_TIMER_TICK);
		goto out;
	}

	/*
	 * Note that nd is only assigned from here on.  The early exits above
	 * leave missed at ND_LLINFO_NOSTATE, so nd is never dereferenced at
	 * the out label on those paths.
	 */
	nd = nd_find_domain(ln->lle_tbl->llt_af);
	ifp = ln->lle_tbl->llt_ifp;
	KASSERT(ifp != NULL);
	if_acquire(ifp, &psref);

	/* Keep a private copy of the target; ln may be gone when it is used. */
	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));

	switch (ln->ln_state) {
	case ND_LLINFO_WAITDELETE:
		/* Hand the entry to the domain; ln must not be used after. */
		LLE_REMREF(ln);
		nd->nd_free(ln, 0);
		ln = NULL;
		break;

	case ND_LLINFO_INCOMPLETE:
		/* Still below the retry limit: count and solicit again. */
		if (ln->ln_asked < nd->nd_mmaxtries) {
			ln->ln_asked++;
			send_ns = true;
			break;
		}

		/*
		 * Retry limit reached.  Keep only the first held packet (it
		 * is passed to nd_missed below) and free the rest.
		 */
		if (ln->ln_hold) {
			struct mbuf *m0, *mnxt;

			/*
			 * Assuming every packet in ln_hold
			 * has the same IP header.
			 */
			m = ln->ln_hold;
			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
				mnxt = m0->m_nextpkt;
				m0->m_nextpkt = NULL;
				m_freem(m0);
			}

			m->m_nextpkt = NULL;
			ln->ln_hold = NULL;
			ln->la_numheld = 0;
		}

		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
		    ln->la_numheld);

		missed = ND_LLINFO_INCOMPLETE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		/* nd_resolve() checks this flag to pick its error code. */
		ln->la_flags |= LLE_UNRESOLVED;
		break;

	case ND_LLINFO_REACHABLE:
		/* Reachable time ran out; permanent entries stay as they are. */
		if (!ND_IS_LLINFO_PERMANENT(ln))
			ln->ln_state = ND_LLINFO_STALE;
		break;

	case ND_LLINFO_PURGE: /* FALLTHROUGH */
	case ND_LLINFO_STALE:
		/* Non-permanent entries are handed to the domain to free. */
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			LLE_REMREF(ln);
			nd->nd_free(ln, 1);
			ln = NULL;
		}
		break;

	case ND_LLINFO_DELAY:
		if (nd->nd_nud_enabled(ifp)) {
			/* Start probing, addressing the target directly. */
			ln->ln_asked = 1;
			ln->ln_state = ND_LLINFO_PROBE;
			send_ns = true;
			daddrp = &taddr;
		} else
			ln->ln_state = ND_LLINFO_STALE;
		break;

	case ND_LLINFO_PROBE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_umaxtries) {
			daddrp = &taddr;
		} else {
			/*
			 * Probes exhausted: restart the count in the
			 * UNREACHABLE state.  daddrp is left NULL for this
			 * solicitation.
			 */
			ln->ln_state = ND_LLINFO_UNREACHABLE;
			ln->ln_asked = 1;
			missed = ND_LLINFO_PROBE;
			/* nd_missed() consumers can use missed to know if
			 * they need to send ICMP UNREACHABLE or not. */
		}
		break;
	case ND_LLINFO_UNREACHABLE:
		/*
		 * RFC 7048 Section 3 says in the UNREACHABLE state
		 * packets continue to be sent to the link-layer address and
		 * then backoff exponentially.
		 * We adjust this slightly and move to the INCOMPLETE state
		 * after nd_mmaxtries probes and then start backing off.
		 *
		 * This results in simpler code whilst providing a more robust
		 * model which doubles the time to failure over what we did
		 * before. We don't want to be back to the old ARP model where
		 * no unreachability errors are returned because very
		 * few applications would look at unreachability hints provided
		 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
		 */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		missed = ND_LLINFO_UNREACHABLE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		/* The cached link-layer address is no longer trusted. */
		ln->la_flags &= ~LLE_VALID;
		break;
	}

	/*
	 * The entry survived the switch: re-arm its timer.  WAITDELETE uses
	 * the backoff interval, STALE the garbage collection interval and
	 * everything else the plain retransmit interval.
	 */
	if (ln != NULL) {
		int type = ND_TIMER_RETRANS;
		if (ln->ln_state == ND_LLINFO_WAITDELETE)
			type = ND_TIMER_RETRANS_BACKOFF;
		else if (ln->ln_state == ND_LLINFO_STALE)
			type = ND_TIMER_GC;
		nd_set_timer(ln, type);
	}
	if (send_ns) {
		uint8_t lladdr[255], *lladdrp;
		union l3addr src, *psrc;

		/*
		 * Snapshot everything nd_output needs while the entry is
		 * still locked: the link-layer address (only when the entry
		 * is past INCOMPLETE and still valid) and the source address.
		 */
		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
		    ln->la_flags & LLE_VALID)
		{
			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
			lladdrp = lladdr;
		} else
			lladdrp = NULL;
		psrc = nd->nd_holdsrc(ln, &src);
		/* Release the entry before calling out; skip the release at out. */
		LLE_FREE_LOCKED(ln);
		ln = NULL;
		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
	}

out:
	if (ln != NULL)
		LLE_FREE_LOCKED(ln);
	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

	/*
	 * Report the failure only after the entry and the softnet/kernel
	 * lock have been released; m is the first held packet or NULL.
	 */
	if (missed != ND_LLINFO_NOSTATE)
		nd->nd_missed(ifp, &taddr, missed, m);
	if (ifp != NULL)
		if_release(ifp, &psref);
}
    216 
    217 static void
    218 nd_set_timertick(struct llentry *ln, time_t xtick)
    219 {
    220 
    221 	CTASSERT(sizeof(time_t) > sizeof(int));
    222 	KASSERT(xtick >= 0);
    223 
    224 	/*
    225 	 * We have to take care of a reference leak which occurs if
    226 	 * callout_reset overwrites a pending callout schedule.  Unfortunately
    227 	 * we don't have a mean to know the overwrite, so we need to know it
    228 	 * using callout_stop.  We need to call callout_pending first to exclude
    229 	 * the case that the callout has never been scheduled.
    230 	 */
    231 	if (callout_pending(&ln->la_timer)) {
    232 		bool expired;
    233 
    234 		expired = callout_stop(&ln->la_timer);
    235 		if (!expired)
    236 			LLE_REMREF(ln);
    237 	}
    238 
    239 	ln->ln_expire = time_uptime + xtick / hz;
    240 	LLE_ADDREF(ln);
    241 	if (xtick > INT_MAX) {
    242 		ln->ln_ntick = xtick - INT_MAX;
    243 		xtick = INT_MAX;
    244 	} else {
    245 		ln->ln_ntick = 0;
    246 	}
    247 	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
    248 }
    249 
    250 void
    251 nd_set_timer(struct llentry *ln, int type)
    252 {
    253 	time_t xtick;
    254 	struct ifnet *ifp;
    255 	struct nd_domain *nd;
    256 
    257 	LLE_WLOCK_ASSERT(ln);
    258 
    259 	ifp = ln->lle_tbl->llt_ifp;
    260 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    261 
    262 	switch (type) {
    263 	case ND_TIMER_IMMEDIATE:
    264 		xtick = 0;
    265 		break;
    266 	case ND_TIMER_TICK:
    267 		xtick = ln->ln_ntick;
    268 		break;
    269 	case ND_TIMER_RETRANS:
    270 		xtick = nd->nd_retrans(ifp) * hz / 1000;
    271 		break;
    272 	case ND_TIMER_RETRANS_BACKOFF:
    273 	{
    274 		unsigned int retrans = nd->nd_retrans(ifp);
    275 		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;
    276 
    277 		xtick = retrans;
    278 		while (attempts-- != 0) {
    279 			xtick *= nd->nd_retransmultiple;
    280 			if (xtick > nd->nd_maxretrans || xtick < retrans) {
    281 				xtick = nd->nd_maxretrans;
    282 				break;
    283 			}
    284 		}
    285 		xtick = xtick * hz / 1000;
    286 		break;
    287 	}
    288 	case ND_TIMER_REACHABLE:
    289 		xtick = nd->nd_reachable(ifp) * hz / 1000;
    290 		break;
    291 	case ND_TIMER_EXPIRE:
    292 		if (ln->ln_expire > time_uptime)
    293 			xtick = (ln->ln_expire - time_uptime) * hz;
    294 		else
    295 			xtick = nd->nd_gctimer * hz;
    296 		break;
    297 	case ND_TIMER_DELAY:
    298 		xtick = nd->nd_delay * hz;
    299 		break;
    300 	case ND_TIMER_GC:
    301 		xtick = nd->nd_gctimer * hz;
    302 		break;
    303 	default:
    304 		panic("%s: invalid timer type\n", __func__);
    305 	}
    306 
    307 	nd_set_timertick(ln, xtick);
    308 }
    309 
    310 int
    311 nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    312     uint8_t *lldst, size_t dstsize)
    313 {
    314 	struct ifnet *ifp;
    315 	struct nd_domain *nd;
    316 	int error;
    317 
    318 	LLE_WLOCK_ASSERT(ln);
    319 
    320 	ifp = ln->lle_tbl->llt_ifp;
    321 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    322 
    323 	/* We don't have to do link-layer address resolution on a p2p link. */
    324 	if (ifp->if_flags & IFF_POINTOPOINT &&
    325 	    ln->ln_state < ND_LLINFO_REACHABLE)
    326 	{
    327 		ln->ln_state = ND_LLINFO_STALE;
    328 		nd_set_timer(ln, ND_TIMER_GC);
    329 	}
    330 
    331 	/*
    332 	 * The first time we send a packet to a neighbor whose entry is
    333 	 * STALE, we have to change the state to DELAY and a sets a timer to
    334 	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
    335 	 * neighbor unreachability detection on expiration.
    336 	 * (RFC 2461 7.3.3)
    337 	 */
    338 	if (ln->ln_state == ND_LLINFO_STALE) {
    339 		ln->ln_asked = 0;
    340 		ln->ln_state = ND_LLINFO_DELAY;
    341 		nd_set_timer(ln, ND_TIMER_DELAY);
    342 	}
    343 
    344 	/*
    345 	 * If the neighbor cache entry has a state other than INCOMPLETE
    346 	 * (i.e. its link-layer address is already resolved), just
    347 	 * send the packet.
    348 	 */
    349 	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
    350 		KASSERT((ln->la_flags & LLE_VALID) != 0);
    351 		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
    352 		LLE_WUNLOCK(ln);
    353 		return 0;
    354 	}
    355 
    356 	/*
    357 	 * There is a neighbor cache entry, but no ethernet address
    358 	 * response yet.  Append this latest packet to the end of the
    359 	 * packet queue in the mbuf, unless the number of the packet
    360 	 * does not exceed maxqueuelen.  When it exceeds maxqueuelen,
    361 	 * the oldest packet in the queue will be removed.
    362 	 */
    363 	if (ln->ln_state == ND_LLINFO_NOSTATE ||
    364 	    ln->ln_state == ND_LLINFO_WAITDELETE) {
    365 		ln->ln_asked = 0;
    366 		ln->ln_state = ND_LLINFO_INCOMPLETE;
    367 	}
    368 
    369 #ifdef MBUFTRACE
    370 	m_claimm(m, ln->lle_tbl->llt_mowner);
    371 #endif
    372 	if (ln->ln_hold != NULL) {
    373 		struct mbuf *m_hold;
    374 		int i;
    375 
    376 		i = 0;
    377 		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
    378 			i++;
    379 			if (m_hold->m_nextpkt == NULL) {
    380 				m_hold->m_nextpkt = m;
    381 				break;
    382 			}
    383 		}
    384 		KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d",
    385 		    ln->la_numheld, i);
    386 		while (i >= nd->nd_maxqueuelen) {
    387 			m_hold = ln->ln_hold;
    388 			ln->ln_hold = ln->ln_hold->m_nextpkt;
    389 			m_freem(m_hold);
    390 			i--;
    391 			ln->la_numheld--;
    392 		}
    393 	} else {
    394 		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
    395 		    ln->la_numheld);
    396 		ln->ln_hold = m;
    397 	}
    398 
    399 	KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen,
    400 	    "la_numheld=%d nd_maxqueuelen=%d",
    401 	    ln->la_numheld, nd->nd_maxqueuelen);
    402 	ln->la_numheld++;
    403 
    404 	if ((ln->la_flags & LLE_UNRESOLVED) != 0)
    405 		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
    406 		    EHOSTUNREACH : EHOSTDOWN;
    407 	else
    408 		error = EWOULDBLOCK;
    409 
    410 	/*
    411 	 * If there has been no NS for the neighbor after entering the
    412 	 * INCOMPLETE state, send the first solicitation.
    413 	 */
    414 	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
    415 		struct psref psref;
    416 		union l3addr dst, src, *psrc;
    417 
    418 		ln->ln_asked++;
    419 		nd_set_timer(ln, ND_TIMER_RETRANS);
    420 		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
    421 		psrc = nd->nd_holdsrc(ln, &src);
    422 		if_acquire(ifp, &psref);
    423 		LLE_WUNLOCK(ln);
    424 
    425 		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
    426 		if_release(ifp, &psref);
    427 	} else
    428 		LLE_WUNLOCK(ln);
    429 
    430 	return error;
    431 }
    432 
    433 void
    434 nd_nud_hint(struct llentry *ln)
    435 {
    436 	struct nd_domain *nd;
    437 
    438 	if (ln == NULL)
    439 		return;
    440 
    441 	LLE_WLOCK_ASSERT(ln);
    442 
    443 	if (ln->ln_state < ND_LLINFO_REACHABLE)
    444 		goto done;
    445 
    446 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    447 
    448 	/*
    449 	 * if we get upper-layer reachability confirmation many times,
    450 	 * it is possible we have false information.
    451 	 */
    452 	ln->ln_byhint++;
    453 	if (ln->ln_byhint > nd->nd_maxnudhint)
    454 		goto done;
    455 
    456 	ln->ln_state = ND_LLINFO_REACHABLE;
    457 	if (!ND_IS_LLINFO_PERMANENT(ln))
    458 		nd_set_timer(ln, ND_TIMER_REACHABLE);
    459 
    460 done:
    461 	LLE_WUNLOCK(ln);
    462 
    463 	return;
    464 }
    465 
    466 static struct nd_domain *
    467 nd_find_domain(int af)
    468 {
    469 
    470 	KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
    471 	return nd_domains[af];
    472 }
    473 
    474 void
    475 nd_attach_domain(struct nd_domain *nd)
    476 {
    477 
    478 	KASSERT(nd->nd_family < __arraycount(nd_domains));
    479 	nd_domains[nd->nd_family] = nd;
    480 }
    481