Home | History | Annotate | Line # | Download | only in net
      1 /*	$NetBSD: nd.c,v 1.8 2025/08/18 06:46:43 ozaki-r Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Roy Marples.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.8 2025/08/18 06:46:43 ozaki-r Exp $");
     32 
     33 #include <sys/callout.h>
     34 #include <sys/mbuf.h>
     35 #include <sys/socketvar.h> /* for softnet_lock */
     36 
     37 #include <net/if_llatbl.h>
     38 #include <net/nd.h>
     39 #include <net/route.h>
     40 
     41 #include <netinet/in.h>
     42 #include <netinet/ip6.h>
     43 
/*
 * Registered neighbour discovery domains, indexed by address family.
 * Slots are filled by nd_attach_domain() and read by nd_find_domain().
 */
static struct nd_domain *nd_domains[AF_MAX];

/* Expressed in seconds; nd_set_timer() multiplies it by hz to get ticks. */
static int nd_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */

/* File-local helpers that are used before they are defined. */
static void nd_set_timertick(struct llentry *, time_t);
static struct nd_domain *nd_find_domain(int);
     50 
     51 static void
     52 nd_timer(void *arg)
     53 {
     54 	struct llentry *ln = arg;
     55 	struct nd_domain *nd;
     56 	struct ifnet *ifp = NULL;
     57 	struct psref psref;
     58 	struct mbuf *m = NULL;
     59 	bool send_ns = false;
     60 	int16_t missed = ND_LLINFO_NOSTATE;
     61 	union l3addr taddr, *daddrp = NULL;
     62 
     63 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
     64 	LLE_WLOCK(ln);
     65 
     66 	if (!(ln->la_flags & LLE_LINKED))
     67 		goto out;
     68 	if (ln->ln_ntick > 0) {
     69 		nd_set_timer(ln, ND_TIMER_TICK);
     70 		goto out;
     71 	}
     72 
     73 	nd = nd_find_domain(ln->lle_tbl->llt_af);
     74 	ifp = ln->lle_tbl->llt_ifp;
     75 	KASSERT(ifp != NULL);
     76 	if_acquire(ifp, &psref);
     77 
     78 	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));
     79 
     80 	switch (ln->ln_state) {
     81 	case ND_LLINFO_WAITDELETE:
     82 		LLE_REMREF(ln);
     83 		nd->nd_free(ln, 0);
     84 		ln = NULL;
     85 		break;
     86 
     87 	case ND_LLINFO_INCOMPLETE:
     88 		if (ln->ln_asked < nd->nd_mmaxtries) {
     89 			ln->ln_asked++;
     90 			send_ns = true;
     91 			break;
     92 		}
     93 
     94 		if (ln->ln_hold) {
     95 			struct mbuf *m0, *mnxt;
     96 
     97 			/*
     98 			 * Assuming every packet in ln_hold
     99 			 * has the same IP header.
    100 			 */
    101 			m = ln->ln_hold;
    102 			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
    103 				mnxt = m0->m_nextpkt;
    104 				m0->m_nextpkt = NULL;
    105 				m_freem(m0);
    106 			}
    107 
    108 			m->m_nextpkt = NULL;
    109 			ln->ln_hold = NULL;
    110 			ln->la_numheld = 0;
    111 		}
    112 
    113 		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
    114 		    ln->la_numheld);
    115 
    116 		missed = ND_LLINFO_INCOMPLETE;
    117 		ln->ln_state = ND_LLINFO_WAITDELETE;
    118 		break;
    119 
    120 	case ND_LLINFO_REACHABLE:
    121 		if (!ND_IS_LLINFO_PERMANENT(ln))
    122 			ln->ln_state = ND_LLINFO_STALE;
    123 		break;
    124 
    125 	case ND_LLINFO_PURGE: /* FALLTHROUGH */
    126 	case ND_LLINFO_STALE:
    127 		if (!ND_IS_LLINFO_PERMANENT(ln)) {
    128 			LLE_REMREF(ln);
    129 			nd->nd_free(ln, 1);
    130 			ln = NULL;
    131 		}
    132 		break;
    133 
    134 	case ND_LLINFO_DELAY:
    135 		if (nd->nd_nud_enabled(ifp)) {
    136 			ln->ln_asked = 1;
    137 			ln->ln_state = ND_LLINFO_PROBE;
    138 			send_ns = true;
    139 			daddrp = &taddr;
    140 		} else
    141 			ln->ln_state = ND_LLINFO_STALE;
    142 		break;
    143 
    144 	case ND_LLINFO_PROBE:
    145 		send_ns = true;
    146 		if (ln->ln_asked++ < nd->nd_umaxtries) {
    147 			daddrp = &taddr;
    148 		} else {
    149 			ln->ln_state = ND_LLINFO_UNREACHABLE;
    150 			ln->ln_asked = 1;
    151 			missed = ND_LLINFO_PROBE;
    152 			/* nd_missed() consumers can use missed to know if
    153 			 * they need to send ICMP UNREACHABLE or not. */
    154 		}
    155 		break;
    156 	case ND_LLINFO_UNREACHABLE:
    157 		/*
    158 		 * RFC 7048 Section 3 says in the UNREACHABLE state
    159 		 * packets continue to be sent to the link-layer address and
    160 		 * then backoff exponentially.
    161 		 * We adjust this slightly and move to the INCOMPLETE state
    162 		 * after nd_mmaxtries probes and then start backing off.
    163 		 *
    164 		 * This results in simpler code whilst providing a more robust
    165 		 * model which doubles the time to failure over what we did
    166 		 * before. We don't want to be back to the old ARP model where
    167 		 * no unreachability errors are returned because very
    168 		 * few applications would look at unreachability hints provided
    169 		 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
    170 		 */
    171 		send_ns = true;
    172 		if (ln->ln_asked++ < nd->nd_mmaxtries)
    173 			break;
    174 
    175 		missed = ND_LLINFO_UNREACHABLE;
    176 		ln->ln_state = ND_LLINFO_WAITDELETE;
    177 		ln->la_flags &= ~LLE_VALID;
    178 		break;
    179 	}
    180 
    181 	if (ln != NULL) {
    182 		int type = ND_TIMER_RETRANS;
    183 		if (ln->ln_state == ND_LLINFO_WAITDELETE)
    184 			type = ND_TIMER_RETRANS_BACKOFF;
    185 		else if (ln->ln_state == ND_LLINFO_STALE)
    186 			type = ND_TIMER_GC;
    187 		nd_set_timer(ln, type);
    188 	}
    189 	if (send_ns) {
    190 		uint8_t lladdr[255], *lladdrp;
    191 		union l3addr src, *psrc;
    192 
    193 		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
    194 		    ln->la_flags & LLE_VALID)
    195 		{
    196 			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
    197 			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
    198 			lladdrp = lladdr;
    199 		} else
    200 			lladdrp = NULL;
    201 		psrc = nd->nd_holdsrc(ln, &src);
    202 		LLE_FREE_LOCKED(ln);
    203 		ln = NULL;
    204 		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
    205 	}
    206 
    207 out:
    208 	if (ln != NULL)
    209 		LLE_FREE_LOCKED(ln);
    210 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
    211 
    212 	if (missed != ND_LLINFO_NOSTATE)
    213 		nd->nd_missed(ifp, &taddr, missed, m);
    214 	if (ifp != NULL)
    215 		if_release(ifp, &psref);
    216 }
    217 
    218 static void
    219 nd_set_timertick(struct llentry *ln, time_t xtick)
    220 {
    221 
    222 	CTASSERT(sizeof(time_t) > sizeof(int));
    223 	KASSERT(xtick >= 0);
    224 
    225 	/*
    226 	 * We have to take care of a reference leak which occurs if
    227 	 * callout_reset overwrites a pending callout schedule.  Unfortunately
    228 	 * we don't have a mean to know the overwrite, so we need to know it
    229 	 * using callout_stop.  We need to call callout_pending first to exclude
    230 	 * the case that the callout has never been scheduled.
    231 	 */
    232 	if (callout_pending(&ln->la_timer)) {
    233 		bool expired;
    234 
    235 		expired = callout_stop(&ln->la_timer);
    236 		if (!expired)
    237 			LLE_REMREF(ln);
    238 	}
    239 
    240 	ln->ln_expire = time_uptime + xtick / hz;
    241 	LLE_ADDREF(ln);
    242 	if (xtick > INT_MAX) {
    243 		ln->ln_ntick = xtick - INT_MAX;
    244 		xtick = INT_MAX;
    245 	} else {
    246 		ln->ln_ntick = 0;
    247 	}
    248 	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
    249 }
    250 
    251 void
    252 nd_set_timer(struct llentry *ln, int type)
    253 {
    254 	time_t xtick;
    255 	struct ifnet *ifp;
    256 	struct nd_domain *nd;
    257 
    258 	LLE_WLOCK_ASSERT(ln);
    259 
    260 	ifp = ln->lle_tbl->llt_ifp;
    261 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    262 
    263 	switch (type) {
    264 	case ND_TIMER_IMMEDIATE:
    265 		xtick = 0;
    266 		break;
    267 	case ND_TIMER_TICK:
    268 		xtick = ln->ln_ntick;
    269 		break;
    270 	case ND_TIMER_RETRANS:
    271 		xtick = nd->nd_retrans(ifp) * hz / 1000;
    272 		break;
    273 	case ND_TIMER_RETRANS_BACKOFF:
    274 	{
    275 		unsigned int retrans = nd->nd_retrans(ifp);
    276 		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;
    277 
    278 		xtick = retrans;
    279 		while (attempts-- != 0) {
    280 			xtick *= nd->nd_retransmultiple;
    281 			if (xtick > nd->nd_maxretrans || xtick < retrans) {
    282 				xtick = nd->nd_maxretrans;
    283 				break;
    284 			}
    285 		}
    286 		xtick = xtick * hz / 1000;
    287 		break;
    288 	}
    289 	case ND_TIMER_REACHABLE:
    290 		xtick = nd->nd_reachable(ifp) * hz / 1000;
    291 		break;
    292 	case ND_TIMER_EXPIRE:
    293 		if (ln->ln_expire > time_uptime)
    294 			xtick = (ln->ln_expire - time_uptime) * hz;
    295 		else
    296 			xtick = nd_gctimer * hz;
    297 		break;
    298 	case ND_TIMER_DELAY:
    299 		xtick = nd->nd_delay * hz;
    300 		break;
    301 	case ND_TIMER_GC:
    302 		xtick = nd_gctimer * hz;
    303 		break;
    304 	default:
    305 		panic("%s: invalid timer type\n", __func__);
    306 	}
    307 
    308 	nd_set_timertick(ln, xtick);
    309 }
    310 
    311 int
    312 nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    313     uint8_t *lldst, size_t dstsize)
    314 {
    315 	struct ifnet *ifp;
    316 	struct nd_domain *nd;
    317 	int error;
    318 
    319 	LLE_WLOCK_ASSERT(ln);
    320 
    321 	ifp = ln->lle_tbl->llt_ifp;
    322 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    323 
    324 	/* We don't have to do link-layer address resolution on a p2p link. */
    325 	if (ifp->if_flags & IFF_POINTOPOINT &&
    326 	    ln->ln_state < ND_LLINFO_REACHABLE)
    327 	{
    328 		ln->ln_state = ND_LLINFO_STALE;
    329 		nd_set_timer(ln, ND_TIMER_GC);
    330 	}
    331 
    332 	/*
    333 	 * The first time we send a packet to a neighbor whose entry is
    334 	 * STALE, we have to change the state to DELAY and a sets a timer to
    335 	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
    336 	 * neighbor unreachability detection on expiration.
    337 	 * (RFC 2461 7.3.3)
    338 	 */
    339 	if (ln->ln_state == ND_LLINFO_STALE) {
    340 		ln->ln_asked = 0;
    341 		ln->ln_state = ND_LLINFO_DELAY;
    342 		nd_set_timer(ln, ND_TIMER_DELAY);
    343 	}
    344 
    345 	/*
    346 	 * If the neighbor cache entry has a state other than INCOMPLETE
    347 	 * (i.e. its link-layer address is already resolved), just
    348 	 * send the packet.
    349 	 */
    350 	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
    351 		KASSERT((ln->la_flags & LLE_VALID) != 0);
    352 		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
    353 		LLE_WUNLOCK(ln);
    354 		return 0;
    355 	}
    356 
    357 	/*
    358 	 * There is a neighbor cache entry, but no ethernet address
    359 	 * response yet.  Append this latest packet to the end of the
    360 	 * packet queue in the mbuf, unless the number of the packet
    361 	 * does not exceed maxqueuelen.  When it exceeds maxqueuelen,
    362 	 * the oldest packet in the queue will be removed.
    363 	 */
    364 	if (ln->ln_state == ND_LLINFO_NOSTATE ||
    365 	    ln->ln_state == ND_LLINFO_WAITDELETE)
    366 		ln->ln_state = ND_LLINFO_INCOMPLETE;
    367 
    368 #ifdef MBUFTRACE
    369 	m_claimm(m, ln->lle_tbl->llt_mowner);
    370 #endif
    371 	if (ln->ln_hold != NULL) {
    372 		struct mbuf *m_hold;
    373 		int i;
    374 
    375 		i = 0;
    376 		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
    377 			i++;
    378 			if (m_hold->m_nextpkt == NULL) {
    379 				m_hold->m_nextpkt = m;
    380 				break;
    381 			}
    382 		}
    383 		KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d",
    384 		    ln->la_numheld, i);
    385 		while (i >= nd->nd_maxqueuelen) {
    386 			m_hold = ln->ln_hold;
    387 			ln->ln_hold = ln->ln_hold->m_nextpkt;
    388 			m_freem(m_hold);
    389 			i--;
    390 			ln->la_numheld--;
    391 		}
    392 	} else {
    393 		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
    394 		    ln->la_numheld);
    395 		ln->ln_hold = m;
    396 	}
    397 
    398 	KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen,
    399 	    "la_numheld=%d nd_maxqueuelen=%d",
    400 	    ln->la_numheld, nd->nd_maxqueuelen);
    401 	ln->la_numheld++;
    402 
    403 	if (ln->ln_asked >= nd->nd_mmaxtries)
    404 		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
    405 		    EHOSTUNREACH : EHOSTDOWN;
    406 	else
    407 		error = EWOULDBLOCK;
    408 
    409 	/*
    410 	 * If there has been no NS for the neighbor after entering the
    411 	 * INCOMPLETE state, send the first solicitation.
    412 	 */
    413 	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
    414 		struct psref psref;
    415 		union l3addr dst, src, *psrc;
    416 
    417 		ln->ln_asked++;
    418 		nd_set_timer(ln, ND_TIMER_RETRANS);
    419 		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
    420 		psrc = nd->nd_holdsrc(ln, &src);
    421 		if_acquire(ifp, &psref);
    422 		LLE_WUNLOCK(ln);
    423 
    424 		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
    425 		if_release(ifp, &psref);
    426 	} else
    427 		LLE_WUNLOCK(ln);
    428 
    429 	return error;
    430 }
    431 
    432 void
    433 nd_nud_hint(struct llentry *ln)
    434 {
    435 	struct nd_domain *nd;
    436 
    437 	if (ln == NULL)
    438 		return;
    439 
    440 	LLE_WLOCK_ASSERT(ln);
    441 
    442 	if (ln->ln_state < ND_LLINFO_REACHABLE)
    443 		goto done;
    444 
    445 	nd = nd_find_domain(ln->lle_tbl->llt_af);
    446 
    447 	/*
    448 	 * if we get upper-layer reachability confirmation many times,
    449 	 * it is possible we have false information.
    450 	 */
    451 	ln->ln_byhint++;
    452 	if (ln->ln_byhint > nd->nd_maxnudhint)
    453 		goto done;
    454 
    455 	ln->ln_state = ND_LLINFO_REACHABLE;
    456 	if (!ND_IS_LLINFO_PERMANENT(ln))
    457 		nd_set_timer(ln, ND_TIMER_REACHABLE);
    458 
    459 done:
    460 	LLE_WUNLOCK(ln);
    461 
    462 	return;
    463 }
    464 
    465 static struct nd_domain *
    466 nd_find_domain(int af)
    467 {
    468 
    469 	KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
    470 	return nd_domains[af];
    471 }
    472 
    473 void
    474 nd_attach_domain(struct nd_domain *nd)
    475 {
    476 
    477 	KASSERT(nd->nd_family < __arraycount(nd_domains));
    478 	nd_domains[nd->nd_family] = nd;
    479 }
    480