/* $NetBSD: nd.c,v 1.10 2026/03/26 04:35:17 ozaki-r Exp $ */

/*
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Roy Marples.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.10 2026/03/26 04:35:17 ozaki-r Exp $");

#include <sys/callout.h>
#include <sys/mbuf.h>
#include <sys/socketvar.h> /* for softnet_lock */

#include <net/if_llatbl.h>
#include <net/nd.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip6.h>

/*
 * Per-address-family neighbor discovery callbacks and tunables, indexed by
 * address family.  Slots are filled by nd_attach_domain() and read through
 * nd_find_domain().
 */
static struct nd_domain *nd_domains[AF_MAX];

static void nd_set_timertick(struct llentry *, time_t);
static struct nd_domain *nd_find_domain(int);

/*
 * Callout handler for a link-layer entry's neighbor discovery timer.
 *
 * arg is the struct llentry the callout was armed for (see
 * nd_set_timertick(), which takes a reference on the entry for each armed
 * callout).  The handler advances the entry's state machine, optionally
 * re-arms the timer, optionally emits a solicitation through the domain's
 * nd_output hook, and optionally reports a resolution failure through the
 * domain's nd_missed hook.
 *
 * The entry is write-locked for the duration of the state change; the
 * nd_output and nd_missed hooks are invoked only after the entry has been
 * released.  The interface is pinned with if_acquire()/if_release() while it
 * is used.
 */
static void
nd_timer(void *arg)
{
	struct llentry *ln = arg;
	struct nd_domain *nd;
	struct ifnet *ifp = NULL;
	struct psref psref;
	struct mbuf *m = NULL;		/* held packet handed to nd_missed */
	bool send_ns = false;		/* call nd_output after unlocking */
	int16_t missed = ND_LLINFO_NOSTATE; /* state to report via nd_missed */
	union l3addr taddr, *daddrp = NULL;

	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
	LLE_WLOCK(ln);

	/* The entry has been unlinked from its table; nothing to do. */
	if (!(ln->la_flags & LLE_LINKED))
		goto out;
	/*
	 * The requested timeout did not fit in a single callout (see
	 * nd_set_timertick()); keep waiting for the remaining ticks.
	 */
	if (ln->ln_ntick > 0) {
		nd_set_timer(ln, ND_TIMER_TICK);
		goto out;
	}

	nd = nd_find_domain(ln->lle_tbl->llt_af);
	ifp = ln->lle_tbl->llt_ifp;
	KASSERT(ifp != NULL);
	if_acquire(ifp, &psref);

	/* Snapshot the target address; it is used after ln is released. */
	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));

	switch (ln->ln_state) {
	case ND_LLINFO_WAITDELETE:
		/* The grace period after a failed resolution is over. */
		LLE_REMREF(ln);
		nd->nd_free(ln, 0);
		ln = NULL;
		break;

	case ND_LLINFO_INCOMPLETE:
		/* Still below the retry limit: solicit once more. */
		if (ln->ln_asked < nd->nd_mmaxtries) {
			ln->ln_asked++;
			send_ns = true;
			break;
		}

		/*
		 * Resolution failed.  Keep only the first held packet (it is
		 * passed to nd_missed below) and free the rest.
		 */
		if (ln->ln_hold) {
			struct mbuf *m0, *mnxt;

			/*
			 * Assuming every packet in ln_hold
			 * has the same IP header.
			 */
			m = ln->ln_hold;
			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
				mnxt = m0->m_nextpkt;
				m0->m_nextpkt = NULL;
				m_freem(m0);
			}

			m->m_nextpkt = NULL;
			ln->ln_hold = NULL;
			ln->la_numheld = 0;
		}

		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
		    ln->la_numheld);

		missed = ND_LLINFO_INCOMPLETE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		/* nd_resolve() reports a host error while this flag is set. */
		ln->la_flags |= LLE_UNRESOLVED;
		break;

	case ND_LLINFO_REACHABLE:
		if (!ND_IS_LLINFO_PERMANENT(ln))
			ln->ln_state = ND_LLINFO_STALE;
		break;

	case ND_LLINFO_PURGE: /* FALLTHROUGH */
	case ND_LLINFO_STALE:
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			LLE_REMREF(ln);
			nd->nd_free(ln, 1);
			ln = NULL;
		}
		break;

	case ND_LLINFO_DELAY:
		if (nd->nd_nud_enabled(ifp)) {
			/* Start probing, addressed to the target itself. */
			ln->ln_asked = 1;
			ln->ln_state = ND_LLINFO_PROBE;
			send_ns = true;
			daddrp = &taddr;
		} else
			ln->ln_state = ND_LLINFO_STALE;
		break;

	case ND_LLINFO_PROBE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_umaxtries) {
			daddrp = &taddr;
		} else {
			/*
			 * Probes exhausted: restart the count in the
			 * UNREACHABLE state; daddrp stays NULL for this
			 * solicitation.
			 */
			ln->ln_state = ND_LLINFO_UNREACHABLE;
			ln->ln_asked = 1;
			missed = ND_LLINFO_PROBE;
			/* nd_missed() consumers can use missed to know if
			 * they need to send ICMP UNREACHABLE or not. */
		}
		break;
	case ND_LLINFO_UNREACHABLE:
		/*
		 * RFC 7048 Section 3 says in the UNREACHABLE state
		 * packets continue to be sent to the link-layer address and
		 * then backoff exponentially.
		 * We adjust this slightly and move to the INCOMPLETE state
		 * after nd_mmaxtries probes and then start backing off.
		 *
		 * This results in simpler code whilst providing a more robust
		 * model which doubles the time to failure over what we did
		 * before. We don't want to be back to the old ARP model where
		 * no unreachability errors are returned because very
		 * few applications would look at unreachability hints provided
		 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
		 */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		missed = ND_LLINFO_UNREACHABLE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		ln->la_flags &= ~LLE_VALID;
		break;
	}

	/*
	 * Re-arm the timer for every entry that was not freed above, picking
	 * the interval from the state the entry is in now.
	 * NOTE(review): this also re-arms entries the switch left untouched
	 * (e.g. a permanent REACHABLE entry, or a state without a case
	 * above) -- confirm that is intended.
	 */
	if (ln != NULL) {
		int type = ND_TIMER_RETRANS;
		if (ln->ln_state == ND_LLINFO_WAITDELETE)
			type = ND_TIMER_RETRANS_BACKOFF;
		else if (ln->ln_state == ND_LLINFO_STALE)
			type = ND_TIMER_GC;
		nd_set_timer(ln, type);
	}
	/* send_ns is only set on paths that keep ln, so ln is non-NULL here. */
	if (send_ns) {
		uint8_t lladdr[255], *lladdrp;
		union l3addr src, *psrc;

		/*
		 * Copy out everything nd_output needs while the entry is
		 * still locked; the entry is released before the call.
		 */
		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
		    ln->la_flags & LLE_VALID)
		{
			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
			lladdrp = lladdr;
		} else
			lladdrp = NULL;
		psrc = nd->nd_holdsrc(ln, &src);
		LLE_FREE_LOCKED(ln);
		ln = NULL;
		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
	}

out:
	if (ln != NULL)
		LLE_FREE_LOCKED(ln);
	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

	/*
	 * missed is only changed inside the switch, so nd, ifp and taddr
	 * have been initialised whenever this branch is taken.
	 */
	if (missed != ND_LLINFO_NOSTATE)
		nd->nd_missed(ifp, &taddr, missed, m);
	if (ifp != NULL)
		if_release(ifp, &psref);
}

/*
 * Arm the entry's callout to run nd_timer() after xtick ticks.
 *
 * xtick must be non-negative.  A reference on ln is taken for the armed
 * callout.  ln_expire is set to the uptime, in seconds, at which the full
 * timeout elapses.  A timeout larger than INT_MAX ticks is split: the callout
 * is armed for INT_MAX ticks and the remainder is stored in ln_ntick, which
 * nd_timer() uses to re-arm via ND_TIMER_TICK.
 */
static void
nd_set_timertick(struct llentry *ln, time_t xtick)
{

	CTASSERT(sizeof(time_t) > sizeof(int));
	KASSERT(xtick >= 0);

	/*
	 * We have to take care of a reference leak which occurs if
	 * callout_reset overwrites a pending callout schedule. Unfortunately
	 * we have no means of knowing about the overwrite, so we detect it
	 * using callout_stop. We need to call callout_pending first to exclude
	 * the case that the callout has never been scheduled.
	 *
	 * NOTE(review): la_timer here and ln_timer_ch below presumably name
	 * the same callout -- confirm against the llentry definition.
	 */
	if (callout_pending(&ln->la_timer)) {
		bool expired;

		expired = callout_stop(&ln->la_timer);
		/* Cancelled before it ran: give back that schedule's ref. */
		if (!expired)
			LLE_REMREF(ln);
	}

	ln->ln_expire = time_uptime + xtick / hz;
	LLE_ADDREF(ln);
	if (xtick > INT_MAX) {
		ln->ln_ntick = xtick - INT_MAX;
		xtick = INT_MAX;
	} else {
		ln->ln_ntick = 0;
	}
	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
}

/*
 * Arm the entry's timer according to one of the ND_TIMER_* types.
 *
 * ln must be write-locked by the caller.  The type selects how the timeout is
 * derived from the entry and from the per-family nd_domain; the result is
 * converted to ticks and passed to nd_set_timertick().  An unknown type
 * panics.
 */
void
nd_set_timer(struct llentry *ln, int type)
{
	time_t xtick;
	struct ifnet *ifp;
	struct nd_domain *nd;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	switch (type) {
	case ND_TIMER_IMMEDIATE:
		xtick = 0;
		break;
	case ND_TIMER_TICK:
		/* Remainder of a timeout that was split at INT_MAX ticks. */
		xtick = ln->ln_ntick;
		break;
	case ND_TIMER_RETRANS:
		/* The "* hz / 1000" treats nd_retrans() as milliseconds. */
		xtick = nd->nd_retrans(ifp) * hz / 1000;
		break;
	case ND_TIMER_RETRANS_BACKOFF:
	{
		unsigned int retrans = nd->nd_retrans(ifp);
		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;

		/*
		 * Multiply the base interval by nd_retransmultiple once per
		 * attempt beyond nd_mmaxtries, clamping to nd_maxretrans.
		 * The "xtick < retrans" test presumably guards against the
		 * product wrapping around.
		 */
		xtick = retrans;
		while (attempts-- != 0) {
			xtick *= nd->nd_retransmultiple;
			if (xtick > nd->nd_maxretrans || xtick < retrans) {
				xtick = nd->nd_maxretrans;
				break;
			}
		}
		xtick = xtick * hz / 1000;
		break;
	}
	case ND_TIMER_REACHABLE:
		xtick = nd->nd_reachable(ifp) * hz / 1000;
		break;
	case ND_TIMER_EXPIRE:
		/* Wait until ln_expire, or nd_gctimer if already passed. */
		if (ln->ln_expire > time_uptime)
			xtick = (ln->ln_expire - time_uptime) * hz;
		else
			xtick = nd->nd_gctimer * hz;
		break;
	case ND_TIMER_DELAY:
		xtick = nd->nd_delay * hz;
		break;
	case ND_TIMER_GC:
		xtick = nd->nd_gctimer * hz;
		break;
	default:
		panic("%s: invalid timer type\n", __func__);
	}

	nd_set_timertick(ln, xtick);
}

/*
 * Resolve the link-layer address for an entry, or queue the packet and
 * start resolution.
 *
 * ln      - entry to resolve; must be write-locked on entry and is unlocked
 *           on every return path.
 * rt      - route used for the packet, may be NULL; only consulted to pick
 *           the error code for an unresolved entry.
 * m       - packet being sent; appended to the entry's hold queue when the
 *           address is not known yet, untouched when it is.
 * lldst   - output buffer for the link-layer address.
 * dstsize - size of lldst; at most this many bytes are copied.
 *
 * Returns 0 with lldst filled in when the entry's state is beyond
 * INCOMPLETE.  Otherwise the packet is queued and the return value is
 * EWOULDBLOCK, or, if the entry carries LLE_UNRESOLVED, EHOSTUNREACH for a
 * gateway route and EHOSTDOWN otherwise.
 */
int
nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    uint8_t *lldst, size_t dstsize)
{
	struct ifnet *ifp;
	struct nd_domain *nd;
	int error;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/* We don't have to do link-layer address resolution on a p2p link. */
	if (ifp->if_flags & IFF_POINTOPOINT &&
	    ln->ln_state < ND_LLINFO_REACHABLE)
	{
		ln->ln_state = ND_LLINFO_STALE;
		nd_set_timer(ln, ND_TIMER_GC);
	}

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds so that neighbor
	 * unreachability detection is performed on expiration.
	 * (RFC 2461 7.3.3)
	 */
	if (ln->ln_state == ND_LLINFO_STALE) {
		ln->ln_asked = 0;
		ln->ln_state = ND_LLINFO_DELAY;
		nd_set_timer(ln, ND_TIMER_DELAY);
	}

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
		KASSERT((ln->la_flags & LLE_VALID) != 0);
		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
		LLE_WUNLOCK(ln);
		return 0;
	}

	/*
	 * There is a neighbor cache entry, but no link-layer address
	 * response yet.  Append this latest packet to the end of the
	 * entry's hold queue.  If the queue already holds nd_maxqueuelen
	 * packets, the oldest ones are dropped to make room.
	 */
	/* A fresh or about-to-be-deleted entry restarts resolution. */
	if (ln->ln_state == ND_LLINFO_NOSTATE ||
	    ln->ln_state == ND_LLINFO_WAITDELETE) {
		ln->ln_asked = 0;
		ln->ln_state = ND_LLINFO_INCOMPLETE;
	}

#ifdef MBUFTRACE
	m_claimm(m, ln->lle_tbl->llt_mowner);
#endif
	if (ln->ln_hold != NULL) {
		struct mbuf *m_hold;
		int i;

		/*
		 * Walk to the tail and link m there; i ends up as the number
		 * of packets that were queued before m.
		 */
		i = 0;
		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
			i++;
			if (m_hold->m_nextpkt == NULL) {
				m_hold->m_nextpkt = m;
				break;
			}
		}
		KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d",
		    ln->la_numheld, i);
		/* Drop from the head until there is room for m. */
		while (i >= nd->nd_maxqueuelen) {
			m_hold = ln->ln_hold;
			ln->ln_hold = ln->ln_hold->m_nextpkt;
			m_freem(m_hold);
			i--;
			ln->la_numheld--;
		}
	} else {
		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
		    ln->la_numheld);
		ln->ln_hold = m;
	}

	KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen,
	    "la_numheld=%d nd_maxqueuelen=%d",
	    ln->la_numheld, nd->nd_maxqueuelen);
	/* Account for m itself. */
	ln->la_numheld++;

	/*
	 * LLE_UNRESOLVED is set by nd_timer() when a previous resolution
	 * attempt ran out of retries; report that to the caller while still
	 * queueing the packet for the new attempt.
	 */
	if ((ln->la_flags & LLE_UNRESOLVED) != 0)
		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
		    EHOSTUNREACH : EHOSTDOWN;
	else
		error = EWOULDBLOCK;

	/*
	 * If there has been no NS for the neighbor after entering the
	 * INCOMPLETE state, send the first solicitation.
	 */
	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
		struct psref psref;
		union l3addr dst, src, *psrc;

		ln->ln_asked++;
		nd_set_timer(ln, ND_TIMER_RETRANS);
		/*
		 * Copy what nd_output needs and pin the interface before
		 * dropping the entry lock; the hook runs unlocked.
		 */
		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
		psrc = nd->nd_holdsrc(ln, &src);
		if_acquire(ifp, &psref);
		LLE_WUNLOCK(ln);

		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
		if_release(ifp, &psref);
	} else
		LLE_WUNLOCK(ln);

	return error;
}

/*
 * Apply an upper-layer reachability hint to an entry.
 *
 * ln may be NULL, in which case nothing happens.  Otherwise ln must be
 * write-locked on entry and is unlocked before returning.  Entries whose
 * state is below REACHABLE are left alone.  Each accepted call bumps
 * ln_byhint; once it exceeds the domain's nd_maxnudhint the hint is ignored.
 * Otherwise the entry becomes REACHABLE and, unless it is permanent, its
 * reachable timer is restarted.
 */
void
nd_nud_hint(struct llentry *ln)
{
	struct nd_domain *nd;

	if (ln == NULL)
		return;

	LLE_WLOCK_ASSERT(ln);

	if (ln->ln_state < ND_LLINFO_REACHABLE)
		goto done;

	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/*
	 * If we get upper-layer reachability confirmation many times,
	 * it is possible we have false information.
	 */
	ln->ln_byhint++;
	if (ln->ln_byhint > nd->nd_maxnudhint)
		goto done;

	ln->ln_state = ND_LLINFO_REACHABLE;
	if (!ND_IS_LLINFO_PERMANENT(ln))
		nd_set_timer(ln, ND_TIMER_REACHABLE);

done:
	LLE_WUNLOCK(ln);

	return;
}

/*
 * Return the nd_domain registered for address family af.
 * The family must be in range and must have been attached; both are only
 * checked by KASSERT.
 */
static struct nd_domain *
nd_find_domain(int af)
{

	KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
	return nd_domains[af];
}

/*
 * Register nd as the neighbor discovery domain for nd->nd_family,
 * replacing whatever was stored in that slot.
 */
void
nd_attach_domain(struct nd_domain *nd)
{

	KASSERT(nd->nd_family < __arraycount(nd_domains));
	nd_domains[nd->nd_family] = nd;
}