1 /* $NetBSD: nd.c,v 1.8 2025/08/18 06:46:43 ozaki-r Exp $ */ 2 3 /* 4 * Copyright (c) 2020 The NetBSD Foundation, Inc. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Roy Marples. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.8 2025/08/18 06:46:43 ozaki-r Exp $"); 32 33 #include <sys/callout.h> 34 #include <sys/mbuf.h> 35 #include <sys/socketvar.h> /* for softnet_lock */ 36 37 #include <net/if_llatbl.h> 38 #include <net/nd.h> 39 #include <net/route.h> 40 41 #include <netinet/in.h> 42 #include <netinet/ip6.h> 43 44 static struct nd_domain *nd_domains[AF_MAX]; 45 46 static int nd_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */ 47 48 static void nd_set_timertick(struct llentry *, time_t); 49 static struct nd_domain *nd_find_domain(int); 50 51 static void 52 nd_timer(void *arg) 53 { 54 struct llentry *ln = arg; 55 struct nd_domain *nd; 56 struct ifnet *ifp = NULL; 57 struct psref psref; 58 struct mbuf *m = NULL; 59 bool send_ns = false; 60 int16_t missed = ND_LLINFO_NOSTATE; 61 union l3addr taddr, *daddrp = NULL; 62 63 SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE(); 64 LLE_WLOCK(ln); 65 66 if (!(ln->la_flags & LLE_LINKED)) 67 goto out; 68 if (ln->ln_ntick > 0) { 69 nd_set_timer(ln, ND_TIMER_TICK); 70 goto out; 71 } 72 73 nd = nd_find_domain(ln->lle_tbl->llt_af); 74 ifp = ln->lle_tbl->llt_ifp; 75 KASSERT(ifp != NULL); 76 if_acquire(ifp, &psref); 77 78 memcpy(&taddr, &ln->r_l3addr, sizeof(taddr)); 79 80 switch (ln->ln_state) { 81 case ND_LLINFO_WAITDELETE: 82 LLE_REMREF(ln); 83 nd->nd_free(ln, 0); 84 ln = NULL; 85 break; 86 87 case ND_LLINFO_INCOMPLETE: 88 if (ln->ln_asked < nd->nd_mmaxtries) { 89 ln->ln_asked++; 90 send_ns = true; 91 break; 92 } 93 94 if (ln->ln_hold) { 95 struct mbuf *m0, *mnxt; 96 97 /* 98 * Assuming every packet in ln_hold 99 * has the same IP header. 100 */ 101 m = ln->ln_hold; 102 for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) { 103 mnxt = m0->m_nextpkt; 104 m0->m_nextpkt = NULL; 105 m_freem(m0); 106 } 107 108 m->m_nextpkt = NULL; 109 ln->ln_hold = NULL; 110 ln->la_numheld = 0; 111 } 112 113 KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d", 114 ln->la_numheld); 115 116 missed = ND_LLINFO_INCOMPLETE; 117 ln->ln_state = ND_LLINFO_WAITDELETE; 118 break; 119 120 case ND_LLINFO_REACHABLE: 121 if (!ND_IS_LLINFO_PERMANENT(ln)) 122 ln->ln_state = ND_LLINFO_STALE; 123 break; 124 125 case ND_LLINFO_PURGE: /* FALLTHROUGH */ 126 case ND_LLINFO_STALE: 127 if (!ND_IS_LLINFO_PERMANENT(ln)) { 128 LLE_REMREF(ln); 129 nd->nd_free(ln, 1); 130 ln = NULL; 131 } 132 break; 133 134 case ND_LLINFO_DELAY: 135 if (nd->nd_nud_enabled(ifp)) { 136 ln->ln_asked = 1; 137 ln->ln_state = ND_LLINFO_PROBE; 138 send_ns = true; 139 daddrp = &taddr; 140 } else 141 ln->ln_state = ND_LLINFO_STALE; 142 break; 143 144 case ND_LLINFO_PROBE: 145 send_ns = true; 146 if (ln->ln_asked++ < nd->nd_umaxtries) { 147 daddrp = &taddr; 148 } else { 149 ln->ln_state = ND_LLINFO_UNREACHABLE; 150 ln->ln_asked = 1; 151 missed = ND_LLINFO_PROBE; 152 /* nd_missed() consumers can use missed to know if 153 * they need to send ICMP UNREACHABLE or not. */ 154 } 155 break; 156 case ND_LLINFO_UNREACHABLE: 157 /* 158 * RFC 7048 Section 3 says in the UNREACHABLE state 159 * packets continue to be sent to the link-layer address and 160 * then backoff exponentially. 161 * We adjust this slightly and move to the INCOMPLETE state 162 * after nd_mmaxtries probes and then start backing off. 163 * 164 * This results in simpler code whilst providing a more robust 165 * model which doubles the time to failure over what we did 166 * before. We don't want to be back to the old ARP model where 167 * no unreachability errors are returned because very 168 * few applications would look at unreachability hints provided 169 * such as ND_LLINFO_UNREACHABLE or RTM_MISS. 170 */ 171 send_ns = true; 172 if (ln->ln_asked++ < nd->nd_mmaxtries) 173 break; 174 175 missed = ND_LLINFO_UNREACHABLE; 176 ln->ln_state = ND_LLINFO_WAITDELETE; 177 ln->la_flags &= ~LLE_VALID; 178 break; 179 } 180 181 if (ln != NULL) { 182 int type = ND_TIMER_RETRANS; 183 if (ln->ln_state == ND_LLINFO_WAITDELETE) 184 type = ND_TIMER_RETRANS_BACKOFF; 185 else if (ln->ln_state == ND_LLINFO_STALE) 186 type = ND_TIMER_GC; 187 nd_set_timer(ln, type); 188 } 189 if (send_ns) { 190 uint8_t lladdr[255], *lladdrp; 191 union l3addr src, *psrc; 192 193 if (ln->ln_state > ND_LLINFO_INCOMPLETE && 194 ln->la_flags & LLE_VALID) 195 { 196 KASSERT(sizeof(lladdr) >= ifp->if_addrlen); 197 memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen); 198 lladdrp = lladdr; 199 } else 200 lladdrp = NULL; 201 psrc = nd->nd_holdsrc(ln, &src); 202 LLE_FREE_LOCKED(ln); 203 ln = NULL; 204 nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc); 205 } 206 207 out: 208 if (ln != NULL) 209 LLE_FREE_LOCKED(ln); 210 SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 211 212 if (missed != ND_LLINFO_NOSTATE) 213 nd->nd_missed(ifp, &taddr, missed, m); 214 if (ifp != NULL) 215 if_release(ifp, &psref); 216 } 217 218 static void 219 nd_set_timertick(struct llentry *ln, time_t xtick) 220 { 221 222 CTASSERT(sizeof(time_t) > sizeof(int)); 223 KASSERT(xtick >= 0); 224 225 /* 226 * We have to take care of a reference leak which occurs if 227 * callout_reset overwrites a pending callout schedule. Unfortunately 228 * we don't have a mean to know the overwrite, so we need to know it 229 * using callout_stop. We need to call callout_pending first to exclude 230 * the case that the callout has never been scheduled. 231 */ 232 if (callout_pending(&ln->la_timer)) { 233 bool expired; 234 235 expired = callout_stop(&ln->la_timer); 236 if (!expired) 237 LLE_REMREF(ln); 238 } 239 240 ln->ln_expire = time_uptime + xtick / hz; 241 LLE_ADDREF(ln); 242 if (xtick > INT_MAX) { 243 ln->ln_ntick = xtick - INT_MAX; 244 xtick = INT_MAX; 245 } else { 246 ln->ln_ntick = 0; 247 } 248 callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln); 249 } 250 251 void 252 nd_set_timer(struct llentry *ln, int type) 253 { 254 time_t xtick; 255 struct ifnet *ifp; 256 struct nd_domain *nd; 257 258 LLE_WLOCK_ASSERT(ln); 259 260 ifp = ln->lle_tbl->llt_ifp; 261 nd = nd_find_domain(ln->lle_tbl->llt_af); 262 263 switch (type) { 264 case ND_TIMER_IMMEDIATE: 265 xtick = 0; 266 break; 267 case ND_TIMER_TICK: 268 xtick = ln->ln_ntick; 269 break; 270 case ND_TIMER_RETRANS: 271 xtick = nd->nd_retrans(ifp) * hz / 1000; 272 break; 273 case ND_TIMER_RETRANS_BACKOFF: 274 { 275 unsigned int retrans = nd->nd_retrans(ifp); 276 unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries; 277 278 xtick = retrans; 279 while (attempts-- != 0) { 280 xtick *= nd->nd_retransmultiple; 281 if (xtick > nd->nd_maxretrans || xtick < retrans) { 282 xtick = nd->nd_maxretrans; 283 break; 284 } 285 } 286 xtick = xtick * hz / 1000; 287 break; 288 } 289 case ND_TIMER_REACHABLE: 290 xtick = nd->nd_reachable(ifp) * hz / 1000; 291 break; 292 case ND_TIMER_EXPIRE: 293 if (ln->ln_expire > time_uptime) 294 xtick = (ln->ln_expire - time_uptime) * hz; 295 else 296 xtick = nd_gctimer * hz; 297 break; 298 case ND_TIMER_DELAY: 299 xtick = nd->nd_delay * hz; 300 break; 301 case ND_TIMER_GC: 302 xtick = nd_gctimer * hz; 303 break; 304 default: 305 panic("%s: invalid timer type\n", __func__); 306 } 307 308 nd_set_timertick(ln, xtick); 309 } 310 311 int 312 nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m, 313 uint8_t *lldst, size_t dstsize) 314 { 315 struct ifnet *ifp; 316 struct nd_domain *nd; 317 int error; 318 319 LLE_WLOCK_ASSERT(ln); 320 321 ifp = ln->lle_tbl->llt_ifp; 322 nd = nd_find_domain(ln->lle_tbl->llt_af); 323 324 /* We don't have to do link-layer address resolution on a p2p link. */ 325 if (ifp->if_flags & IFF_POINTOPOINT && 326 ln->ln_state < ND_LLINFO_REACHABLE) 327 { 328 ln->ln_state = ND_LLINFO_STALE; 329 nd_set_timer(ln, ND_TIMER_GC); 330 } 331 332 /* 333 * The first time we send a packet to a neighbor whose entry is 334 * STALE, we have to change the state to DELAY and a sets a timer to 335 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do 336 * neighbor unreachability detection on expiration. 337 * (RFC 2461 7.3.3) 338 */ 339 if (ln->ln_state == ND_LLINFO_STALE) { 340 ln->ln_asked = 0; 341 ln->ln_state = ND_LLINFO_DELAY; 342 nd_set_timer(ln, ND_TIMER_DELAY); 343 } 344 345 /* 346 * If the neighbor cache entry has a state other than INCOMPLETE 347 * (i.e. its link-layer address is already resolved), just 348 * send the packet. 349 */ 350 if (ln->ln_state > ND_LLINFO_INCOMPLETE) { 351 KASSERT((ln->la_flags & LLE_VALID) != 0); 352 memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen)); 353 LLE_WUNLOCK(ln); 354 return 0; 355 } 356 357 /* 358 * There is a neighbor cache entry, but no ethernet address 359 * response yet. Append this latest packet to the end of the 360 * packet queue in the mbuf, unless the number of the packet 361 * does not exceed maxqueuelen. When it exceeds maxqueuelen, 362 * the oldest packet in the queue will be removed. 363 */ 364 if (ln->ln_state == ND_LLINFO_NOSTATE || 365 ln->ln_state == ND_LLINFO_WAITDELETE) 366 ln->ln_state = ND_LLINFO_INCOMPLETE; 367 368 #ifdef MBUFTRACE 369 m_claimm(m, ln->lle_tbl->llt_mowner); 370 #endif 371 if (ln->ln_hold != NULL) { 372 struct mbuf *m_hold; 373 int i; 374 375 i = 0; 376 for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) { 377 i++; 378 if (m_hold->m_nextpkt == NULL) { 379 m_hold->m_nextpkt = m; 380 break; 381 } 382 } 383 KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d", 384 ln->la_numheld, i); 385 while (i >= nd->nd_maxqueuelen) { 386 m_hold = ln->ln_hold; 387 ln->ln_hold = ln->ln_hold->m_nextpkt; 388 m_freem(m_hold); 389 i--; 390 ln->la_numheld--; 391 } 392 } else { 393 KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d", 394 ln->la_numheld); 395 ln->ln_hold = m; 396 } 397 398 KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen, 399 "la_numheld=%d nd_maxqueuelen=%d", 400 ln->la_numheld, nd->nd_maxqueuelen); 401 ln->la_numheld++; 402 403 if (ln->ln_asked >= nd->nd_mmaxtries) 404 error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ? 405 EHOSTUNREACH : EHOSTDOWN; 406 else 407 error = EWOULDBLOCK; 408 409 /* 410 * If there has been no NS for the neighbor after entering the 411 * INCOMPLETE state, send the first solicitation. 412 */ 413 if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) { 414 struct psref psref; 415 union l3addr dst, src, *psrc; 416 417 ln->ln_asked++; 418 nd_set_timer(ln, ND_TIMER_RETRANS); 419 memcpy(&dst, &ln->r_l3addr, sizeof(dst)); 420 psrc = nd->nd_holdsrc(ln, &src); 421 if_acquire(ifp, &psref); 422 LLE_WUNLOCK(ln); 423 424 nd->nd_output(ifp, NULL, &dst, NULL, psrc); 425 if_release(ifp, &psref); 426 } else 427 LLE_WUNLOCK(ln); 428 429 return error; 430 } 431 432 void 433 nd_nud_hint(struct llentry *ln) 434 { 435 struct nd_domain *nd; 436 437 if (ln == NULL) 438 return; 439 440 LLE_WLOCK_ASSERT(ln); 441 442 if (ln->ln_state < ND_LLINFO_REACHABLE) 443 goto done; 444 445 nd = nd_find_domain(ln->lle_tbl->llt_af); 446 447 /* 448 * if we get upper-layer reachability confirmation many times, 449 * it is possible we have false information. 450 */ 451 ln->ln_byhint++; 452 if (ln->ln_byhint > nd->nd_maxnudhint) 453 goto done; 454 455 ln->ln_state = ND_LLINFO_REACHABLE; 456 if (!ND_IS_LLINFO_PERMANENT(ln)) 457 nd_set_timer(ln, ND_TIMER_REACHABLE); 458 459 done: 460 LLE_WUNLOCK(ln); 461 462 return; 463 } 464 465 static struct nd_domain * 466 nd_find_domain(int af) 467 { 468 469 KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL); 470 return nd_domains[af]; 471 } 472 473 void 474 nd_attach_domain(struct nd_domain *nd) 475 { 476 477 KASSERT(nd->nd_family < __arraycount(nd_domains)); 478 nd_domains[nd->nd_family] = nd; 479 } 480