/*	$NetBSD: nd.c,v 1.7.4.1 2025/08/29 15:19:39 martin Exp $	*/
2
/*
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Roy Marples.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.7.4.1 2025/08/29 15:19:39 martin Exp $");
32
33 #include <sys/callout.h>
34 #include <sys/mbuf.h>
35 #include <sys/socketvar.h> /* for softnet_lock */
36
37 #include <net/if_llatbl.h>
38 #include <net/nd.h>
39 #include <net/route.h>
40
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43
/* Registered neighbour discovery domains, indexed by address family. */
static struct nd_domain *nd_domains[AF_MAX];

/* Garbage collection timer, in seconds (1 day). */
static int nd_gctimer = 24 * 60 * 60;

/* Forward declarations for helpers defined later in this file. */
static struct nd_domain *nd_find_domain(int);
static void nd_set_timertick(struct llentry *, time_t);
50
51 static void
52 nd_timer(void *arg)
53 {
54 struct llentry *ln = arg;
55 struct nd_domain *nd;
56 struct ifnet *ifp = NULL;
57 struct psref psref;
58 struct mbuf *m = NULL;
59 bool send_ns = false;
60 int16_t missed = ND_LLINFO_NOSTATE;
61 union l3addr taddr, *daddrp = NULL;
62
63 SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
64 LLE_WLOCK(ln);
65
66 if (!(ln->la_flags & LLE_LINKED))
67 goto out;
68 if (ln->ln_ntick > 0) {
69 nd_set_timer(ln, ND_TIMER_TICK);
70 goto out;
71 }
72
73 nd = nd_find_domain(ln->lle_tbl->llt_af);
74 ifp = ln->lle_tbl->llt_ifp;
75 KASSERT(ifp != NULL);
76 if_acquire(ifp, &psref);
77
78 memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));
79
80 switch (ln->ln_state) {
81 case ND_LLINFO_WAITDELETE:
82 LLE_REMREF(ln);
83 nd->nd_free(ln, 0);
84 ln = NULL;
85 break;
86
87 case ND_LLINFO_INCOMPLETE:
88 if (ln->ln_asked < nd->nd_mmaxtries) {
89 ln->ln_asked++;
90 send_ns = true;
91 break;
92 }
93
94 if (ln->ln_hold) {
95 struct mbuf *m0, *mnxt;
96
97 /*
98 * Assuming every packet in ln_hold
99 * has the same IP header.
100 */
101 m = ln->ln_hold;
102 for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
103 mnxt = m0->m_nextpkt;
104 m0->m_nextpkt = NULL;
105 m_freem(m0);
106 }
107
108 m->m_nextpkt = NULL;
109 ln->ln_hold = NULL;
110 ln->la_numheld = 0;
111 }
112
113 KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
114 ln->la_numheld);
115
116 missed = ND_LLINFO_INCOMPLETE;
117 ln->ln_state = ND_LLINFO_WAITDELETE;
118 break;
119
120 case ND_LLINFO_REACHABLE:
121 if (!ND_IS_LLINFO_PERMANENT(ln))
122 ln->ln_state = ND_LLINFO_STALE;
123 break;
124
125 case ND_LLINFO_PURGE: /* FALLTHROUGH */
126 case ND_LLINFO_STALE:
127 if (!ND_IS_LLINFO_PERMANENT(ln)) {
128 LLE_REMREF(ln);
129 nd->nd_free(ln, 1);
130 ln = NULL;
131 }
132 break;
133
134 case ND_LLINFO_DELAY:
135 if (nd->nd_nud_enabled(ifp)) {
136 ln->ln_asked = 1;
137 ln->ln_state = ND_LLINFO_PROBE;
138 send_ns = true;
139 daddrp = &taddr;
140 } else
141 ln->ln_state = ND_LLINFO_STALE;
142 break;
143
144 case ND_LLINFO_PROBE:
145 send_ns = true;
146 if (ln->ln_asked++ < nd->nd_umaxtries) {
147 daddrp = &taddr;
148 } else {
149 ln->ln_state = ND_LLINFO_UNREACHABLE;
150 ln->ln_asked = 1;
151 missed = ND_LLINFO_PROBE;
152 /* nd_missed() consumers can use missed to know if
153 * they need to send ICMP UNREACHABLE or not. */
154 }
155 break;
156 case ND_LLINFO_UNREACHABLE:
157 /*
158 * RFC 7048 Section 3 says in the UNREACHABLE state
159 * packets continue to be sent to the link-layer address and
160 * then backoff exponentially.
161 * We adjust this slightly and move to the INCOMPLETE state
162 * after nd_mmaxtries probes and then start backing off.
163 *
164 * This results in simpler code whilst providing a more robust
165 * model which doubles the time to failure over what we did
166 * before. We don't want to be back to the old ARP model where
167 * no unreachability errors are returned because very
168 * few applications would look at unreachability hints provided
169 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
170 */
171 send_ns = true;
172 if (ln->ln_asked++ < nd->nd_mmaxtries)
173 break;
174
175 missed = ND_LLINFO_UNREACHABLE;
176 ln->ln_state = ND_LLINFO_WAITDELETE;
177 ln->la_flags &= ~LLE_VALID;
178 break;
179 }
180
181 if (ln != NULL) {
182 int type = ND_TIMER_RETRANS;
183 if (ln->ln_state == ND_LLINFO_WAITDELETE)
184 type = ND_TIMER_RETRANS_BACKOFF;
185 else if (ln->ln_state == ND_LLINFO_STALE)
186 type = ND_TIMER_GC;
187 nd_set_timer(ln, type);
188 }
189 if (send_ns) {
190 uint8_t lladdr[255], *lladdrp;
191 union l3addr src, *psrc;
192
193 if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
194 ln->la_flags & LLE_VALID)
195 {
196 KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
197 memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
198 lladdrp = lladdr;
199 } else
200 lladdrp = NULL;
201 psrc = nd->nd_holdsrc(ln, &src);
202 LLE_FREE_LOCKED(ln);
203 ln = NULL;
204 nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
205 }
206
207 out:
208 if (ln != NULL)
209 LLE_FREE_LOCKED(ln);
210 SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
211
212 if (missed != ND_LLINFO_NOSTATE)
213 nd->nd_missed(ifp, &taddr, missed, m);
214 if (ifp != NULL)
215 if_release(ifp, &psref);
216 }
217
218 static void
219 nd_set_timertick(struct llentry *ln, time_t xtick)
220 {
221
222 CTASSERT(sizeof(time_t) > sizeof(int));
223 KASSERT(xtick >= 0);
224
225 /*
226 * We have to take care of a reference leak which occurs if
227 * callout_reset overwrites a pending callout schedule. Unfortunately
228 * we don't have a mean to know the overwrite, so we need to know it
229 * using callout_stop. We need to call callout_pending first to exclude
230 * the case that the callout has never been scheduled.
231 */
232 if (callout_pending(&ln->la_timer)) {
233 bool expired;
234
235 expired = callout_stop(&ln->la_timer);
236 if (!expired)
237 LLE_REMREF(ln);
238 }
239
240 ln->ln_expire = time_uptime + xtick / hz;
241 LLE_ADDREF(ln);
242 if (xtick > INT_MAX) {
243 ln->ln_ntick = xtick - INT_MAX;
244 xtick = INT_MAX;
245 } else {
246 ln->ln_ntick = 0;
247 }
248 callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
249 }
250
251 void
252 nd_set_timer(struct llentry *ln, int type)
253 {
254 time_t xtick;
255 struct ifnet *ifp;
256 struct nd_domain *nd;
257
258 LLE_WLOCK_ASSERT(ln);
259
260 ifp = ln->lle_tbl->llt_ifp;
261 nd = nd_find_domain(ln->lle_tbl->llt_af);
262
263 switch (type) {
264 case ND_TIMER_IMMEDIATE:
265 xtick = 0;
266 break;
267 case ND_TIMER_TICK:
268 xtick = ln->ln_ntick;
269 break;
270 case ND_TIMER_RETRANS:
271 xtick = nd->nd_retrans(ifp) * hz / 1000;
272 break;
273 case ND_TIMER_RETRANS_BACKOFF:
274 {
275 unsigned int retrans = nd->nd_retrans(ifp);
276 unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;
277
278 xtick = retrans;
279 while (attempts-- != 0) {
280 xtick *= nd->nd_retransmultiple;
281 if (xtick > nd->nd_maxretrans || xtick < retrans) {
282 xtick = nd->nd_maxretrans;
283 break;
284 }
285 }
286 xtick = xtick * hz / 1000;
287 break;
288 }
289 case ND_TIMER_REACHABLE:
290 xtick = nd->nd_reachable(ifp) * hz / 1000;
291 break;
292 case ND_TIMER_EXPIRE:
293 if (ln->ln_expire > time_uptime)
294 xtick = (ln->ln_expire - time_uptime) * hz;
295 else
296 xtick = nd_gctimer * hz;
297 break;
298 case ND_TIMER_DELAY:
299 xtick = nd->nd_delay * hz;
300 break;
301 case ND_TIMER_GC:
302 xtick = nd_gctimer * hz;
303 break;
304 default:
305 panic("%s: invalid timer type\n", __func__);
306 }
307
308 nd_set_timertick(ln, xtick);
309 }
310
311 int
312 nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
313 uint8_t *lldst, size_t dstsize)
314 {
315 struct ifnet *ifp;
316 struct nd_domain *nd;
317 int error;
318
319 LLE_WLOCK_ASSERT(ln);
320
321 ifp = ln->lle_tbl->llt_ifp;
322 nd = nd_find_domain(ln->lle_tbl->llt_af);
323
324 /* We don't have to do link-layer address resolution on a p2p link. */
325 if (ifp->if_flags & IFF_POINTOPOINT &&
326 ln->ln_state < ND_LLINFO_REACHABLE)
327 {
328 ln->ln_state = ND_LLINFO_STALE;
329 nd_set_timer(ln, ND_TIMER_GC);
330 }
331
332 /*
333 * The first time we send a packet to a neighbor whose entry is
334 * STALE, we have to change the state to DELAY and a sets a timer to
335 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
336 * neighbor unreachability detection on expiration.
337 * (RFC 2461 7.3.3)
338 */
339 if (ln->ln_state == ND_LLINFO_STALE) {
340 ln->ln_asked = 0;
341 ln->ln_state = ND_LLINFO_DELAY;
342 nd_set_timer(ln, ND_TIMER_DELAY);
343 }
344
345 /*
346 * If the neighbor cache entry has a state other than INCOMPLETE
347 * (i.e. its link-layer address is already resolved), just
348 * send the packet.
349 */
350 if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
351 KASSERT((ln->la_flags & LLE_VALID) != 0);
352 memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
353 LLE_WUNLOCK(ln);
354 return 0;
355 }
356
357 /*
358 * There is a neighbor cache entry, but no ethernet address
359 * response yet. Append this latest packet to the end of the
360 * packet queue in the mbuf, unless the number of the packet
361 * does not exceed maxqueuelen. When it exceeds maxqueuelen,
362 * the oldest packet in the queue will be removed.
363 */
364 if (ln->ln_state == ND_LLINFO_NOSTATE ||
365 ln->ln_state == ND_LLINFO_WAITDELETE)
366 ln->ln_state = ND_LLINFO_INCOMPLETE;
367
368 #ifdef MBUFTRACE
369 m_claimm(m, ln->lle_tbl->llt_mowner);
370 #endif
371 if (ln->ln_hold != NULL) {
372 struct mbuf *m_hold;
373 int i;
374
375 i = 0;
376 for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
377 i++;
378 if (m_hold->m_nextpkt == NULL) {
379 m_hold->m_nextpkt = m;
380 break;
381 }
382 }
383 KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d",
384 ln->la_numheld, i);
385 while (i >= nd->nd_maxqueuelen) {
386 m_hold = ln->ln_hold;
387 ln->ln_hold = ln->ln_hold->m_nextpkt;
388 m_freem(m_hold);
389 i--;
390 ln->la_numheld--;
391 }
392 } else {
393 KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
394 ln->la_numheld);
395 ln->ln_hold = m;
396 }
397
398 KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen,
399 "la_numheld=%d nd_maxqueuelen=%d",
400 ln->la_numheld, nd->nd_maxqueuelen);
401 ln->la_numheld++;
402
403 if (ln->ln_asked >= nd->nd_mmaxtries)
404 error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
405 EHOSTUNREACH : EHOSTDOWN;
406 else
407 error = EWOULDBLOCK;
408
409 /*
410 * If there has been no NS for the neighbor after entering the
411 * INCOMPLETE state, send the first solicitation.
412 */
413 if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
414 struct psref psref;
415 union l3addr dst, src, *psrc;
416
417 ln->ln_asked++;
418 nd_set_timer(ln, ND_TIMER_RETRANS);
419 memcpy(&dst, &ln->r_l3addr, sizeof(dst));
420 psrc = nd->nd_holdsrc(ln, &src);
421 if_acquire(ifp, &psref);
422 LLE_WUNLOCK(ln);
423
424 nd->nd_output(ifp, NULL, &dst, NULL, psrc);
425 if_release(ifp, &psref);
426 } else
427 LLE_WUNLOCK(ln);
428
429 return error;
430 }
431
432 void
433 nd_nud_hint(struct llentry *ln)
434 {
435 struct nd_domain *nd;
436
437 if (ln == NULL)
438 return;
439
440 LLE_WLOCK_ASSERT(ln);
441
442 if (ln->ln_state < ND_LLINFO_REACHABLE)
443 goto done;
444
445 nd = nd_find_domain(ln->lle_tbl->llt_af);
446
447 /*
448 * if we get upper-layer reachability confirmation many times,
449 * it is possible we have false information.
450 */
451 ln->ln_byhint++;
452 if (ln->ln_byhint > nd->nd_maxnudhint)
453 goto done;
454
455 ln->ln_state = ND_LLINFO_REACHABLE;
456 if (!ND_IS_LLINFO_PERMANENT(ln))
457 nd_set_timer(ln, ND_TIMER_REACHABLE);
458
459 done:
460 LLE_WUNLOCK(ln);
461
462 return;
463 }
464
465 static struct nd_domain *
466 nd_find_domain(int af)
467 {
468
469 KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
470 return nd_domains[af];
471 }
472
473 void
474 nd_attach_domain(struct nd_domain *nd)
475 {
476
477 KASSERT(nd->nd_family < __arraycount(nd_domains));
478 nd_domains[nd->nd_family] = nd;
479 }
480