/*	$NetBSD: nd.c,v 1.4 2020/09/15 23:40:03 roy Exp $	*/

/*
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Roy Marples.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
29
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.4 2020/09/15 23:40:03 roy Exp $");

#include <sys/callout.h>
#include <sys/mbuf.h>
#include <sys/socketvar.h> /* for softnet_lock */

#include <net/if_llatbl.h>
#include <net/nd.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip6.h>

/* Per-address-family ND handlers, registered via nd_attach_domain(). */
static struct nd_domain *nd_domains[AF_MAX];

static int nd_gctimer = (60 * 60 * 24);	/* 1 day: garbage collection timer */

static void nd_set_timertick(struct llentry *, time_t);
static struct nd_domain *nd_find_domain(int);
50
/*
 * Per-entry callout handler driving the Neighbour Discovery state
 * machine for one llentry.  Depending on the entry's current state it
 * retransmits solicitations, ages the entry towards garbage
 * collection, frees it, and reports unreachability upstream via the
 * domain's nd_missed() hook.  Runs with the callout's LLE reference
 * held; that reference is either moved to a re-armed timer by
 * nd_set_timer() or dropped here.
 */
static void
nd_timer(void *arg)
{
	struct llentry *ln = arg;
	struct nd_domain *nd;
	struct ifnet *ifp = NULL;
	struct psref psref;
	struct mbuf *m = NULL;
	bool send_ns = false;
	int16_t missed = ND_LLINFO_NOSTATE;
	union l3addr taddr, *daddrp = NULL;

	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
	LLE_WLOCK(ln);

	/* The entry may have been unlinked while the callout was pending. */
	if (!(ln->la_flags & LLE_LINKED))
		goto out;
	/*
	 * A non-zero ln_ntick means the requested timeout was longer than
	 * one callout can carry (see nd_set_timertick()); re-arm for the
	 * remainder instead of acting on the state.
	 */
	if (ln->ln_ntick > 0) {
		nd_set_timer(ln, ND_TIMER_TICK);
		goto out;
	}

	nd = nd_find_domain(ln->lle_tbl->llt_af);
	ifp = ln->lle_tbl->llt_ifp;
	KASSERT(ifp != NULL);
	/* Keep ifp alive across the unlocked nd_output()/nd_missed() calls. */
	if_acquire(ifp, &psref);

	/* Copy the target address out so it survives freeing the entry. */
	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));

	switch (ln->ln_state) {
	case ND_LLINFO_WAITDELETE:
		/* Grace period over: drop the entry for good. */
		LLE_REMREF(ln);
		nd->nd_free(ln, 0);
		ln = NULL;
		break;

	case ND_LLINFO_INCOMPLETE:
		/* Still resolving: solicit again until nd_mmaxtries is hit. */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		/*
		 * Resolution failed.  Keep the first held packet for the
		 * nd_missed() report (e.g. an ICMP error) and free the rest.
		 */
		if (ln->ln_hold) {
			struct mbuf *m0, *mnxt;

			/*
			 * Assuming every packet in ln_hold
			 * has the same IP header.
			 */
			m = ln->ln_hold;
			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
				mnxt = m0->m_nextpkt;
				m0->m_nextpkt = NULL;
				m_freem(m0);
			}

			m->m_nextpkt = NULL;
			ln->ln_hold = NULL;
		}

		missed = ND_LLINFO_INCOMPLETE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		break;

	case ND_LLINFO_REACHABLE:
		/* Reachability confirmation has expired; age to STALE. */
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PURGE: /* FALLTHROUGH */
	case ND_LLINFO_STALE:
		/* Unused long enough; garbage collect the entry. */
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			LLE_REMREF(ln);
			nd->nd_free(ln, 1);
			ln = NULL;
		}
		break;

	case ND_LLINFO_DELAY:
		if (nd->nd_nud_enabled(ifp)) {
			/* Start unicast NUD probing of the neighbour. */
			ln->ln_asked = 1;
			ln->ln_state = ND_LLINFO_PROBE;
			send_ns = true;
			daddrp = &taddr;
		} else {
			/* NUD is disabled; just age the entry instead. */
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PROBE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_umaxtries) {
			/* Keep probing unicast to the cached address. */
			daddrp = &taddr;
		} else {
			ln->ln_state = ND_LLINFO_UNREACHABLE;
			ln->ln_asked = 1;
			missed = ND_LLINFO_PROBE;
			/* nd_missed() consumers can use missed to know if
			 * they need to send ICMP UNREACHABLE or not. */
		}
		break;
	case ND_LLINFO_UNREACHABLE:
		/*
		 * RFC 7048 Section 3 says in the UNREACHABLE state
		 * packets continue to be sent to the link-layer address and
		 * then backoff exponentially.
		 * We adjust this slightly and move to the INCOMPLETE state
		 * after nd_mmaxtries probes and then start backing off.
		 *
		 * This results in simpler code whilst providing a more robust
		 * model which doubles the time to failure over what we did
		 * before. We don't want to be back to the old ARP model where
		 * no unreachability errors are returned because very
		 * few applications would look at unreachability hints provided
		 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
		 */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		missed = ND_LLINFO_UNREACHABLE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		ln->la_flags &= ~LLE_VALID;
		break;
	}

	if (send_ns) {
		uint8_t lladdr[255], *lladdrp;
		union l3addr src, *psrc;

		/* WAITDELETE retransmits back off exponentially (RFC 7048). */
		if (ln->ln_state == ND_LLINFO_WAITDELETE)
			nd_set_timer(ln, ND_TIMER_RETRANS_BACKOFF);
		else
			nd_set_timer(ln, ND_TIMER_RETRANS);
		/*
		 * Solicit unicast to the cached link-layer address while we
		 * still trust it; otherwise lladdrp stays NULL (multicast).
		 */
		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
		    ln->la_flags & LLE_VALID)
		{
			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
			lladdrp = lladdr;
		} else
			lladdrp = NULL;
		psrc = nd->nd_holdsrc(ln, &src);
		/* Drop the lock and our reference before transmitting. */
		LLE_FREE_LOCKED(ln);
		ln = NULL;
		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
	}

out:
	if (ln != NULL)
		LLE_FREE_LOCKED(ln);
	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

	/* Report the failed neighbour (and one held packet) upstream. */
	if (missed != ND_LLINFO_NOSTATE)
		nd->nd_missed(ifp, &taddr, missed, m);
	if (ifp != NULL)
		if_release(ifp, &psref);
}
211
/*
 * Arm (or re-arm) the entry's callout to fire nd_timer() in xtick
 * ticks.  Timeouts longer than INT_MAX ticks are split: the excess is
 * parked in ln_ntick and re-armed in chunks via ND_TIMER_TICK.
 * Called with the entry write-locked; takes an LLE reference on behalf
 * of the scheduled callout.
 */
static void
nd_set_timertick(struct llentry *ln, time_t xtick)
{

	/* The INT_MAX carry-over below requires time_t wider than int. */
	CTASSERT(sizeof(time_t) > sizeof(int));
	KASSERT(xtick >= 0);

	/*
	 * We have to take care of a reference leak which occurs if
	 * callout_reset overwrites a pending callout schedule. Unfortunately
	 * we don't have a mean to know the overwrite, so we need to know it
	 * using callout_stop. We need to call callout_pending first to exclude
	 * the case that the callout has never been scheduled.
	 */
	if (callout_pending(&ln->la_timer)) {
		bool expired;

		/* Not yet fired: the schedule still holds a reference. */
		expired = callout_stop(&ln->la_timer);
		if (!expired)
			LLE_REMREF(ln);
	}

	ln->ln_expire = time_uptime + xtick / hz;
	LLE_ADDREF(ln);
	if (xtick > INT_MAX) {
		/* callout_reset() takes an int; carry the excess over. */
		ln->ln_ntick = xtick - INT_MAX;
		xtick = INT_MAX;
	} else {
		ln->ln_ntick = 0;
	}
	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
}
244
/*
 * Schedule the entry's timer for the given symbolic event type,
 * converting the per-domain parameters into callout ticks and handing
 * the result to nd_set_timertick().
 * Called with the entry write-locked.
 */
void
nd_set_timer(struct llentry *ln, int type)
{
	time_t xtick;
	struct ifnet *ifp;
	struct nd_domain *nd;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	switch (type) {
	case ND_TIMER_IMMEDIATE:
		xtick = 0;
		break;
	case ND_TIMER_TICK:
		/* Continue a timeout previously truncated to INT_MAX. */
		xtick = ln->ln_ntick;
		break;
	case ND_TIMER_RETRANS:
		/* nd_retrans() is in milliseconds; convert to ticks. */
		xtick = nd->nd_retrans(ifp) * hz / 1000;
		break;
	case ND_TIMER_RETRANS_BACKOFF:
	{
		unsigned int retrans = nd->nd_retrans(ifp);
		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;

		/*
		 * Exponential backoff: multiply the retransmit interval
		 * once per attempt past nd_mmaxtries, clamping at
		 * nd_maxretrans.  "xtick < retrans" detects wraparound.
		 */
		xtick = retrans;
		while (attempts-- != 0) {
			xtick *= nd->nd_retransmultiple;
			if (xtick > nd->nd_maxretrans || xtick < retrans) {
				xtick = nd->nd_maxretrans;
				break;
			}
		}
		xtick = xtick * hz / 1000;
		break;
	}
	case ND_TIMER_REACHABLE:
		/* nd_reachable() is in milliseconds; convert to ticks. */
		xtick = nd->nd_reachable(ifp) * hz / 1000;
		break;
	case ND_TIMER_EXPIRE:
		/* ln_expire is an uptime in seconds. */
		if (ln->ln_expire > time_uptime)
			xtick = (ln->ln_expire - time_uptime) * hz;
		else
			xtick = nd_gctimer * hz;
		break;
	case ND_TIMER_DELAY:
		/* nd_delay is in seconds. */
		xtick = nd->nd_delay * hz;
		break;
	case ND_TIMER_GC:
		xtick = nd_gctimer * hz;
		break;
	default:
		panic("%s: invalid timer type\n", __func__);
	}

	nd_set_timertick(ln, xtick);
}
304
/*
 * Resolve the link-layer address of the neighbour entry so that m can
 * be transmitted.  If the address is already known it is copied into
 * lldst (up to dstsize bytes) and 0 is returned.  Otherwise m is
 * queued on the entry, a neighbour solicitation may be sent, and an
 * error is returned: EWOULDBLOCK while resolution is in progress, or
 * EHOSTUNREACH/EHOSTDOWN once the neighbour has failed to answer.
 * Called with the entry write-locked; the lock is released on all
 * paths.  Ownership of m passes to the entry whenever an error is
 * returned.
 */
int
nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    uint8_t *lldst, size_t dstsize)
{
	struct ifnet *ifp;
	struct nd_domain *nd;
	int error;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/* We don't have to do link-layer address resolution on a p2p link. */
	if (ifp->if_flags & IFF_POINTOPOINT &&
	    ln->ln_state < ND_LLINFO_REACHABLE)
	{
		ln->ln_state = ND_LLINFO_STALE;
		nd_set_timer(ln, ND_TIMER_GC);
	}

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds so that we perform
	 * neighbor unreachability detection on expiration.
	 * (RFC 2461 7.3.3)
	 */
	if (ln->ln_state == ND_LLINFO_STALE) {
		ln->ln_asked = 0;
		ln->ln_state = ND_LLINFO_DELAY;
		nd_set_timer(ln, ND_TIMER_DELAY);
	}

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
		KASSERT((ln->la_flags & LLE_VALID) != 0);
		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
		LLE_WUNLOCK(ln);
		return 0;
	}

	/*
	 * There is a neighbor cache entry, but no link-layer address
	 * response yet.  Append this latest packet to the end of the
	 * packet queue held on the entry, provided the queue length does
	 * not exceed nd_maxqueuelen; when it does, the oldest packets in
	 * the queue are dropped.
	 */
	if (ln->ln_state == ND_LLINFO_NOSTATE ||
	    ln->ln_state == ND_LLINFO_WAITDELETE)
		ln->ln_state = ND_LLINFO_INCOMPLETE;

	if (ln->ln_hold != NULL) {
		struct mbuf *m_hold;
		int i;

		/* Walk to the tail, counting, and append m there. */
		i = 0;
		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
			i++;
			if (m_hold->m_nextpkt == NULL) {
				m_hold->m_nextpkt = m;
				break;
			}
		}
		/* Enforce the queue limit by dropping from the head. */
		while (i >= nd->nd_maxqueuelen) {
			m_hold = ln->ln_hold;
			ln->ln_hold = ln->ln_hold->m_nextpkt;
			m_freem(m_hold);
			i--;
		}
	} else
		ln->ln_hold = m;

	/*
	 * After nd_mmaxtries solicitations the neighbour is considered
	 * down; distinguish a failed gateway from a failed on-link host.
	 */
	if (ln->ln_asked >= nd->nd_mmaxtries)
		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
		    EHOSTUNREACH : EHOSTDOWN;
	else
		error = EWOULDBLOCK;

	/*
	 * If there has been no NS for the neighbor after entering the
	 * INCOMPLETE state, send the first solicitation.
	 */
	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
		struct psref psref;
		union l3addr dst, src, *psrc;

		ln->ln_asked++;
		nd_set_timer(ln, ND_TIMER_RETRANS);
		/* Snapshot the addresses; the lock is dropped to transmit. */
		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
		psrc = nd->nd_holdsrc(ln, &src);
		if_acquire(ifp, &psref);
		LLE_WUNLOCK(ln);

		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
		if_release(ifp, &psref);
	} else
		LLE_WUNLOCK(ln);

	return error;
}
411
412 void
413 nd_nud_hint(struct llentry *ln)
414 {
415 struct nd_domain *nd;
416
417 if (ln == NULL)
418 return;
419
420 LLE_WLOCK_ASSERT(ln);
421
422 if (ln->ln_state < ND_LLINFO_REACHABLE)
423 goto done;
424
425 nd = nd_find_domain(ln->lle_tbl->llt_af);
426
427 /*
428 * if we get upper-layer reachability confirmation many times,
429 * it is possible we have false information.
430 */
431 ln->ln_byhint++;
432 if (ln->ln_byhint > nd->nd_maxnudhint)
433 goto done;
434
435 ln->ln_state = ND_LLINFO_REACHABLE;
436 if (!ND_IS_LLINFO_PERMANENT(ln))
437 nd_set_timer(ln, ND_TIMER_REACHABLE);
438
439 done:
440 LLE_WUNLOCK(ln);
441
442 return;
443 }
444
445 static struct nd_domain *
446 nd_find_domain(int af)
447 {
448
449 KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
450 return nd_domains[af];
451 }
452
453 void
454 nd_attach_domain(struct nd_domain *nd)
455 {
456
457 KASSERT(nd->nd_family < __arraycount(nd_domains));
458 nd_domains[nd->nd_family] = nd;
459 }
460