ip_icmp.c revision 1.164 1 /* $NetBSD: ip_icmp.c,v 1.164 2018/01/22 06:56:25 maxv Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * This code is derived from software contributed to The NetBSD Foundation
41 * by Jason R. Thorpe of Zembu Labs, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
53 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1982, 1986, 1988, 1993
67 * The Regents of the University of California. All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 * 3. Neither the name of the University nor the names of its contributors
78 * may be used to endorse or promote products derived from this software
79 * without specific prior written permission.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE.
92 *
93 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
94 */
95
96 #include <sys/cdefs.h>
97 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.164 2018/01/22 06:56:25 maxv Exp $");
98
99 #ifdef _KERNEL_OPT
100 #include "opt_ipsec.h"
101 #endif
102
103 #include <sys/param.h>
104 #include <sys/systm.h>
105 #include <sys/mbuf.h>
106 #include <sys/protosw.h>
107 #include <sys/socket.h>
108 #include <sys/socketvar.h> /* For softnet_lock */
109 #include <sys/kmem.h>
110 #include <sys/time.h>
111 #include <sys/kernel.h>
112 #include <sys/syslog.h>
113 #include <sys/sysctl.h>
114
115 #include <net/if.h>
116 #include <net/route.h>
117
118 #include <netinet/in.h>
119 #include <netinet/in_systm.h>
120 #include <netinet/in_var.h>
121 #include <netinet/ip.h>
122 #include <netinet/ip_icmp.h>
123 #include <netinet/ip_var.h>
124 #include <netinet/in_pcb.h>
125 #include <netinet/in_proto.h>
126 #include <netinet/icmp_var.h>
127 #include <netinet/icmp_private.h>
128 #include <netinet/wqinput.h>
129
130 #ifdef IPSEC
131 #include <netipsec/ipsec.h>
132 #include <netipsec/key.h>
133 #endif /* IPSEC*/
134
135 /*
136 * ICMP routines: error generation, receive packet processing, and
137 * routines to turnaround packets back to the originator, and
138 * host table maintenance routines.
139 */
140
/*
 * Run-time tunables; each is registered as a net.inet.icmp sysctl leaf in
 * sysctl_netinet_icmp_setup().
 */
/* Non-zero: answer ICMP_MASKREQ messages (checked in _icmp_input). */
141 int icmpmaskrepl = 0;
/*
 * Zero: ICMP_ECHO / ICMP_TSTAMP requests received as broadcast or
 * multicast are counted and not answered.
 */
142 int icmpbmcastecho = 0;
143 #ifdef ICMPPRINTFS
/* Non-zero: emit debugging printf()s from the ICMP code paths. */
144 int icmpprintfs = 0;
145 #endif
/*
 * Upper bound on the bytes of the offending packet (beyond its IP header)
 * quoted in an ICMP error; the sysctl handler accepts 8..512.
 */
146 int icmpreturndatabytes = 8;
147
/* Per-CPU array of ICMP_NSTATS uint64_t counters, allocated in icmp_init(). */
148 percpu_t *icmpstat_percpu;
149
150 /*
151 * List of callbacks to notify when Path MTU changes are made.
152 */
153 struct icmp_mtudisc_callback {
/* Linkage on icmp_mtudisc_callbacks. */
154 LIST_ENTRY(icmp_mtudisc_callback) mc_list;
/* The registered callback; it takes a struct in_addr. */
155 void (*mc_func)(struct in_addr);
156 };
157
/* Head of the callback list; entries are inserted under icmp_mtx. */
158 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
159 LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
160
/* ip_next_mtu() is deliberately left with external linkage (see #else). */
161 #if 0
162 static u_int ip_next_mtu(u_int, int);
163 #else
164 /*static*/ u_int ip_next_mtu(u_int, int);
165 #endif
166
/* ICMP error rate limit; defined elsewhere, exposed here as "errppslimit". */
167 extern int icmperrppslim;
168 static int icmperrpps_count = 0;
169 static struct timeval icmperrppslim_last;
/* Non-zero: act on received ICMP_REDIRECT messages ("rediraccept"). */
170 static int icmp_rediraccept = 1;
/* Timeout value handed to the redirect timer queue ("redirtimeout"). */
171 static int icmp_redirtimeout = 600;
/* Timer queue for redirect-created routes; NULL while the timeout is 0. */
172 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
173
174 /* Protect mtudisc and redirect stuffs */
175 static kmutex_t icmp_mtx __cacheline_aligned;
176
177 static void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *);
178 static void icmp_redirect_timeout(struct rtentry *, struct rttimer *);
179
180 static void sysctl_netinet_icmp_setup(struct sysctllog **);
181
182 /* workqueue-based pr_input */
183 static struct wqinput *icmp_wqinput;
184 static void _icmp_input(struct mbuf *, int, int);
185
186 void
187 icmp_init(void)
188 {
189
190 sysctl_netinet_icmp_setup(NULL);
191
192 mutex_init(&icmp_mtx, MUTEX_DEFAULT, IPL_NONE);
193 /*
194 * This is only useful if the user initializes redirtimeout to
195 * something other than zero.
196 */
197 mutex_enter(&icmp_mtx);
198 icmp_redirect_timeout_q = rt_timer_queue_create(icmp_redirtimeout);
199 mutex_exit(&icmp_mtx);
200
201 icmpstat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP_NSTATS);
202 icmp_wqinput = wqinput_create("icmp", _icmp_input);
203 }
204
205 void
206 icmp_mtudisc_lock(void)
207 {
208
209 mutex_enter(&icmp_mtx);
210 }
211
212 void
213 icmp_mtudisc_unlock(void)
214 {
215
216 mutex_exit(&icmp_mtx);
217 }
218
219 /*
220 * Register a Path MTU Discovery callback.
221 */
222 void
223 icmp_mtudisc_callback_register(void (*func)(struct in_addr))
224 {
225 struct icmp_mtudisc_callback *mc, *new;
226
227 new = kmem_alloc(sizeof(*mc), KM_SLEEP);
228
229 mutex_enter(&icmp_mtx);
230 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
231 mc = LIST_NEXT(mc, mc_list)) {
232 if (mc->mc_func == func) {
233 mutex_exit(&icmp_mtx);
234 kmem_free(new, sizeof(*mc));
235 return;
236 }
237 }
238
239 new->mc_func = func;
240 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, new, mc_list);
241 mutex_exit(&icmp_mtx);
242 }
243
244 /*
245 * Generate an error packet of type error in response to a bad IP packet. 'n'
246 * contains this packet. We create 'm' and send it.
247 *
248 * As we are not required to return everything we have, we return whatever
249 * we can return at ease.
250 *
251 * Note that ICMP datagrams longer than 576 octets are out of spec according
252 * to RFC1812; the limit on icmpreturndatabytes will keep things below that
253 * limit.
254 */
255 void
256 icmp_error(struct mbuf *n, int type, int code, n_long dest, int destmtu)
257 {
258 struct ip *oip = mtod(n, struct ip *), *nip;
259 const unsigned oiphlen = oip->ip_hl << 2;
260 struct icmp *icp;
261 struct mbuf *m;
262 struct m_tag *mtag;
263 unsigned datalen, mblen;
264 int totlen;
265
266 #ifdef ICMPPRINTFS
267 if (icmpprintfs)
268 printf("icmp_error(%p, type:%d, code:%d)\n", oip, type, code);
269 #endif
270
271 if (type != ICMP_REDIRECT)
272 ICMP_STATINC(ICMP_STAT_ERROR);
273
274 /*
275 * Don't send error if:
276 * - The original packet was encrypted.
277 * - The packet is multicast or broadcast.
278 * - The packet is not the first fragment of the message.
279 * - The packet is an ICMP message with an unknown type.
280 */
281 if (n->m_flags & M_DECRYPTED)
282 goto freeit;
283 if (n->m_flags & (M_BCAST|M_MCAST))
284 goto freeit;
285 if (oip->ip_off &~ htons(IP_MF|IP_DF))
286 goto freeit;
287 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
288 n->m_len >= oiphlen + ICMP_MINLEN) {
289 struct icmp *oicp = (struct icmp *)((char *)oip + oiphlen);
290 if (!ICMP_INFOTYPE(oicp->icmp_type)) {
291 ICMP_STATINC(ICMP_STAT_OLDICMP);
292 goto freeit;
293 }
294 }
295
296 /*
297 * First, do a rate limitation check.
298 */
299 if (icmp_ratelimit(&oip->ip_src, type, code)) {
300 /* XXX stat */
301 goto freeit;
302 }
303
304 /*
305 * Compute the number of bytes we will put in 'icmp_ip'. Truncate
306 * it to the size of the mbuf, if it's too big.
307 */
308 datalen = oiphlen + min(icmpreturndatabytes,
309 ntohs(oip->ip_len) - oiphlen);
310 mblen = 0;
311 for (m = n; m && (mblen < datalen); m = m->m_next)
312 mblen += m->m_len;
313 datalen = min(mblen, datalen);
314
315 /*
316 * Compute the total length of the new packet. Truncate it if it's
317 * bigger than the size of a cluster.
318 */
319 CTASSERT(ICMP_MINLEN + sizeof(struct ip) <= MCLBYTES);
320 totlen = sizeof(struct ip) + ICMP_MINLEN + datalen;
321 if (totlen > MCLBYTES) {
322 datalen = MCLBYTES - ICMP_MINLEN - sizeof(struct ip);
323 totlen = MCLBYTES;
324 }
325
326 /*
327 * Allocate the mbuf for the new packet.
328 */
329 m = m_gethdr(M_DONTWAIT, MT_HEADER);
330 if (m && (totlen > MHLEN)) {
331 MCLGET(m, M_DONTWAIT);
332 if ((m->m_flags & M_EXT) == 0) {
333 m_freem(m);
334 m = NULL;
335 }
336 }
337 if (m == NULL)
338 goto freeit;
339 MCLAIM(m, n->m_owner);
340 m->m_len = totlen;
341 m->m_pkthdr.len = m->m_len;
342 m_copy_rcvif(m, n);
343
344 if ((u_int)type > ICMP_MAXTYPE)
345 panic("icmp_error");
346 ICMP_STATINC(ICMP_STAT_OUTHIST + type);
347
348 if ((m->m_flags & M_EXT) == 0)
349 MH_ALIGN(m, m->m_len);
350
351 /*
352 * Get pointers on the IP header and the ICMP header.
353 */
354 nip = mtod(m, struct ip *);
355 icp = (struct icmp *)(nip + 1);
356
357 /*
358 * Fill in the fields of the ICMP header: icmp_type, icmp_code
359 * and icmp_ip. icmp_cksum gets filled later.
360 */
361 icp->icmp_type = type;
362 if (type == ICMP_REDIRECT) {
363 icp->icmp_gwaddr.s_addr = dest;
364 } else {
365 icp->icmp_void = 0;
366 /*
367 * The following assignments assume an overlay with the
368 * zeroed icmp_void field.
369 */
370 if (type == ICMP_PARAMPROB) {
371 icp->icmp_pptr = code;
372 code = 0;
373 } else if (type == ICMP_UNREACH &&
374 code == ICMP_UNREACH_NEEDFRAG && destmtu)
375 icp->icmp_nextmtu = htons(destmtu);
376 }
377 icp->icmp_code = code;
378 m_copydata(n, 0, datalen, (void *)&icp->icmp_ip);
379
380 /*
381 * Now, copy the old IP header (without options) in front of the
382 * ICMP message. The src/dst fields will be swapped in icmp_reflect.
383 */
384 /* ip_v set in ip_output */
385 nip->ip_hl = sizeof(struct ip) >> 2;
386 nip->ip_tos = 0;
387 nip->ip_len = htons(m->m_len);
388 /* ip_id set in ip_output */
389 nip->ip_off = htons(0);
390 /* ip_ttl set in icmp_reflect */
391 nip->ip_p = IPPROTO_ICMP;
392 nip->ip_src = oip->ip_src;
393 nip->ip_dst = oip->ip_dst;
394 /* move PF m_tag to new packet, if it exists */
395 mtag = m_tag_find(n, PACKET_TAG_PF, NULL);
396 if (mtag != NULL) {
397 m_tag_unlink(n, mtag);
398 m_tag_prepend(m, mtag);
399 }
400
401 icmp_reflect(m);
402
403 freeit:
404 m_freem(n);
405 }
406
/*
 * Scratch AF_INET socket addresses; the input and reflect paths store an
 * address into sin_addr before passing them on.  They are file-scope
 * objects shared by every caller.
 */
/* Destination of the quoted packet, handed to ctlinput/rtredirect. */
407 struct sockaddr_in icmpsrc = {
408 .sin_len = sizeof (struct sockaddr_in),
409 .sin_family = AF_INET,
410 };
/* Address used for interface lookup (mask request) or redirect target. */
411 static struct sockaddr_in icmpdst = {
412 .sin_len = sizeof (struct sockaddr_in),
413 .sin_family = AF_INET,
414 };
/* Source of a received redirect, i.e. the advertising gateway. */
415 static struct sockaddr_in icmpgw = {
416 .sin_len = sizeof (struct sockaddr_in),
417 .sin_family = AF_INET,
418 };
/* NOTE(review): not referenced in the visible part of this file. */
419 struct sockaddr_in icmpmask = {
420 .sin_len = 8,
421 .sin_family = 0,
422 };
423
424 /*
425 * Process a received ICMP message.
426 */
/*
 * 'm' starts at the IP header, 'hlen' is the IP header length in bytes and
 * 'proto' is passed through unchanged to rip_input().
 *
 * The message is length- and checksum-verified, then dispatched on its
 * type.  Requests that are answered (echo, timestamp, mask) are turned
 * into replies in place and sent with icmp_reflect(); paths that reach
 * the 'raw' label hand the packet to rip_input(); paths that reach
 * 'freeit' free it.
 */
427 static void
428 _icmp_input(struct mbuf *m, int hlen, int proto)
429 {
430 struct icmp *icp;
431 struct ip *ip = mtod(m, struct ip *);
432 int icmplen;
433 int i;
434 struct in_ifaddr *ia;
435 void *(*ctlfunc)(int, const struct sockaddr *, void *);
436 int code;
437 struct rtentry *rt;
438
439 /*
440 * Locate icmp structure in mbuf, and check
441 * that not corrupted and of at least minimum length.
442 */
443 icmplen = ntohs(ip->ip_len) - hlen;
444 #ifdef ICMPPRINTFS
445 if (icmpprintfs) {
446 char sbuf[INET_ADDRSTRLEN], dbuf[INET_ADDRSTRLEN];
447 printf("icmp_input from `%s' to `%s', len %d\n",
448 IN_PRINT(sbuf, &ip->ip_src), IN_PRINT(dbuf, &ip->ip_dst),
449 icmplen);
450 }
451 #endif
452 if (icmplen < ICMP_MINLEN) {
453 ICMP_STATINC(ICMP_STAT_TOOSHORT);
454 goto freeit;
455 }
/*
 * Need the IP header plus up to ICMP_ADVLENMIN bytes of ICMP in the first
 * mbuf, writable.  If m_pullup() fails 'm' is NULL here, so the function
 * returns directly instead of going through 'freeit'.
 */
456 i = hlen + min(icmplen, ICMP_ADVLENMIN);
457 if ((m->m_len < i || M_READONLY(m)) && (m = m_pullup(m, i)) == NULL) {
458 ICMP_STATINC(ICMP_STAT_TOOSHORT);
459 return;
460 }
/* m may have changed above; reload the header pointer. */
461 ip = mtod(m, struct ip *);
/*
 * Temporarily advance past the IP header so that in_cksum() covers the
 * ICMP message only; the adjustment is undone once the checksum passes.
 */
462 m->m_len -= hlen;
463 m->m_data += hlen;
464 icp = mtod(m, struct icmp *);
465 /* Don't need to assert alignment, here. */
466 if (in_cksum(m, icmplen)) {
467 ICMP_STATINC(ICMP_STAT_CHECKSUM);
468 goto freeit;
469 }
470 m->m_len += hlen;
471 m->m_data -= hlen;
472
473 #ifdef ICMPPRINTFS
474 /*
475 * Message type specific processing.
476 */
477 if (icmpprintfs)
478 printf("icmp_input(type:%d, code:%d)\n", icp->icmp_type,
479 icp->icmp_code);
480 #endif
/* Types beyond ICMP_MAXTYPE get no kernel processing and no histogram. */
481 if (icp->icmp_type > ICMP_MAXTYPE)
482 goto raw;
483 ICMP_STATINC(ICMP_STAT_INHIST + icp->icmp_type);
484 code = icp->icmp_code;
485 switch (icp->icmp_type) {
486
/*
 * Error messages: translate the ICMP code into a PRC_* value in 'code',
 * then join the common 'deliver' path below.
 */
487 case ICMP_UNREACH:
488 switch (code) {
489 case ICMP_UNREACH_PROTOCOL:
490 code = PRC_UNREACH_PROTOCOL;
491 break;
492
493 case ICMP_UNREACH_PORT:
494 code = PRC_UNREACH_PORT;
495 break;
496
497 case ICMP_UNREACH_SRCFAIL:
498 code = PRC_UNREACH_SRCFAIL;
499 break;
500
501 case ICMP_UNREACH_NEEDFRAG:
502 code = PRC_MSGSIZE;
503 break;
504
505 case ICMP_UNREACH_NET:
506 case ICMP_UNREACH_NET_UNKNOWN:
507 case ICMP_UNREACH_NET_PROHIB:
508 case ICMP_UNREACH_TOSNET:
509 code = PRC_UNREACH_NET;
510 break;
511
512 case ICMP_UNREACH_HOST:
513 case ICMP_UNREACH_HOST_UNKNOWN:
514 case ICMP_UNREACH_ISOLATED:
515 case ICMP_UNREACH_HOST_PROHIB:
516 case ICMP_UNREACH_TOSHOST:
517 case ICMP_UNREACH_ADMIN_PROHIBIT:
518 case ICMP_UNREACH_HOST_PREC:
519 case ICMP_UNREACH_PREC_CUTOFF:
520 code = PRC_UNREACH_HOST;
521 break;
522
523 default:
524 goto badcode;
525 }
526 goto deliver;
527
528 case ICMP_TIMXCEED:
529 if (code > 1)
530 goto badcode;
531 code += PRC_TIMXCEED_INTRANS;
532 goto deliver;
533
534 case ICMP_PARAMPROB:
535 if (code > 1)
536 goto badcode;
537 code = PRC_PARAMPROB;
538 goto deliver;
539
540 case ICMP_SOURCEQUENCH:
541 if (code)
542 goto badcode;
543 code = PRC_QUENCH;
544 goto deliver;
545
546 deliver:
547 /*
548 * Problem with datagram; advise higher level routines.
549 */
/*
 * The message must be long enough to quote an IP header with a sane
 * header length; otherwise it is counted as BADLEN and freed.
 */
550 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
551 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
552 ICMP_STATINC(ICMP_STAT_BADLEN);
553 goto freeit;
554 }
555 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
556 goto badcode;
557 #ifdef ICMPPRINTFS
558 if (icmpprintfs)
559 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
560 #endif
/*
 * Call the control-input handler, if any, of the protocol named in the
 * quoted IP header, passing the quoted packet's destination address.
 */
561 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
562 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
563 if (ctlfunc)
564 (void) (*ctlfunc)(code, sintosa(&icmpsrc),
565 &icp->icmp_ip);
566 break;
567
/* Unknown/invalid code: count it; the packet still goes to 'raw'. */
568 badcode:
569 ICMP_STATINC(ICMP_STAT_BADCODE);
570 break;
571
572 case ICMP_ECHO:
573 if (!icmpbmcastecho &&
574 (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
575 ICMP_STATINC(ICMP_STAT_BMCASTECHO);
576 break;
577 }
578 icp->icmp_type = ICMP_ECHOREPLY;
579 goto reflect;
580
581 case ICMP_TSTAMP:
582 if (icmplen < ICMP_TSLEN) {
583 ICMP_STATINC(ICMP_STAT_BADLEN);
584 break;
585 }
586 if (!icmpbmcastecho &&
587 (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
588 ICMP_STATINC(ICMP_STAT_BMCASTTSTAMP);
589 break;
590 }
591 icp->icmp_type = ICMP_TSTAMPREPLY;
592 icp->icmp_rtime = iptime();
593 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
594 goto reflect;
595
596 case ICMP_MASKREQ: {
597 struct ifnet *rcvif;
598 int s, ss;
599 struct ifaddr *ifa = NULL;
600
601 if (icmpmaskrepl == 0)
602 break;
603 /*
604 * We are not able to respond with all ones broadcast
605 * unless we receive it over a point-to-point interface.
606 */
607 if (icmplen < ICMP_MASKLEN) {
608 ICMP_STATINC(ICMP_STAT_BADLEN);
609 break;
610 }
/*
 * Pick the address used to find the matching interface address on the
 * receiving interface: the sender's address when the request was sent to
 * the all-ones or null address, otherwise the request's destination.
 */
611 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
612 in_nullhost(ip->ip_dst))
613 icmpdst.sin_addr = ip->ip_src;
614 else
615 icmpdst.sin_addr = ip->ip_dst;
/* 'ifa'/'ia' are only used inside this pserialize read section. */
616 ss = pserialize_read_enter();
617 rcvif = m_get_rcvif(m, &s);
618 if (__predict_true(rcvif != NULL))
619 ifa = ifaof_ifpforaddr(sintosa(&icmpdst), rcvif);
620 m_put_rcvif(rcvif, &s);
621 if (ifa == NULL) {
622 pserialize_read_exit(ss);
623 break;
624 }
625 ia = ifatoia(ifa);
626 icp->icmp_type = ICMP_MASKREPLY;
627 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
/*
 * A request from the null address is answered to the interface's
 * broadcast address, or to the peer on a point-to-point link.
 */
628 if (in_nullhost(ip->ip_src)) {
629 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
630 ip->ip_src = ia->ia_broadaddr.sin_addr;
631 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
632 ip->ip_src = ia->ia_dstaddr.sin_addr;
633 }
634 pserialize_read_exit(ss);
/*
 * Shared tail for ECHO, TSTAMP and MASKREQ: bump the reflect and
 * per-type output counters and send the rewritten packet back.  The
 * packet is consumed by icmp_reflect(), hence the direct return.
 */
635 reflect:
636 {
637 uint64_t *icps = percpu_getref(icmpstat_percpu);
638 icps[ICMP_STAT_REFLECT]++;
639 icps[ICMP_STAT_OUTHIST + icp->icmp_type]++;
640 percpu_putref(icmpstat_percpu);
641 }
642 icmp_reflect(m);
643 return;
644 }
645
646 case ICMP_REDIRECT:
647 if (code > 3)
648 goto badcode;
/* Redirects administratively disabled: drop without raw delivery. */
649 if (icmp_rediraccept == 0)
650 goto freeit;
651 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
652 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
653 ICMP_STATINC(ICMP_STAT_BADLEN);
654 break;
655 }
656 /*
657 * Short circuit routing redirects to force
658 * immediate change in the kernel's routing
659 * tables. The message is also handed to anyone
660 * listening on a raw socket (e.g. the routing
661 * daemon for use in updating its tables).
662 */
663 icmpgw.sin_addr = ip->ip_src;
664 icmpdst.sin_addr = icp->icmp_gwaddr;
665 #ifdef ICMPPRINTFS
666 if (icmpprintfs) {
667 char gbuf[INET_ADDRSTRLEN], dbuf[INET_ADDRSTRLEN];
668 printf("redirect dst `%s' to `%s'\n",
669 IN_PRINT(dbuf, &icp->icmp_ip.ip_dst),
670 IN_PRINT(gbuf, &icp->icmp_gwaddr));
671 }
672 #endif
673 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
674 rt = NULL;
675 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
676 NULL, RTF_GATEWAY | RTF_HOST, sintosa(&icmpgw), &rt);
/*
 * If a route came back and redirect timeouts are enabled, put it on the
 * redirect timer queue (under icmp_mtx); a failure is only logged.
 */
677 mutex_enter(&icmp_mtx);
678 if (rt != NULL && icmp_redirtimeout != 0) {
679 i = rt_timer_add(rt, icmp_redirect_timeout,
680 icmp_redirect_timeout_q);
681 if (i) {
682 char buf[INET_ADDRSTRLEN];
683 log(LOG_ERR, "ICMP: redirect failed to "
684 "register timeout for route to %s, "
685 "code %d\n",
686 IN_PRINT(buf, &icp->icmp_ip.ip_dst), i);
687 }
688 }
689 mutex_exit(&icmp_mtx);
690 if (rt != NULL)
691 rt_unref(rt);
692
693 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
694 #if defined(IPSEC)
695 if (ipsec_used)
696 key_sa_routechange((struct sockaddr *)&icmpsrc);
697 #endif
698 break;
699
700 /*
701 * No kernel processing for the following;
702 * just fall through to send to raw listener.
703 */
704 case ICMP_ECHOREPLY:
705 case ICMP_ROUTERADVERT:
706 case ICMP_ROUTERSOLICIT:
707 case ICMP_TSTAMPREPLY:
708 case ICMP_IREQREPLY:
709 case ICMP_MASKREPLY:
710 default:
711 break;
712 }
713
/* Every 'break' out of the switch ends up here. */
714 raw:
715 rip_input(m, hlen, proto);
716 return;
717
718 freeit:
719 m_freem(m);
720 return;
721 }
722
723 void
724 icmp_input(struct mbuf *m, ...)
725 {
726 int hlen, proto;
727 va_list ap;
728
729 va_start(ap, m);
730 hlen = va_arg(ap, int);
731 proto = va_arg(ap, int);
732 va_end(ap);
733
734 wqinput_input(icmp_wqinput, m, hlen, proto);
735 }
736
737 /*
738 * Reflect the ip packet back to the source
739 */
/*
 * 'm' starts at an IP header whose src/dst are still those of the packet
 * being answered.  The function swaps them, chooses a local source
 * address, strips the IP options from the packet (keeping selected ones
 * in a separate 'opts' mbuf) and passes the result to icmp_send().
 * 'm' is consumed on every path: freed here or handed to icmp_send().
 *
 * Source address selection, in order of preference:
 *  1. the original destination, if it is one of our ready addresses;
 *  2. the address whose broadcast address the packet was sent to;
 *  3. the address in_selectsrc() picks for the reply, if it is
 *     configured on the receiving interface;
 *  4. the first AF_INET address on the receiving interface;
 *  5. the first address on any non-loopback interface.
 */
740 void
741 icmp_reflect(struct mbuf *m)
742 {
743 struct ip *ip = mtod(m, struct ip *);
744 struct in_ifaddr *ia;
745 struct ifaddr *ifa;
746 struct sockaddr_in *sin;
747 struct in_addr t;
748 struct mbuf *opts = NULL;
749 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
750 struct ifnet *rcvif;
751 struct psref psref, psref_ia;
752 int s;
753 int bound;
754
/* NOTE(review): presumably required for the psref use below - confirm. */
755 bound = curlwp_bind();
756
/*
 * Refuse to answer a source we could not forward to, unless it lies in
 * the loopback network.
 */
757 if (!in_canforward(ip->ip_src) &&
758 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
759 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
760 m_freem(m); /* Bad return address */
761 goto done; /* ip_output() will check for broadcast */
762 }
/* 't' keeps the original destination; the reply goes to the old source. */
763 t = ip->ip_dst;
764 ip->ip_dst = ip->ip_src;
765 /*
766 * If the incoming packet was addressed directly to us, use
767 * dst as the src for the reply. Otherwise (broadcast or
768 * anonymous), use an address which corresponds to the
769 * incoming interface, with a preference for the address which
770 * corresponds to the route to the destination of the ICMP.
771 */
772
773 /* Look for packet addressed to us */
774 ia = in_get_ia_psref(t, &psref_ia);
775 if (ia && (ia->ia4_flags & IN_IFF_NOTREADY)) {
776 ia4_release(ia, &psref_ia);
777 ia = NULL;
778 }
779
780 rcvif = m_get_rcvif_psref(m, &psref);
781
782 /* look for packet sent to broadcast address */
783 if (ia == NULL && rcvif &&
784 (rcvif->if_flags & IFF_BROADCAST)) {
785 s = pserialize_read_enter();
786 IFADDR_READER_FOREACH(ifa, rcvif) {
787 if (ifa->ifa_addr->sa_family != AF_INET)
788 continue;
789 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
790 ia = ifatoia(ifa);
791 if ((ia->ia4_flags & IN_IFF_NOTREADY) == 0)
792 break;
793 ia = NULL;
794 }
795 }
/* Take a reference before leaving the read section. */
796 if (ia != NULL)
797 ia4_acquire(ia, &psref_ia);
798 pserialize_read_exit(s);
799 }
800
/* From here on: sin != NULL exactly when 'ia' holds a reference. */
801 sin = ia ? &ia->ia_addr : NULL;
802
803 icmpdst.sin_addr = t;
804
805 /*
806 * if the packet is addressed somewhere else, compute the
807 * source address for packets routed back to the source, and
808 * use that, if it's an address on the interface which
809 * received the packet
810 */
811 if (sin == NULL && rcvif) {
812 struct sockaddr_in sin_dst;
813 struct route icmproute;
814 int errornum;
815
816 sockaddr_in_init(&sin_dst, &ip->ip_dst, 0);
817 memset(&icmproute, 0, sizeof(icmproute));
818 errornum = 0;
819 ia = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum,
820 &psref_ia);
821 /* errornum is never used */
822 rtcache_free(&icmproute);
823 /* check to make sure sin is a source address on rcvif */
/*
 * Only the selected address value is kept; the reference from
 * in_selectsrc() is dropped and the address is looked up again,
 * restricted to rcvif.
 */
824 if (ia != NULL) {
825 sin = &ia->ia_addr;
826 t = sin->sin_addr;
827 sin = NULL;
828 ia4_release(ia, &psref_ia);
829 ia = in_get_ia_on_iface_psref(t, rcvif, &psref_ia);
830 if (ia != NULL)
831 sin = &ia->ia_addr;
832 }
833 }
834
835 /*
836 * if it was not addressed to us, but the route doesn't go out
837 * the source interface, pick an address on the source
838 * interface. This can happen when routing is asymmetric, or
839 * when the incoming packet was encapsulated
840 */
841 if (sin == NULL && rcvif) {
842 KASSERT(ia == NULL);
843 s = pserialize_read_enter();
844 IFADDR_READER_FOREACH(ifa, rcvif) {
845 if (ifa->ifa_addr->sa_family != AF_INET)
846 continue;
847 sin = &(ifatoia(ifa)->ia_addr);
848 ia = ifatoia(ifa);
849 ia4_acquire(ia, &psref_ia);
850 break;
851 }
852 pserialize_read_exit(s);
853 }
854
/* rcvif is not used past this point. */
855 m_put_rcvif_psref(rcvif, &psref);
856
857 /*
858 * The following happens if the packet was not addressed to us,
859 * and was received on an interface with no IP address:
860 * We find the first AF_INET address on the first non-loopback
861 * interface.
862 */
863 if (sin == NULL) {
864 KASSERT(ia == NULL);
865 s = pserialize_read_enter();
866 IN_ADDRLIST_READER_FOREACH(ia) {
867 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
868 continue;
869 sin = &ia->ia_addr;
870 ia4_acquire(ia, &psref_ia);
871 break;
872 }
873 pserialize_read_exit(s);
874 }
875
876 /*
877 * If we still didn't find an address, punt. We could have an
878 * interface up (and receiving packets) with no address.
879 */
880 if (sin == NULL) {
881 KASSERT(ia == NULL);
882 m_freem(m);
883 goto done;
884 }
885
886 ip->ip_src = sin->sin_addr;
887 ip->ip_ttl = MAXTTL;
888
/* The address has been copied out; 'sin' must not be used after this. */
889 if (ia != NULL)
890 ia4_release(ia, &psref_ia);
891
892 if (optlen > 0) {
893 u_char *cp;
894 int opt, cnt;
895 u_int len;
896
897 /*
898 * Retrieve any source routing from the incoming packet;
899 * add on any record-route or timestamp options.
900 */
/*
 * Without a saved source route, start 'opts' with a zero address as
 * first hop.  If no mbuf can be had, the reply is sent with no options.
 */
901 cp = (u_char *) (ip + 1);
902 if ((opts = ip_srcroute(m)) == NULL &&
903 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
904 MCLAIM(opts, m->m_owner);
905 opts->m_len = sizeof(struct in_addr);
906 *mtod(opts, struct in_addr *) = zeroin_addr;
907 }
908 if (opts) {
909 #ifdef ICMPPRINTFS
910 if (icmpprintfs)
911 printf("icmp_reflect optlen %d rt %d => ",
912 optlen, opts->m_len);
913 #endif
/*
 * Walk the received options.  Parsing stops at EOL or at the first
 * option whose length byte is missing, too small, or larger than what
 * remains.
 */
914 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
915 opt = cp[IPOPT_OPTVAL];
916 if (opt == IPOPT_EOL)
917 break;
918 if (opt == IPOPT_NOP)
919 len = 1;
920 else {
921 if (cnt < IPOPT_OLEN + sizeof(*cp))
922 break;
923 len = cp[IPOPT_OLEN];
924 if (len < IPOPT_OLEN + sizeof(*cp) ||
925 len > cnt)
926 break;
927 }
928 /*
929 * Should check for overflow, but it "can't happen"
930 */
/* Only record-route, timestamp and security options are copied. */
931 if (opt == IPOPT_RR || opt == IPOPT_TS ||
932 opt == IPOPT_SECURITY) {
933 memmove(mtod(opts, char *) + opts->m_len,
934 cp, len);
935 opts->m_len += len;
936 }
937 }
938 /* Terminate & pad, if necessary */
939 if ((cnt = opts->m_len % 4) != 0) {
940 for (; cnt < 4; cnt++) {
941 *(mtod(opts, char *) + opts->m_len) =
942 IPOPT_EOL;
943 opts->m_len++;
944 }
945 }
946 #ifdef ICMPPRINTFS
947 if (icmpprintfs)
948 printf("%d\n", opts->m_len);
949 #endif
950 }
951 /*
952 * Now strip out original options by copying rest of first
953 * mbuf's data back, and adjust the IP length.
954 */
/*
 * After the length fix-ups 'optlen' is reused as the offset of the data
 * that follows the old, option-bearing header.
 */
955 ip->ip_len = htons(ntohs(ip->ip_len) - optlen);
956 ip->ip_hl = sizeof(struct ip) >> 2;
957 m->m_len -= optlen;
958 if (m->m_flags & M_PKTHDR)
959 m->m_pkthdr.len -= optlen;
960 optlen += sizeof(struct ip);
961 memmove(ip + 1, (char *)ip + optlen,
962 (unsigned)(m->m_len - sizeof(struct ip)));
963 }
964 m_tag_delete_nonpersistent(m);
965 m->m_flags &= ~(M_BCAST|M_MCAST);
966
967 /*
968 * Clear any in-bound checksum flags for this packet.
969 */
970 if (m->m_flags & M_PKTHDR)
971 m->m_pkthdr.csum_flags = 0;
972
973 icmp_send(m, opts);
/* Common exit: 'opts' stays ours and is freed here. */
974 done:
975 curlwp_bindx(bound);
976 if (opts)
977 (void)m_free(opts);
978 }
979
980 /*
981 * Send an icmp packet back to the ip level,
982 * after supplying a checksum.
983 */
984 void
985 icmp_send(struct mbuf *m, struct mbuf *opts)
986 {
987 struct ip *ip = mtod(m, struct ip *);
988 int hlen;
989 struct icmp *icp;
990
991 hlen = ip->ip_hl << 2;
992 m->m_data += hlen;
993 m->m_len -= hlen;
994 icp = mtod(m, struct icmp *);
995 icp->icmp_cksum = 0;
996 icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
997 m->m_data -= hlen;
998 m->m_len += hlen;
999 #ifdef ICMPPRINTFS
1000 if (icmpprintfs) {
1001 char sbuf[INET_ADDRSTRLEN], dbuf[INET_ADDRSTRLEN];
1002 printf("icmp_send to destination `%s' from `%s'\n",
1003 IN_PRINT(dbuf, &ip->ip_dst), IN_PRINT(sbuf, &ip->ip_src));
1004 }
1005 #endif
1006 (void)ip_output(m, opts, NULL, 0, NULL, NULL);
1007 }
1008
1009 n_time
1010 iptime(void)
1011 {
1012 struct timeval atv;
1013 u_long t;
1014
1015 microtime(&atv);
1016 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
1017 return (htonl(t));
1018 }
1019
1020 /*
1021 * sysctl helper routine for net.inet.icmp.returndatabytes. ensures
1022 * that the new value is in the correct range.
1023 */
1024 static int
1025 sysctl_net_inet_icmp_returndatabytes(SYSCTLFN_ARGS)
1026 {
1027 int error, t;
1028 struct sysctlnode node;
1029
1030 node = *rnode;
1031 node.sysctl_data = &t;
1032 t = icmpreturndatabytes;
1033 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1034 if (error || newp == NULL)
1035 return (error);
1036
1037 if (t < 8 || t > 512)
1038 return (EINVAL);
1039 icmpreturndatabytes = t;
1040
1041 return (0);
1042 }
1043
1044 /*
1045 * sysctl helper routine for net.inet.icmp.redirtimeout. ensures that
1046 * the given value is not less than zero and then resets the timeout
1047 * queue.
1048 */
1049 static int
1050 sysctl_net_inet_icmp_redirtimeout(SYSCTLFN_ARGS)
1051 {
1052 int error, tmp;
1053 struct sysctlnode node;
1054
1055 mutex_enter(&icmp_mtx);
1056
1057 node = *rnode;
1058 node.sysctl_data = &tmp;
1059 tmp = icmp_redirtimeout;
1060 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1061 if (error || newp == NULL)
1062 goto out;
1063 if (tmp < 0) {
1064 error = EINVAL;
1065 goto out;
1066 }
1067 icmp_redirtimeout = tmp;
1068
1069 /*
1070 * was it a *defined* side-effect that anyone even *reading*
1071 * this value causes these things to happen?
1072 */
1073 if (icmp_redirect_timeout_q != NULL) {
1074 if (icmp_redirtimeout == 0) {
1075 rt_timer_queue_destroy(icmp_redirect_timeout_q);
1076 icmp_redirect_timeout_q = NULL;
1077 } else {
1078 rt_timer_queue_change(icmp_redirect_timeout_q,
1079 icmp_redirtimeout);
1080 }
1081 } else if (icmp_redirtimeout > 0) {
1082 icmp_redirect_timeout_q =
1083 rt_timer_queue_create(icmp_redirtimeout);
1084 }
1085 error = 0;
1086 out:
1087 mutex_exit(&icmp_mtx);
1088 return error;
1089 }
1090
1091 static int
1092 sysctl_net_inet_icmp_stats(SYSCTLFN_ARGS)
1093 {
1094
1095 return (NETSTAT_SYSCTL(icmpstat_percpu, ICMP_NSTATS));
1096 }
1097
1098 static void
1099 sysctl_netinet_icmp_setup(struct sysctllog **clog)
1100 {
1101
1102 sysctl_createv(clog, 0, NULL, NULL,
1103 CTLFLAG_PERMANENT,
1104 CTLTYPE_NODE, "inet", NULL,
1105 NULL, 0, NULL, 0,
1106 CTL_NET, PF_INET, CTL_EOL);
1107 sysctl_createv(clog, 0, NULL, NULL,
1108 CTLFLAG_PERMANENT,
1109 CTLTYPE_NODE, "icmp",
1110 SYSCTL_DESCR("ICMPv4 related settings"),
1111 NULL, 0, NULL, 0,
1112 CTL_NET, PF_INET, IPPROTO_ICMP, CTL_EOL);
1113
1114 sysctl_createv(clog, 0, NULL, NULL,
1115 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1116 CTLTYPE_INT, "maskrepl",
1117 SYSCTL_DESCR("Respond to ICMP_MASKREQ messages"),
1118 NULL, 0, &icmpmaskrepl, 0,
1119 CTL_NET, PF_INET, IPPROTO_ICMP,
1120 ICMPCTL_MASKREPL, CTL_EOL);
1121 sysctl_createv(clog, 0, NULL, NULL,
1122 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1123 CTLTYPE_INT, "returndatabytes",
1124 SYSCTL_DESCR("Number of bytes to return in an ICMP "
1125 "error message"),
1126 sysctl_net_inet_icmp_returndatabytes, 0,
1127 &icmpreturndatabytes, 0,
1128 CTL_NET, PF_INET, IPPROTO_ICMP,
1129 ICMPCTL_RETURNDATABYTES, CTL_EOL);
1130 sysctl_createv(clog, 0, NULL, NULL,
1131 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1132 CTLTYPE_INT, "errppslimit",
1133 SYSCTL_DESCR("Maximum number of outgoing ICMP error "
1134 "messages per second"),
1135 NULL, 0, &icmperrppslim, 0,
1136 CTL_NET, PF_INET, IPPROTO_ICMP,
1137 ICMPCTL_ERRPPSLIMIT, CTL_EOL);
1138 sysctl_createv(clog, 0, NULL, NULL,
1139 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1140 CTLTYPE_INT, "rediraccept",
1141 SYSCTL_DESCR("Accept ICMP_REDIRECT messages"),
1142 NULL, 0, &icmp_rediraccept, 0,
1143 CTL_NET, PF_INET, IPPROTO_ICMP,
1144 ICMPCTL_REDIRACCEPT, CTL_EOL);
1145 sysctl_createv(clog, 0, NULL, NULL,
1146 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1147 CTLTYPE_INT, "redirtimeout",
1148 SYSCTL_DESCR("Lifetime of ICMP_REDIRECT generated "
1149 "routes"),
1150 sysctl_net_inet_icmp_redirtimeout, 0,
1151 &icmp_redirtimeout, 0,
1152 CTL_NET, PF_INET, IPPROTO_ICMP,
1153 ICMPCTL_REDIRTIMEOUT, CTL_EOL);
1154 sysctl_createv(clog, 0, NULL, NULL,
1155 CTLFLAG_PERMANENT,
1156 CTLTYPE_STRUCT, "stats",
1157 SYSCTL_DESCR("ICMP statistics"),
1158 sysctl_net_inet_icmp_stats, 0, NULL, 0,
1159 CTL_NET, PF_INET, IPPROTO_ICMP, ICMPCTL_STATS,
1160 CTL_EOL);
1161 sysctl_createv(clog, 0, NULL, NULL,
1162 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1163 CTLTYPE_INT, "bmcastecho",
1164 SYSCTL_DESCR("Respond to ICMP_ECHO or ICMP_TIMESTAMP "
1165 "message to the broadcast or multicast"),
1166 NULL, 0, &icmpbmcastecho, 0,
1167 CTL_NET, PF_INET, IPPROTO_ICMP, ICMPCTL_BMCASTECHO,
1168 CTL_EOL);
1169 }
1170
1171 void
1172 icmp_statinc(u_int stat)
1173 {
1174
1175 KASSERT(stat < ICMP_NSTATS);
1176 ICMP_STATINC(stat);
1177 }
1178
/*
 * Table of common MTUs, largest first.  The trailing 0 is the
 * end-of-table sentinel that ip_next_mtu() tests for.
 */
static const u_int mtu_table[] = {
	65535,
	65280,
	32000,
	17914,
	9180,
	8166,
	4352,
	2002,
	1492,
	1006,
	508,
	296,
	68,
	0
};
1185
1186 void
1187 icmp_mtudisc(struct icmp *icp, struct in_addr faddr)
1188 {
1189 struct icmp_mtudisc_callback *mc;
1190 struct sockaddr *dst = sintosa(&icmpsrc);
1191 struct rtentry *rt;
1192 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
1193 int error;
1194
1195 rt = rtalloc1(dst, 1);
1196 if (rt == NULL)
1197 return;
1198
1199 /* If we didn't get a host route, allocate one */
1200
1201 if ((rt->rt_flags & RTF_HOST) == 0) {
1202 struct rtentry *nrt;
1203
1204 error = rtrequest(RTM_ADD, dst, rt->rt_gateway, NULL,
1205 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
1206 if (error) {
1207 rt_unref(rt);
1208 return;
1209 }
1210 nrt->rt_rmx = rt->rt_rmx;
1211 rt_unref(rt);
1212 rt = nrt;
1213 }
1214
1215 mutex_enter(&icmp_mtx);
1216 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
1217 mutex_exit(&icmp_mtx);
1218 if (error) {
1219 rt_unref(rt);
1220 return;
1221 }
1222
1223 if (mtu == 0) {
1224 int i = 0;
1225
1226 mtu = ntohs(icp->icmp_ip.ip_len);
1227 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
1228 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
1229 mtu -= (icp->icmp_ip.ip_hl << 2);
1230
1231 /* If we still can't guess a value, try the route */
1232
1233 if (mtu == 0) {
1234 mtu = rt->rt_rmx.rmx_mtu;
1235
1236 /* If no route mtu, default to the interface mtu */
1237
1238 if (mtu == 0)
1239 mtu = rt->rt_ifp->if_mtu;
1240 }
1241
1242 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
1243 if (mtu > mtu_table[i]) {
1244 mtu = mtu_table[i];
1245 break;
1246 }
1247 }
1248
1249 /*
1250 * XXX: RTV_MTU is overloaded, since the admin can set it
1251 * to turn off PMTU for a route, and the kernel can
1252 * set it to indicate a serious problem with PMTU
1253 * on a route. We should be using a separate flag
1254 * for the kernel to indicate this.
1255 */
1256
1257 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1258 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
1259 rt->rt_rmx.rmx_locks |= RTV_MTU;
1260 else if (rt->rt_rmx.rmx_mtu > mtu ||
1261 rt->rt_rmx.rmx_mtu == 0) {
1262 ICMP_STATINC(ICMP_STAT_PMTUCHG);
1263 rt->rt_rmx.rmx_mtu = mtu;
1264 }
1265 }
1266
1267 if (rt != NULL)
1268 rt_unref(rt);
1269
1270 /*
1271 * Notify protocols that the MTU for this destination
1272 * has changed.
1273 */
1274 mutex_enter(&icmp_mtx);
1275 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
1276 mc = LIST_NEXT(mc, mc_list))
1277 (*mc->mc_func)(faddr);
1278 mutex_exit(&icmp_mtx);
1279 }
1280
1281 /*
1282 * Return the next larger or smaller MTU plateau (table from RFC 1191)
1283 * given current value MTU. If DIR is less than zero, a larger plateau
1284 * is returned; otherwise, a smaller value is returned.
1285 */
1286 u_int
1287 ip_next_mtu(u_int mtu, int dir) /* XXX */
1288 {
1289 int i;
1290
1291 for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
1292 if (mtu >= mtu_table[i])
1293 break;
1294 }
1295
1296 if (dir < 0) {
1297 if (i == 0) {
1298 return 0;
1299 } else {
1300 return mtu_table[i - 1];
1301 }
1302 } else {
1303 if (mtu_table[i] == 0) {
1304 return 0;
1305 } else if (mtu > mtu_table[i]) {
1306 return mtu_table[i];
1307 } else {
1308 return mtu_table[i + 1];
1309 }
1310 }
1311 }
1312
1313 static void
1314 icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
1315 {
1316
1317 KASSERT(rt != NULL);
1318 rt_assert_referenced(rt);
1319
1320 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1321 (RTF_DYNAMIC | RTF_HOST)) {
1322 rtrequest(RTM_DELETE, rt_getkey(rt),
1323 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1324 } else {
1325 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1326 rt->rt_rmx.rmx_mtu = 0;
1327 }
1328 }
1329 }
1330
1331 static void
1332 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
1333 {
1334
1335 KASSERT(rt != NULL);
1336 rt_assert_referenced(rt);
1337
1338 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1339 (RTF_DYNAMIC | RTF_HOST)) {
1340 rtrequest(RTM_DELETE, rt_getkey(rt),
1341 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1342 }
1343 }
1344
1345 /*
1346 * Perform rate limit check.
1347 * Returns 0 if it is okay to send the icmp packet.
1348 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1349 * limitation.
1350 *
1351 * XXX per-destination/type check necessary?
1352 */
1353 int
1354 icmp_ratelimit(const struct in_addr *dst, const int type,
1355 const int code)
1356 {
1357
1358 /* PPS limit */
1359 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1360 icmperrppslim)) {
1361 /* The packet is subject to rate limit */
1362 return 1;
1363 }
1364
1365 /* okay to send */
1366 return 0;
1367 }
1368