/*	$NetBSD: ip_icmp.c,v 1.62 2001/10/29 07:02:33 simonb Exp $	*/
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * This code is derived from software contributed to The NetBSD Foundation
41 * by Jason R. Thorpe of Zembu Labs, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the NetBSD
54 * Foundation, Inc. and its contributors.
55 * 4. Neither the name of The NetBSD Foundation nor the names of its
56 * contributors may be used to endorse or promote products derived
57 * from this software without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
60 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
70 */
71
72 /*
73 * Copyright (c) 1982, 1986, 1988, 1993
74 * The Regents of the University of California. All rights reserved.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * This product includes software developed by the University of
87 * California, Berkeley and its contributors.
88 * 4. Neither the name of the University nor the names of its contributors
89 * may be used to endorse or promote products derived from this software
90 * without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
105 */
106
107 #include "opt_ipsec.h"
108
109 #include <sys/param.h>
110 #include <sys/systm.h>
111 #include <sys/malloc.h>
112 #include <sys/mbuf.h>
113 #include <sys/protosw.h>
114 #include <sys/socket.h>
115 #include <sys/time.h>
116 #include <sys/kernel.h>
117 #include <sys/sysctl.h>
118
119 #include <net/if.h>
120 #include <net/route.h>
121
122 #include <netinet/in.h>
123 #include <netinet/in_systm.h>
124 #include <netinet/in_var.h>
125 #include <netinet/ip.h>
126 #include <netinet/ip_icmp.h>
127 #include <netinet/ip_var.h>
128 #include <netinet/in_pcb.h>
129 #include <netinet/icmp_var.h>
130
131 #ifdef IPSEC
132 #include <netinet6/ipsec.h>
133 #include <netkey/key.h>
134 #endif
135
136 #include <machine/stdarg.h>
137
138 /*
139 * ICMP routines: error generation, receive packet processing, and
140 * routines to turnaround packets back to the originator, and
141 * host table maintenance routines.
142 */
143
/* Address mask requests are answered only when this is non-zero. */
int	icmpmaskrepl = 0;
#ifdef ICMPPRINTFS
/* Non-zero enables the debugging printf()s in this file. */
int	icmpprintfs = 0;
#endif
/* Upper bound on the payload bytes of the offending datagram quoted in an error. */
int	icmpreturndatabytes = 8;
149
150 /*
151 * List of callbacks to notify when Path MTU changes are made.
152 */
153 struct icmp_mtudisc_callback {
154 LIST_ENTRY(icmp_mtudisc_callback) mc_list;
155 void (*mc_func) __P((struct in_addr));
156 };
157
158 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
159 LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
160
161 #if 0
162 static int ip_next_mtu __P((int, int));
163 #else
164 /*static*/ int ip_next_mtu __P((int, int));
165 #endif
166
167 extern int icmperrppslim;
168 static int icmperrpps_count = 0;
169 static struct timeval icmperrppslim_last;
170
171 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
172
173 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
174
175 /*
176 * Register a Path MTU Discovery callback.
177 */
178 void
179 icmp_mtudisc_callback_register(func)
180 void (*func) __P((struct in_addr));
181 {
182 struct icmp_mtudisc_callback *mc;
183
184 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
185 mc = LIST_NEXT(mc, mc_list)) {
186 if (mc->mc_func == func)
187 return;
188 }
189
190 mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
191 if (mc == NULL)
192 panic("icmp_mtudisc_callback_register");
193
194 mc->mc_func = func;
195 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
196 }
197
198 /*
199 * Generate an error packet of type error
200 * in response to bad packet ip.
201 */
202 void
203 icmp_error(n, type, code, dest, destifp)
204 struct mbuf *n;
205 int type, code;
206 n_long dest;
207 struct ifnet *destifp;
208 {
209 struct ip *oip = mtod(n, struct ip *), *nip;
210 unsigned oiplen = oip->ip_hl << 2;
211 struct icmp *icp;
212 struct mbuf *m;
213 unsigned icmplen, mblen;
214
215 #ifdef ICMPPRINTFS
216 if (icmpprintfs)
217 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
218 #endif
219 if (type != ICMP_REDIRECT)
220 icmpstat.icps_error++;
221 /*
222 * Don't send error if the original packet was encrypted.
223 * Don't send error if not the first fragment of message.
224 * Don't error if the old packet protocol was ICMP
225 * error message, only known informational types.
226 */
227 if (n->m_flags & M_DECRYPTED)
228 goto freeit;
229 if (oip->ip_off &~ (IP_MF|IP_DF))
230 goto freeit;
231 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
232 n->m_len >= oiplen + ICMP_MINLEN &&
233 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
234 icmpstat.icps_oldicmp++;
235 goto freeit;
236 }
237 /* Don't send error in response to a multicast or broadcast packet */
238 if (n->m_flags & (M_BCAST|M_MCAST))
239 goto freeit;
240
241 /*
242 * First, do a rate limitation check.
243 */
244 if (icmp_ratelimit(&oip->ip_src, type, code)) {
245 /* XXX stat */
246 goto freeit;
247 }
248
249 /*
250 * Now, formulate icmp message
251 */
252 icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
253 /*
254 * Defend against mbuf chains shorter than oip->ip_len:
255 */
256 mblen = 0;
257 for (m = n; m && (mblen < icmplen); m = m->m_next)
258 mblen += m->m_len;
259 icmplen = min(mblen, icmplen);
260
261 /*
262 * As we are not required to return everything we have,
263 * we return whatever we can return at ease.
264 *
265 * Note that ICMP datagrams longer than 576 octets are out of spec
266 * according to RFC1812; the limit on icmpreturndatabytes below in
267 * icmp_sysctl will keep things below that limit.
268 */
269
270 KASSERT(ICMP_MINLEN <= MCLBYTES);
271
272 if (icmplen + ICMP_MINLEN > MCLBYTES)
273 icmplen = MCLBYTES - ICMP_MINLEN;
274
275 m = m_gethdr(M_DONTWAIT, MT_HEADER);
276 if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
277 MCLGET(m, M_DONTWAIT);
278 if ((m->m_flags & M_EXT) == 0) {
279 m_freem(m);
280 m = NULL;
281 }
282 }
283 if (m == NULL)
284 goto freeit;
285 m->m_len = icmplen + ICMP_MINLEN;
286 if ((m->m_flags & M_EXT) == 0)
287 MH_ALIGN(m, m->m_len);
288 icp = mtod(m, struct icmp *);
289 if ((u_int)type > ICMP_MAXTYPE)
290 panic("icmp_error");
291 icmpstat.icps_outhist[type]++;
292 icp->icmp_type = type;
293 if (type == ICMP_REDIRECT)
294 icp->icmp_gwaddr.s_addr = dest;
295 else {
296 icp->icmp_void = 0;
297 /*
298 * The following assignments assume an overlay with the
299 * zeroed icmp_void field.
300 */
301 if (type == ICMP_PARAMPROB) {
302 icp->icmp_pptr = code;
303 code = 0;
304 } else if (type == ICMP_UNREACH &&
305 code == ICMP_UNREACH_NEEDFRAG && destifp)
306 icp->icmp_nextmtu = htons(destifp->if_mtu);
307 }
308
309 HTONS(oip->ip_off);
310 HTONS(oip->ip_len);
311 icp->icmp_code = code;
312 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
313 nip = &icp->icmp_ip;
314
315 /*
316 * Now, copy old ip header (without options)
317 * in front of icmp message.
318 */
319 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
320 panic("icmp len");
321 m->m_data -= sizeof(struct ip);
322 m->m_len += sizeof(struct ip);
323 m->m_pkthdr.len = m->m_len;
324 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
325 nip = mtod(m, struct ip *);
326 /* ip_v set in ip_output */
327 nip->ip_hl = sizeof(struct ip) >> 2;
328 nip->ip_tos = 0;
329 nip->ip_len = m->m_len;
330 /* ip_id set in ip_output */
331 nip->ip_off = 0;
332 /* ip_ttl set in icmp_reflect */
333 nip->ip_p = IPPROTO_ICMP;
334 nip->ip_src = oip->ip_src;
335 nip->ip_dst = oip->ip_dst;
336 icmp_reflect(m);
337
338 freeit:
339 m_freem(n);
340 }
341
342 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
343 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
344 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
345 struct sockaddr_in icmpmask = { 8, 0 };
346
347 /*
348 * Process a received ICMP message.
349 */
350 void
351 #if __STDC__
352 icmp_input(struct mbuf *m, ...)
353 #else
354 icmp_input(m, va_alist)
355 struct mbuf *m;
356 va_dcl
357 #endif
358 {
359 int proto;
360 struct icmp *icp;
361 struct ip *ip = mtod(m, struct ip *);
362 int icmplen;
363 int i;
364 struct in_ifaddr *ia;
365 void *(*ctlfunc) __P((int, struct sockaddr *, void *));
366 int code;
367 int hlen;
368 va_list ap;
369
370 va_start(ap, m);
371 hlen = va_arg(ap, int);
372 proto = va_arg(ap, int);
373 va_end(ap);
374
375 /*
376 * Locate icmp structure in mbuf, and check
377 * that not corrupted and of at least minimum length.
378 */
379 icmplen = ip->ip_len - hlen;
380 #ifdef ICMPPRINTFS
381 if (icmpprintfs)
382 printf("icmp_input from %x to %x, len %d\n",
383 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
384 icmplen);
385 #endif
386 if (icmplen < ICMP_MINLEN) {
387 icmpstat.icps_tooshort++;
388 goto freeit;
389 }
390 i = hlen + min(icmplen, ICMP_ADVLENMIN);
391 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
392 icmpstat.icps_tooshort++;
393 return;
394 }
395 ip = mtod(m, struct ip *);
396 m->m_len -= hlen;
397 m->m_data += hlen;
398 icp = mtod(m, struct icmp *);
399 if (in_cksum(m, icmplen)) {
400 icmpstat.icps_checksum++;
401 goto freeit;
402 }
403 m->m_len += hlen;
404 m->m_data -= hlen;
405
406 #ifdef ICMPPRINTFS
407 /*
408 * Message type specific processing.
409 */
410 if (icmpprintfs)
411 printf("icmp_input, type %d code %d\n", icp->icmp_type,
412 icp->icmp_code);
413 #endif
414 if (icp->icmp_type > ICMP_MAXTYPE)
415 goto raw;
416 icmpstat.icps_inhist[icp->icmp_type]++;
417 code = icp->icmp_code;
418 switch (icp->icmp_type) {
419
420 case ICMP_UNREACH:
421 switch (code) {
422 case ICMP_UNREACH_NET:
423 case ICMP_UNREACH_HOST:
424 case ICMP_UNREACH_PROTOCOL:
425 case ICMP_UNREACH_PORT:
426 case ICMP_UNREACH_SRCFAIL:
427 code += PRC_UNREACH_NET;
428 break;
429
430 case ICMP_UNREACH_NEEDFRAG:
431 code = PRC_MSGSIZE;
432 break;
433
434 case ICMP_UNREACH_NET_UNKNOWN:
435 case ICMP_UNREACH_NET_PROHIB:
436 case ICMP_UNREACH_TOSNET:
437 code = PRC_UNREACH_NET;
438 break;
439
440 case ICMP_UNREACH_HOST_UNKNOWN:
441 case ICMP_UNREACH_ISOLATED:
442 case ICMP_UNREACH_HOST_PROHIB:
443 case ICMP_UNREACH_TOSHOST:
444 code = PRC_UNREACH_HOST;
445 break;
446
447 default:
448 goto badcode;
449 }
450 goto deliver;
451
452 case ICMP_TIMXCEED:
453 if (code > 1)
454 goto badcode;
455 code += PRC_TIMXCEED_INTRANS;
456 goto deliver;
457
458 case ICMP_PARAMPROB:
459 if (code > 1)
460 goto badcode;
461 code = PRC_PARAMPROB;
462 goto deliver;
463
464 case ICMP_SOURCEQUENCH:
465 if (code)
466 goto badcode;
467 code = PRC_QUENCH;
468 goto deliver;
469
470 deliver:
471 /*
472 * Problem with datagram; advise higher level routines.
473 */
474 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
475 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
476 icmpstat.icps_badlen++;
477 goto freeit;
478 }
479 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
480 goto badcode;
481 NTOHS(icp->icmp_ip.ip_len);
482 #ifdef ICMPPRINTFS
483 if (icmpprintfs)
484 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
485 #endif
486 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
487 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
488 if (ctlfunc)
489 (void) (*ctlfunc)(code, sintosa(&icmpsrc),
490 &icp->icmp_ip);
491 break;
492
493 badcode:
494 icmpstat.icps_badcode++;
495 break;
496
497 case ICMP_ECHO:
498 icp->icmp_type = ICMP_ECHOREPLY;
499 goto reflect;
500
501 case ICMP_TSTAMP:
502 if (icmplen < ICMP_TSLEN) {
503 icmpstat.icps_badlen++;
504 break;
505 }
506 icp->icmp_type = ICMP_TSTAMPREPLY;
507 icp->icmp_rtime = iptime();
508 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
509 goto reflect;
510
511 case ICMP_MASKREQ:
512 if (icmpmaskrepl == 0)
513 break;
514 /*
515 * We are not able to respond with all ones broadcast
516 * unless we receive it over a point-to-point interface.
517 */
518 if (icmplen < ICMP_MASKLEN) {
519 icmpstat.icps_badlen++;
520 break;
521 }
522 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
523 in_nullhost(ip->ip_dst))
524 icmpdst.sin_addr = ip->ip_src;
525 else
526 icmpdst.sin_addr = ip->ip_dst;
527 ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
528 m->m_pkthdr.rcvif));
529 if (ia == 0)
530 break;
531 icp->icmp_type = ICMP_MASKREPLY;
532 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
533 if (in_nullhost(ip->ip_src)) {
534 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
535 ip->ip_src = ia->ia_broadaddr.sin_addr;
536 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
537 ip->ip_src = ia->ia_dstaddr.sin_addr;
538 }
539 reflect:
540 icmpstat.icps_reflect++;
541 icmpstat.icps_outhist[icp->icmp_type]++;
542 icmp_reflect(m);
543 return;
544
545 case ICMP_REDIRECT:
546 if (code > 3)
547 goto badcode;
548 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
549 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
550 icmpstat.icps_badlen++;
551 break;
552 }
553 /*
554 * Short circuit routing redirects to force
555 * immediate change in the kernel's routing
556 * tables. The message is also handed to anyone
557 * listening on a raw socket (e.g. the routing
558 * daemon for use in updating its tables).
559 */
560 icmpgw.sin_addr = ip->ip_src;
561 icmpdst.sin_addr = icp->icmp_gwaddr;
562 #ifdef ICMPPRINTFS
563 if (icmpprintfs)
564 printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
565 icp->icmp_gwaddr);
566 #endif
567 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
568 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
569 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
570 sintosa(&icmpgw), (struct rtentry **)0);
571 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
572 #ifdef IPSEC
573 key_sa_routechange((struct sockaddr *)&icmpsrc);
574 #endif
575 break;
576
577 /*
578 * No kernel processing for the following;
579 * just fall through to send to raw listener.
580 */
581 case ICMP_ECHOREPLY:
582 case ICMP_ROUTERADVERT:
583 case ICMP_ROUTERSOLICIT:
584 case ICMP_TSTAMPREPLY:
585 case ICMP_IREQREPLY:
586 case ICMP_MASKREPLY:
587 default:
588 break;
589 }
590
591 raw:
592 rip_input(m, hlen, proto);
593 return;
594
595 freeit:
596 m_freem(m);
597 return;
598 }
599
600 /*
601 * Reflect the ip packet back to the source
602 */
603 void
604 icmp_reflect(m)
605 struct mbuf *m;
606 {
607 struct ip *ip = mtod(m, struct ip *);
608 struct in_ifaddr *ia;
609 struct ifaddr *ifa;
610 struct sockaddr_in *sin = 0;
611 struct in_addr t;
612 struct mbuf *opts = 0;
613 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
614
615 if (!in_canforward(ip->ip_src) &&
616 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
617 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
618 m_freem(m); /* Bad return address */
619 goto done; /* ip_output() will check for broadcast */
620 }
621 t = ip->ip_dst;
622 ip->ip_dst = ip->ip_src;
623 /*
624 * If the incoming packet was addressed directly to us, use
625 * dst as the src for the reply. Otherwise (broadcast or
626 * anonymous), use an address which corresponds to the
627 * incoming interface, with a preference for the address which
628 * corresponds to the route to the destination of the ICMP.
629 */
630
631 /* Look for packet addressed to us */
632 INADDR_TO_IA(t, ia);
633
634 /* look for packet sent to broadcast address */
635 if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
636 for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
637 ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
638 if (ifa->ifa_addr->sa_family != AF_INET)
639 continue;
640 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
641 ia = ifatoia(ifa);
642 break;
643 }
644 }
645 }
646
647 if (ia)
648 sin = &ia->ia_addr;
649
650 icmpdst.sin_addr = t;
651
652 /* if the packet is addressed somewhere else, compute the
653 source address for packets routed back to the source, and
654 use that, if it's an address on the interface which
655 received the packet */
656 if (sin == (struct sockaddr_in *)0) {
657 struct sockaddr_in sin_dst;
658 struct route icmproute;
659 int errornum;
660
661 sin_dst.sin_family = AF_INET;
662 sin_dst.sin_len = sizeof(struct sockaddr_in);
663 sin_dst.sin_addr = ip->ip_dst;
664 bzero(&icmproute, sizeof(icmproute));
665 errornum = 0;
666 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
667 /* errornum is never used */
668 if (icmproute.ro_rt)
669 RTFREE(icmproute.ro_rt);
670 /* check to make sure sin is a source address on rcvif */
671 if (sin) {
672 t = sin->sin_addr;
673 sin = (struct sockaddr_in *)0;
674 INADDR_TO_IA(t, ia);
675 while (ia) {
676 if (ia->ia_ifp == m->m_pkthdr.rcvif) {
677 sin = &ia->ia_addr;
678 break;
679 }
680 NEXT_IA_WITH_SAME_ADDR(ia);
681 }
682 }
683 }
684
685 /* if it was not addressed to us, but the route doesn't go out
686 the source interface, pick an address on the source
687 interface. This can happen when routing is asymmetric, or
688 when the incoming packet was encapsulated */
689 if (sin == (struct sockaddr_in *)0) {
690 for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
691 ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
692 if (ifa->ifa_addr->sa_family != AF_INET)
693 continue;
694 sin = &(ifatoia(ifa)->ia_addr);
695 break;
696 }
697 }
698
699 /*
700 * The following happens if the packet was not addressed to us,
701 * and was received on an interface with no IP address:
702 * We find the first AF_INET address on the first non-loopback
703 * interface.
704 */
705 if (sin == (struct sockaddr_in *)0)
706 for (ia = in_ifaddr.tqh_first; ia != NULL;
707 ia = ia->ia_list.tqe_next) {
708 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
709 continue;
710 sin = &ia->ia_addr;
711 break;
712 }
713
714 /*
715 * If we still didn't find an address, punt. We could have an
716 * interface up (and receiving packets) with no address.
717 */
718 if (sin == (struct sockaddr_in *)0) {
719 m_freem(m);
720 goto done;
721 }
722
723 ip->ip_src = sin->sin_addr;
724 ip->ip_ttl = MAXTTL;
725
726 if (optlen > 0) {
727 u_char *cp;
728 int opt, cnt;
729 u_int len;
730
731 /*
732 * Retrieve any source routing from the incoming packet;
733 * add on any record-route or timestamp options.
734 */
735 cp = (u_char *) (ip + 1);
736 if ((opts = ip_srcroute()) == 0 &&
737 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
738 opts->m_len = sizeof(struct in_addr);
739 *mtod(opts, struct in_addr *) = zeroin_addr;
740 }
741 if (opts) {
742 #ifdef ICMPPRINTFS
743 if (icmpprintfs)
744 printf("icmp_reflect optlen %d rt %d => ",
745 optlen, opts->m_len);
746 #endif
747 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
748 opt = cp[IPOPT_OPTVAL];
749 if (opt == IPOPT_EOL)
750 break;
751 if (opt == IPOPT_NOP)
752 len = 1;
753 else {
754 if (cnt < IPOPT_OLEN + sizeof(*cp))
755 break;
756 len = cp[IPOPT_OLEN];
757 if (len < IPOPT_OLEN + sizeof(*cp) ||
758 len > cnt)
759 break;
760 }
761 /*
762 * Should check for overflow, but it "can't happen"
763 */
764 if (opt == IPOPT_RR || opt == IPOPT_TS ||
765 opt == IPOPT_SECURITY) {
766 bcopy((caddr_t)cp,
767 mtod(opts, caddr_t) + opts->m_len, len);
768 opts->m_len += len;
769 }
770 }
771 /* Terminate & pad, if necessary */
772 if ((cnt = opts->m_len % 4) != 0) {
773 for (; cnt < 4; cnt++) {
774 *(mtod(opts, caddr_t) + opts->m_len) =
775 IPOPT_EOL;
776 opts->m_len++;
777 }
778 }
779 #ifdef ICMPPRINTFS
780 if (icmpprintfs)
781 printf("%d\n", opts->m_len);
782 #endif
783 }
784 /*
785 * Now strip out original options by copying rest of first
786 * mbuf's data back, and adjust the IP length.
787 */
788 ip->ip_len -= optlen;
789 ip->ip_hl = sizeof(struct ip) >> 2;
790 m->m_len -= optlen;
791 if (m->m_flags & M_PKTHDR)
792 m->m_pkthdr.len -= optlen;
793 optlen += sizeof(struct ip);
794 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
795 (unsigned)(m->m_len - sizeof(struct ip)));
796 }
797 m->m_flags &= ~(M_BCAST|M_MCAST);
798 icmp_send(m, opts);
799 done:
800 if (opts)
801 (void)m_free(opts);
802 }
803
804 /*
805 * Send an icmp packet back to the ip level,
806 * after supplying a checksum.
807 */
808 void
809 icmp_send(m, opts)
810 struct mbuf *m;
811 struct mbuf *opts;
812 {
813 struct ip *ip = mtod(m, struct ip *);
814 int hlen;
815 struct icmp *icp;
816
817 hlen = ip->ip_hl << 2;
818 m->m_data += hlen;
819 m->m_len -= hlen;
820 icp = mtod(m, struct icmp *);
821 icp->icmp_cksum = 0;
822 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
823 m->m_data -= hlen;
824 m->m_len += hlen;
825 #ifdef ICMPPRINTFS
826 if (icmpprintfs)
827 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
828 #endif
829 #ifdef IPSEC
830 /* Don't lookup socket */
831 (void)ipsec_setsocket(m, NULL);
832 #endif
833 (void) ip_output(m, opts, NULL, 0, NULL);
834 }
835
836 n_time
837 iptime()
838 {
839 struct timeval atv;
840 u_long t;
841
842 microtime(&atv);
843 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
844 return (htonl(t));
845 }
846
847 int
848 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
849 int *name;
850 u_int namelen;
851 void *oldp;
852 size_t *oldlenp;
853 void *newp;
854 size_t newlen;
855 {
856 int arg, error;
857
858 /* All sysctl names at this level are terminal. */
859 if (namelen != 1)
860 return (ENOTDIR);
861
862 switch (name[0])
863 {
864 case ICMPCTL_MASKREPL:
865 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
866 break;
867 case ICMPCTL_RETURNDATABYTES:
868 arg = icmpreturndatabytes;
869 error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
870 if (error)
871 break;
872 if ((arg >= 8) || (arg <= 512))
873 icmpreturndatabytes = arg;
874 else
875 error = EINVAL;
876 break;
877 case ICMPCTL_ERRPPSLIMIT:
878 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
879 break;
880 default:
881 error = ENOPROTOOPT;
882 break;
883 }
884 return error;
885 }
886
887 void
888 icmp_mtudisc(icp, faddr)
889 struct icmp *icp;
890 struct in_addr faddr;
891 {
892 struct icmp_mtudisc_callback *mc;
893 struct sockaddr *dst = sintosa(&icmpsrc);
894 struct rtentry *rt;
895 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
896 int error;
897
898 /* Table of common MTUs: */
899
900 static const u_int mtu_table[] = { 65535, 65280, 32000, 17914, 9180,
901 8166, 4352, 2002, 1492, 1006, 508, 296, 68, 0};
902
903 rt = rtalloc1(dst, 1);
904 if (rt == 0)
905 return;
906
907 /* If we didn't get a host route, allocate one */
908
909 if ((rt->rt_flags & RTF_HOST) == 0) {
910 struct rtentry *nrt;
911
912 error = rtrequest((int) RTM_ADD, dst,
913 (struct sockaddr *) rt->rt_gateway,
914 (struct sockaddr *) 0,
915 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
916 if (error) {
917 rtfree(rt);
918 return;
919 }
920 nrt->rt_rmx = rt->rt_rmx;
921 rtfree(rt);
922 rt = nrt;
923 }
924 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
925 if (error) {
926 rtfree(rt);
927 return;
928 }
929
930 if (mtu == 0) {
931 int i = 0;
932
933 mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
934 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
935 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
936 mtu -= (icp->icmp_ip.ip_hl << 2);
937
938 /* If we still can't guess a value, try the route */
939
940 if (mtu == 0) {
941 mtu = rt->rt_rmx.rmx_mtu;
942
943 /* If no route mtu, default to the interface mtu */
944
945 if (mtu == 0)
946 mtu = rt->rt_ifp->if_mtu;
947 }
948
949 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
950 if (mtu > mtu_table[i]) {
951 mtu = mtu_table[i];
952 break;
953 }
954 }
955
956 /*
957 * XXX: RTV_MTU is overloaded, since the admin can set it
958 * to turn off PMTU for a route, and the kernel can
959 * set it to indicate a serious problem with PMTU
960 * on a route. We should be using a separate flag
961 * for the kernel to indicate this.
962 */
963
964 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
965 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
966 rt->rt_rmx.rmx_locks |= RTV_MTU;
967 else if (rt->rt_rmx.rmx_mtu > mtu ||
968 rt->rt_rmx.rmx_mtu == 0) {
969 icmpstat.icps_pmtuchg++;
970 rt->rt_rmx.rmx_mtu = mtu;
971 }
972 }
973
974 if (rt)
975 rtfree(rt);
976
977 /*
978 * Notify protocols that the MTU for this destination
979 * has changed.
980 */
981 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
982 mc = LIST_NEXT(mc, mc_list))
983 (*mc->mc_func)(faddr);
984 }
985
/*
 * Step from mtu to a neighbouring MTU plateau (table from RFC 1191).
 *
 * With dir < 0 the next larger plateau is returned, or 0 when mtu is
 * already at or above the largest one.  Otherwise a smaller value is
 * returned: the plateau at or below mtu when mtu lies strictly above
 * it, the next lower plateau when mtu sits exactly on one, and 0 when
 * nothing smaller is left.
 *
 * mtu is compared as an unsigned quantity.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	static const unsigned int plateau[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int nplateau = (int)(sizeof(plateau) / sizeof(plateau[0]));
	int pos = 0;

	/*
	 * Find the first plateau that is not above mtu.  The trailing 0
	 * entry guarantees that the search stops inside the table.
	 */
	while (pos < nplateau && (unsigned int)mtu < plateau[pos])
		pos++;

	if (dir < 0)
		return (pos == 0) ? 0 : plateau[pos - 1];

	if (plateau[pos] == 0)
		return 0;
	if ((unsigned int)mtu > plateau[pos])
		return plateau[pos];
	return plateau[pos + 1];
}
1023
1024 static void
1025 icmp_mtudisc_timeout(rt, r)
1026 struct rtentry *rt;
1027 struct rttimer *r;
1028 {
1029 if (rt == NULL)
1030 panic("icmp_mtudisc_timeout: bad route to timeout");
1031 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1032 (RTF_DYNAMIC | RTF_HOST)) {
1033 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1034 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1035 } else {
1036 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1037 rt->rt_rmx.rmx_mtu = 0;
1038 }
1039 }
1040 }
1041
1042 /*
1043 * Perform rate limit check.
1044 * Returns 0 if it is okay to send the icmp packet.
1045 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1046 * limitation.
1047 *
1048 * XXX per-destination/type check necessary?
1049 */
1050 static int
1051 icmp_ratelimit(dst, type, code)
1052 const struct in_addr *dst;
1053 const int type; /* not used at this moment */
1054 const int code; /* not used at this moment */
1055 {
1056
1057 /* PPS limit */
1058 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1059 icmperrppslim)) {
1060 /* The packet is subject to rate limit */
1061 return 1;
1062 }
1063
1064 /*okay to send*/
1065 return 0;
1066 }
1067