ip_icmp.c revision 1.43 1 /* $NetBSD: ip_icmp.c,v 1.43 2000/03/01 12:49:32 itojun Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the NetBSD
51 * Foundation, Inc. and its contributors.
52 * 4. Neither the name of The NetBSD Foundation nor the names of its
53 * contributors may be used to endorse or promote products derived
54 * from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 /*
70 * Copyright (c) 1982, 1986, 1988, 1993
71 * The Regents of the University of California. All rights reserved.
72 *
73 * Redistribution and use in source and binary forms, with or without
74 * modification, are permitted provided that the following conditions
75 * are met:
76 * 1. Redistributions of source code must retain the above copyright
77 * notice, this list of conditions and the following disclaimer.
78 * 2. Redistributions in binary form must reproduce the above copyright
79 * notice, this list of conditions and the following disclaimer in the
80 * documentation and/or other materials provided with the distribution.
81 * 3. All advertising materials mentioning features or use of this software
82 * must display the following acknowledgement:
83 * This product includes software developed by the University of
84 * California, Berkeley and its contributors.
85 * 4. Neither the name of the University nor the names of its contributors
86 * may be used to endorse or promote products derived from this software
87 * without specific prior written permission.
88 *
89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99 * SUCH DAMAGE.
100 *
101 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
102 */
103
104 #include "opt_ipsec.h"
105
106 #include <sys/param.h>
107 #include <sys/systm.h>
108 #include <sys/malloc.h>
109 #include <sys/mbuf.h>
110 #include <sys/protosw.h>
111 #include <sys/socket.h>
112 #include <sys/time.h>
113 #include <sys/kernel.h>
114 #include <sys/proc.h>
115
116 #include <vm/vm.h>
117 #include <sys/sysctl.h>
118
119 #include <net/if.h>
120 #include <net/route.h>
121
122 #include <netinet/in.h>
123 #include <netinet/in_systm.h>
124 #include <netinet/in_var.h>
125 #include <netinet/ip.h>
126 #include <netinet/ip_icmp.h>
127 #include <netinet/ip_var.h>
128 #include <netinet/in_pcb.h>
129 #include <netinet/icmp_var.h>
130
131 #ifdef IPSEC
132 #include <netinet6/ipsec.h>
133 #include <netkey/key.h>
134 #include <netkey/key_debug.h>
135 #endif
136
137 #include <machine/stdarg.h>
138
/*
 * ICMP routines: error generation, receive packet processing,
 * routines to turn packets around and send them back to the
 * originator, and host table maintenance routines.
 */
144
145 int icmpmaskrepl = 0;
146 #ifdef ICMPPRINTFS
147 int icmpprintfs = 0;
148 #endif
149
150 #if 0
151 static int ip_next_mtu __P((int, int));
152 #else
153 /*static*/ int ip_next_mtu __P((int, int));
154 #endif
155
156 extern struct timeval icmperrratelim;
157
158 static void icmp_mtudisc __P((struct icmp *));
159 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
160
161 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
162
163 /*
164 * Generate an error packet of type error
165 * in response to bad packet ip.
166 */
167 void
168 icmp_error(n, type, code, dest, destifp)
169 struct mbuf *n;
170 int type, code;
171 n_long dest;
172 struct ifnet *destifp;
173 {
174 register struct ip *oip = mtod(n, struct ip *), *nip;
175 register unsigned oiplen = oip->ip_hl << 2;
176 register struct icmp *icp;
177 register struct mbuf *m;
178 unsigned icmplen;
179
180 #ifdef ICMPPRINTFS
181 if (icmpprintfs)
182 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
183 #endif
184 if (type != ICMP_REDIRECT)
185 icmpstat.icps_error++;
186 /*
187 * Don't send error if the original packet was encrypted.
188 * Don't send error if not the first fragment of message.
189 * Don't error if the old packet protocol was ICMP
190 * error message, only known informational types.
191 */
192 if (n->m_flags & M_DECRYPTED)
193 goto freeit;
194 if (oip->ip_off &~ (IP_MF|IP_DF))
195 goto freeit;
196 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
197 n->m_len >= oiplen + ICMP_MINLEN &&
198 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
199 icmpstat.icps_oldicmp++;
200 goto freeit;
201 }
202 /* Don't send error in response to a multicast or broadcast packet */
203 if (n->m_flags & (M_BCAST|M_MCAST))
204 goto freeit;
205
206 /*
207 * First, do a rate limitation check.
208 */
209 if (icmp_ratelimit(&oip->ip_src, type, code)) {
210 /* XXX stat */
211 goto freeit;
212 }
213
214 /*
215 * Now, formulate icmp message
216 */
217 m = m_gethdr(M_DONTWAIT, MT_HEADER);
218 if (m == NULL)
219 goto freeit;
220 icmplen = oiplen + min(8, oip->ip_len - oiplen);
221 m->m_len = icmplen + ICMP_MINLEN;
222 MH_ALIGN(m, m->m_len);
223 icp = mtod(m, struct icmp *);
224 if ((u_int)type > ICMP_MAXTYPE)
225 panic("icmp_error");
226 icmpstat.icps_outhist[type]++;
227 icp->icmp_type = type;
228 if (type == ICMP_REDIRECT)
229 icp->icmp_gwaddr.s_addr = dest;
230 else {
231 icp->icmp_void = 0;
232 /*
233 * The following assignments assume an overlay with the
234 * zeroed icmp_void field.
235 */
236 if (type == ICMP_PARAMPROB) {
237 icp->icmp_pptr = code;
238 code = 0;
239 } else if (type == ICMP_UNREACH &&
240 code == ICMP_UNREACH_NEEDFRAG && destifp)
241 icp->icmp_nextmtu = htons(destifp->if_mtu);
242 }
243
244 HTONS(oip->ip_off);
245 HTONS(oip->ip_len);
246 icp->icmp_code = code;
247 bcopy((caddr_t)oip, (caddr_t)&icp->icmp_ip, icmplen);
248 nip = &icp->icmp_ip;
249
250 /*
251 * Now, copy old ip header (without options)
252 * in front of icmp message.
253 */
254 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
255 panic("icmp len");
256 m->m_data -= sizeof(struct ip);
257 m->m_len += sizeof(struct ip);
258 m->m_pkthdr.len = m->m_len;
259 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
260 nip = mtod(m, struct ip *);
261 bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
262 nip->ip_len = m->m_len;
263 nip->ip_hl = sizeof(struct ip) >> 2;
264 nip->ip_p = IPPROTO_ICMP;
265 nip->ip_tos = 0;
266 icmp_reflect(m);
267
268 freeit:
269 m_freem(n);
270 }
271
272 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
273 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
274 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
275 struct sockaddr_in icmpmask = { 8, 0 };
276
277 /*
278 * Process a received ICMP message.
279 */
280 void
281 #if __STDC__
282 icmp_input(struct mbuf *m, ...)
283 #else
284 icmp_input(m, va_alist)
285 struct mbuf *m;
286 va_dcl
287 #endif
288 {
289 int proto;
290 register struct icmp *icp;
291 register struct ip *ip = mtod(m, struct ip *);
292 int icmplen;
293 register int i;
294 struct in_ifaddr *ia;
295 void *(*ctlfunc) __P((int, struct sockaddr *, void *));
296 int code;
297 int hlen;
298 va_list ap;
299
300 va_start(ap, m);
301 hlen = va_arg(ap, int);
302 proto = va_arg(ap, int);
303 va_end(ap);
304
305 /*
306 * Locate icmp structure in mbuf, and check
307 * that not corrupted and of at least minimum length.
308 */
309 icmplen = ip->ip_len - hlen;
310 #ifdef ICMPPRINTFS
311 if (icmpprintfs)
312 printf("icmp_input from %x to %x, len %d\n",
313 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
314 icmplen);
315 #endif
316 if (icmplen < ICMP_MINLEN) {
317 icmpstat.icps_tooshort++;
318 goto freeit;
319 }
320 i = hlen + min(icmplen, ICMP_ADVLENMIN);
321 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
322 icmpstat.icps_tooshort++;
323 return;
324 }
325 ip = mtod(m, struct ip *);
326 m->m_len -= hlen;
327 m->m_data += hlen;
328 icp = mtod(m, struct icmp *);
329 if (in_cksum(m, icmplen)) {
330 icmpstat.icps_checksum++;
331 goto freeit;
332 }
333 m->m_len += hlen;
334 m->m_data -= hlen;
335
336 #ifdef ICMPPRINTFS
337 /*
338 * Message type specific processing.
339 */
340 if (icmpprintfs)
341 printf("icmp_input, type %d code %d\n", icp->icmp_type,
342 icp->icmp_code);
343 #endif
344 #ifdef IPSEC
345 /* drop it if it does not match the policy */
346 if (ipsec4_in_reject(m, NULL)) {
347 ipsecstat.in_polvio++;
348 goto freeit;
349 }
350 #endif
351 if (icp->icmp_type > ICMP_MAXTYPE)
352 goto raw;
353 icmpstat.icps_inhist[icp->icmp_type]++;
354 code = icp->icmp_code;
355 switch (icp->icmp_type) {
356
357 case ICMP_UNREACH:
358 switch (code) {
359 case ICMP_UNREACH_NET:
360 case ICMP_UNREACH_HOST:
361 case ICMP_UNREACH_PROTOCOL:
362 case ICMP_UNREACH_PORT:
363 case ICMP_UNREACH_SRCFAIL:
364 code += PRC_UNREACH_NET;
365 break;
366
367 case ICMP_UNREACH_NEEDFRAG:
368 code = PRC_MSGSIZE;
369 break;
370
371 case ICMP_UNREACH_NET_UNKNOWN:
372 case ICMP_UNREACH_NET_PROHIB:
373 case ICMP_UNREACH_TOSNET:
374 code = PRC_UNREACH_NET;
375 break;
376
377 case ICMP_UNREACH_HOST_UNKNOWN:
378 case ICMP_UNREACH_ISOLATED:
379 case ICMP_UNREACH_HOST_PROHIB:
380 case ICMP_UNREACH_TOSHOST:
381 code = PRC_UNREACH_HOST;
382 break;
383
384 default:
385 goto badcode;
386 }
387 goto deliver;
388
389 case ICMP_TIMXCEED:
390 if (code > 1)
391 goto badcode;
392 code += PRC_TIMXCEED_INTRANS;
393 goto deliver;
394
395 case ICMP_PARAMPROB:
396 if (code > 1)
397 goto badcode;
398 code = PRC_PARAMPROB;
399 goto deliver;
400
401 case ICMP_SOURCEQUENCH:
402 if (code)
403 goto badcode;
404 code = PRC_QUENCH;
405 goto deliver;
406
407 deliver:
408 /*
409 * Problem with datagram; advise higher level routines.
410 */
411 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
412 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
413 icmpstat.icps_badlen++;
414 goto freeit;
415 }
416 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
417 goto badcode;
418 NTOHS(icp->icmp_ip.ip_len);
419 #ifdef ICMPPRINTFS
420 if (icmpprintfs)
421 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
422 #endif
423 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
424 if (code == PRC_MSGSIZE && ip_mtudisc)
425 icmp_mtudisc(icp);
426 /*
427 * XXX if the packet contains [IPv4 AH TCP], we can't make a
428 * notification to TCP layer.
429 */
430 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
431 if (ctlfunc)
432 (*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
433 break;
434
435 badcode:
436 icmpstat.icps_badcode++;
437 break;
438
439 case ICMP_ECHO:
440 icp->icmp_type = ICMP_ECHOREPLY;
441 goto reflect;
442
443 case ICMP_TSTAMP:
444 if (icmplen < ICMP_TSLEN) {
445 icmpstat.icps_badlen++;
446 break;
447 }
448 icp->icmp_type = ICMP_TSTAMPREPLY;
449 icp->icmp_rtime = iptime();
450 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
451 goto reflect;
452
453 case ICMP_MASKREQ:
454 if (icmpmaskrepl == 0)
455 break;
456 /*
457 * We are not able to respond with all ones broadcast
458 * unless we receive it over a point-to-point interface.
459 */
460 if (icmplen < ICMP_MASKLEN) {
461 icmpstat.icps_badlen++;
462 break;
463 }
464 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
465 in_nullhost(ip->ip_dst))
466 icmpdst.sin_addr = ip->ip_src;
467 else
468 icmpdst.sin_addr = ip->ip_dst;
469 ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
470 m->m_pkthdr.rcvif));
471 if (ia == 0)
472 break;
473 icp->icmp_type = ICMP_MASKREPLY;
474 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
475 if (in_nullhost(ip->ip_src)) {
476 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
477 ip->ip_src = ia->ia_broadaddr.sin_addr;
478 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
479 ip->ip_src = ia->ia_dstaddr.sin_addr;
480 }
481 reflect:
482 icmpstat.icps_reflect++;
483 icmpstat.icps_outhist[icp->icmp_type]++;
484 icmp_reflect(m);
485 return;
486
487 case ICMP_REDIRECT:
488 if (code > 3)
489 goto badcode;
490 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
491 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
492 icmpstat.icps_badlen++;
493 break;
494 }
495 /*
496 * Short circuit routing redirects to force
497 * immediate change in the kernel's routing
498 * tables. The message is also handed to anyone
499 * listening on a raw socket (e.g. the routing
500 * daemon for use in updating its tables).
501 */
502 icmpgw.sin_addr = ip->ip_src;
503 icmpdst.sin_addr = icp->icmp_gwaddr;
504 #ifdef ICMPPRINTFS
505 if (icmpprintfs)
506 printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
507 icp->icmp_gwaddr);
508 #endif
509 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
510 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
511 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
512 sintosa(&icmpgw), (struct rtentry **)0);
513 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
514 #ifdef IPSEC
515 key_sa_routechange((struct sockaddr *)&icmpsrc);
516 #endif
517 break;
518
519 /*
520 * No kernel processing for the following;
521 * just fall through to send to raw listener.
522 */
523 case ICMP_ECHOREPLY:
524 case ICMP_ROUTERADVERT:
525 case ICMP_ROUTERSOLICIT:
526 case ICMP_TSTAMPREPLY:
527 case ICMP_IREQREPLY:
528 case ICMP_MASKREPLY:
529 default:
530 break;
531 }
532
533 raw:
534 rip_input(m, hlen, proto);
535 return;
536
537 freeit:
538 m_freem(m);
539 return;
540 }
541
542 /*
543 * Reflect the ip packet back to the source
544 */
545 void
546 icmp_reflect(m)
547 struct mbuf *m;
548 {
549 register struct ip *ip = mtod(m, struct ip *);
550 register struct in_ifaddr *ia;
551 register struct ifaddr *ifa;
552 struct sockaddr_in *sin = 0;
553 struct in_addr t;
554 struct mbuf *opts = 0;
555 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
556
557 if (!in_canforward(ip->ip_src) &&
558 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
559 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
560 m_freem(m); /* Bad return address */
561 goto done; /* ip_output() will check for broadcast */
562 }
563 t = ip->ip_dst;
564 ip->ip_dst = ip->ip_src;
565 /*
566 * If the incoming packet was addressed directly to us, use
567 * dst as the src for the reply. Otherwise (broadcast or
568 * anonymous), use an address which corresponds to the
569 * incoming interface, with a preference for the address which
570 * corresponds to the route to the destination of the ICMP.
571 */
572
573 /* Look for packet addressed to us */
574 INADDR_TO_IA(t, ia);
575
576 /* look for packet sent to broadcast address */
577 if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
578 for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
579 ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
580 if (ifa->ifa_addr->sa_family != AF_INET)
581 continue;
582 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
583 ia = ifatoia(ifa);
584 break;
585 }
586 }
587 }
588
589 if (ia)
590 sin = &ia->ia_addr;
591
592 icmpdst.sin_addr = t;
593
594 /* if the packet is addressed somewhere else, compute the
595 source address for packets routed back to the source, and
596 use that, if it's an address on the interface which
597 received the packet */
598 if (sin == (struct sockaddr_in *)0) {
599 struct sockaddr_in sin_dst;
600 struct route icmproute;
601 int errornum;
602
603 sin_dst.sin_family = AF_INET;
604 sin_dst.sin_len = sizeof(struct sockaddr_in);
605 sin_dst.sin_addr = ip->ip_dst;
606 bzero(&icmproute, sizeof(icmproute));
607 errornum = 0;
608 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
609 /* errornum is never used */
610 if (icmproute.ro_rt)
611 RTFREE(icmproute.ro_rt);
612 /* check to make sure sin is a source address on rcvif */
613 if (sin) {
614 t = sin->sin_addr;
615 sin = (struct sockaddr_in *)0;
616 INADDR_TO_IA(t, ia);
617 while (ia) {
618 if (ia->ia_ifp == m->m_pkthdr.rcvif) {
619 sin = &ia->ia_addr;
620 break;
621 }
622 NEXT_IA_WITH_SAME_ADDR(ia);
623 }
624 }
625 }
626
627 /* if it was not addressed to us, but the route doesn't go out
628 the source interface, pick an address on the source
629 interface. This can happen when routing is asymmetric, or
630 when the incoming packet was encapsulated */
631 if (sin == (struct sockaddr_in *)0) {
632 for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
633 ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
634 if (ifa->ifa_addr->sa_family != AF_INET)
635 continue;
636 sin = &(ifatoia(ifa)->ia_addr);
637 break;
638 }
639 }
640
641 /*
642 * The following happens if the packet was not addressed to us,
643 * and was received on an interface with no IP address:
644 * We find the first AF_INET address on the first non-loopback
645 * interface.
646 */
647 if (sin == (struct sockaddr_in *)0)
648 for (ia = in_ifaddr.tqh_first; ia != NULL;
649 ia = ia->ia_list.tqe_next) {
650 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
651 continue;
652 sin = &ia->ia_addr;
653 break;
654 }
655
656 /*
657 * If we still didn't find an address, punt. We could have an
658 * interface up (and receiving packets) with no address.
659 */
660 if (sin == (struct sockaddr_in *)0) {
661 m_freem(m);
662 goto done;
663 }
664
665 ip->ip_src = sin->sin_addr;
666 ip->ip_ttl = MAXTTL;
667
668 if (optlen > 0) {
669 register u_char *cp;
670 int opt, cnt;
671 u_int len;
672
673 /*
674 * Retrieve any source routing from the incoming packet;
675 * add on any record-route or timestamp options.
676 */
677 cp = (u_char *) (ip + 1);
678 if ((opts = ip_srcroute()) == 0 &&
679 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
680 opts->m_len = sizeof(struct in_addr);
681 *mtod(opts, struct in_addr *) = zeroin_addr;
682 }
683 if (opts) {
684 #ifdef ICMPPRINTFS
685 if (icmpprintfs)
686 printf("icmp_reflect optlen %d rt %d => ",
687 optlen, opts->m_len);
688 #endif
689 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
690 opt = cp[IPOPT_OPTVAL];
691 if (opt == IPOPT_EOL)
692 break;
693 if (opt == IPOPT_NOP)
694 len = 1;
695 else {
696 len = cp[IPOPT_OLEN];
697 if (len <= 0 || len > cnt)
698 break;
699 }
700 /*
701 * Should check for overflow, but it "can't happen"
702 */
703 if (opt == IPOPT_RR || opt == IPOPT_TS ||
704 opt == IPOPT_SECURITY) {
705 bcopy((caddr_t)cp,
706 mtod(opts, caddr_t) + opts->m_len, len);
707 opts->m_len += len;
708 }
709 }
710 /* Terminate & pad, if necessary */
711 if ((cnt = opts->m_len % 4) != 0) {
712 for (; cnt < 4; cnt++) {
713 *(mtod(opts, caddr_t) + opts->m_len) =
714 IPOPT_EOL;
715 opts->m_len++;
716 }
717 }
718 #ifdef ICMPPRINTFS
719 if (icmpprintfs)
720 printf("%d\n", opts->m_len);
721 #endif
722 }
723 /*
724 * Now strip out original options by copying rest of first
725 * mbuf's data back, and adjust the IP length.
726 */
727 ip->ip_len -= optlen;
728 ip->ip_hl = sizeof(struct ip) >> 2;
729 m->m_len -= optlen;
730 if (m->m_flags & M_PKTHDR)
731 m->m_pkthdr.len -= optlen;
732 optlen += sizeof(struct ip);
733 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
734 (unsigned)(m->m_len - sizeof(struct ip)));
735 }
736 m->m_flags &= ~(M_BCAST|M_MCAST);
737 icmp_send(m, opts);
738 done:
739 if (opts)
740 (void)m_free(opts);
741 }
742
743 /*
744 * Send an icmp packet back to the ip level,
745 * after supplying a checksum.
746 */
747 void
748 icmp_send(m, opts)
749 register struct mbuf *m;
750 struct mbuf *opts;
751 {
752 register struct ip *ip = mtod(m, struct ip *);
753 register int hlen;
754 register struct icmp *icp;
755
756 hlen = ip->ip_hl << 2;
757 m->m_data += hlen;
758 m->m_len -= hlen;
759 icp = mtod(m, struct icmp *);
760 icp->icmp_cksum = 0;
761 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
762 m->m_data -= hlen;
763 m->m_len += hlen;
764 #ifdef ICMPPRINTFS
765 if (icmpprintfs)
766 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
767 #endif
768 #ifdef IPSEC
769 /* Don't lookup socket */
770 ipsec_setsocket(m, NULL);
771 #endif
772 (void) ip_output(m, opts, NULL, 0, NULL);
773 }
774
775 n_time
776 iptime()
777 {
778 struct timeval atv;
779 u_long t;
780
781 microtime(&atv);
782 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
783 return (htonl(t));
784 }
785
786 int
787 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
788 int *name;
789 u_int namelen;
790 void *oldp;
791 size_t *oldlenp;
792 void *newp;
793 size_t newlen;
794 {
795
796 /* All sysctl names at this level are terminal. */
797 if (namelen != 1)
798 return (ENOTDIR);
799
800 switch (name[0]) {
801 case ICMPCTL_MASKREPL:
802 return (sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl));
803 case ICMPCTL_ERRRATELIMIT:
804 {
805 int rate_usec, error, s;
806
807 /*
808 * The sysctl specifies the rate in usec-between-icmp,
809 * so we must convert from/to a timeval.
810 */
811 rate_usec = (icmperrratelim.tv_sec * 1000000) +
812 icmperrratelim.tv_usec;
813 error = sysctl_int(oldp, oldlenp, newp, newlen, &rate_usec);
814 if (error)
815 return (error);
816 s = splsoftnet();
817 icmperrratelim.tv_sec = rate_usec / 1000000;
818 icmperrratelim.tv_usec = rate_usec % 1000000;
819 splx(s);
820
821 return (0);
822 }
823 default:
824 return (ENOPROTOOPT);
825 }
826 /* NOTREACHED */
827 }
828
829 static void
830 icmp_mtudisc(icp)
831 struct icmp *icp;
832 {
833 struct rtentry *rt;
834 struct sockaddr *dst = sintosa(&icmpsrc);
835 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
836 int error;
837
838 /* Table of common MTUs: */
839
840 static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
841 4352, 2002, 1492, 1006, 508, 296, 68, 0};
842
843 rt = rtalloc1(dst, 1);
844 if (rt == 0)
845 return;
846
847 /* If we didn't get a host route, allocate one */
848
849 if ((rt->rt_flags & RTF_HOST) == 0) {
850 struct rtentry *nrt;
851
852 error = rtrequest((int) RTM_ADD, dst,
853 (struct sockaddr *) rt->rt_gateway,
854 (struct sockaddr *) 0,
855 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
856 if (error) {
857 rtfree(rt);
858 rtfree(nrt);
859 return;
860 }
861 nrt->rt_rmx = rt->rt_rmx;
862 rtfree(rt);
863 rt = nrt;
864 }
865 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
866 if (error) {
867 rtfree(rt);
868 return;
869 }
870
871 if (mtu == 0) {
872 int i = 0;
873
874 mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
875 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
876 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
877 mtu -= (icp->icmp_ip.ip_hl << 2);
878
879 /* If we still can't guess a value, try the route */
880
881 if (mtu == 0) {
882 mtu = rt->rt_rmx.rmx_mtu;
883
884 /* If no route mtu, default to the interface mtu */
885
886 if (mtu == 0)
887 mtu = rt->rt_ifp->if_mtu;
888 }
889
890 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
891 if (mtu > mtu_table[i]) {
892 mtu = mtu_table[i];
893 break;
894 }
895 }
896
897 /*
898 * XXX: RTV_MTU is overloaded, since the admin can set it
899 * to turn off PMTU for a route, and the kernel can
900 * set it to indicate a serious problem with PMTU
901 * on a route. We should be using a separate flag
902 * for the kernel to indicate this.
903 */
904
905 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
906 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
907 rt->rt_rmx.rmx_locks |= RTV_MTU;
908 else if (rt->rt_rmx.rmx_mtu > mtu ||
909 rt->rt_rmx.rmx_mtu == 0)
910 rt->rt_rmx.rmx_mtu = mtu;
911 }
912
913 if (rt)
914 rtfree(rt);
915 }
916
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Returns 0 when there is no plateau in the requested direction:
 * asking for a larger one at or above 65535, or for a smaller one at
 * or below 68.  A negative MTU lies below every table entry and also
 * yields 0 in either direction.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int ntab = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/* Find the first (largest) plateau that does not exceed mtu. */
	for (i = 0; i < ntab; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* Larger plateau: the entry just before the match. */
		if (i == 0)
			return 0;
		return mtutab[i - 1];
	}

	/*
	 * Smaller plateau.  i == ntab means mtu was negative and matched
	 * nothing; do not index past the end of the table.
	 */
	if (i == ntab || mtutab[i] == 0)
		return 0;
	if (mtu > mtutab[i])
		return mtutab[i];
	return mtutab[i + 1];
}
954
955 static void
956 icmp_mtudisc_timeout(rt, r)
957 struct rtentry *rt;
958 struct rttimer *r;
959 {
960 if (rt == NULL)
961 panic("icmp_mtudisc_timeout: bad route to timeout");
962 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
963 (RTF_DYNAMIC | RTF_HOST)) {
964 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
965 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
966 } else {
967 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
968 rt->rt_rmx.rmx_mtu = 0;
969 }
970 }
971 }
972
973 /*
974 * Perform rate limit check.
975 * Returns 0 if it is okay to send the icmp packet.
976 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
977 * limitation.
978 *
979 * XXX per-destination/type check necessary?
980 */
981 static int
982 icmp_ratelimit(dst, type, code)
983 const struct in_addr *dst; /* not used at this moment */
984 const int type; /* not used at this moment */
985 const int code; /* not used at this moment */
986 {
987 static struct timeval icmperrratelim_last;
988
989 /*
990 * ratecheck() returns true if it is okay to send. We return
991 * true if it is not okay to send.
992 */
993 return (ratecheck(&icmperrratelim_last, &icmperrratelim) == 0);
994 }
995