ip_icmp.c revision 1.51 1 /* $NetBSD: ip_icmp.c,v 1.51 2000/07/10 09:31:30 itojun Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the NetBSD
51 * Foundation, Inc. and its contributors.
52 * 4. Neither the name of The NetBSD Foundation nor the names of its
53 * contributors may be used to endorse or promote products derived
54 * from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 /*
70 * Copyright (c) 1982, 1986, 1988, 1993
71 * The Regents of the University of California. All rights reserved.
72 *
73 * Redistribution and use in source and binary forms, with or without
74 * modification, are permitted provided that the following conditions
75 * are met:
76 * 1. Redistributions of source code must retain the above copyright
77 * notice, this list of conditions and the following disclaimer.
78 * 2. Redistributions in binary form must reproduce the above copyright
79 * notice, this list of conditions and the following disclaimer in the
80 * documentation and/or other materials provided with the distribution.
81 * 3. All advertising materials mentioning features or use of this software
82 * must display the following acknowledgement:
83 * This product includes software developed by the University of
84 * California, Berkeley and its contributors.
85 * 4. Neither the name of the University nor the names of its contributors
86 * may be used to endorse or promote products derived from this software
87 * without specific prior written permission.
88 *
89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99 * SUCH DAMAGE.
100 *
101 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
102 */
103
104 #include "opt_ipsec.h"
105
106 #include <sys/param.h>
107 #include <sys/systm.h>
108 #include <sys/malloc.h>
109 #include <sys/mbuf.h>
110 #include <sys/protosw.h>
111 #include <sys/socket.h>
112 #include <sys/time.h>
113 #include <sys/kernel.h>
114 #include <sys/proc.h>
115
116 #include <uvm/uvm_extern.h>
117
118 #include <sys/sysctl.h>
119
120 #include <net/if.h>
121 #include <net/route.h>
122
123 #include <netinet/in.h>
124 #include <netinet/in_systm.h>
125 #include <netinet/in_var.h>
126 #include <netinet/ip.h>
127 #include <netinet/ip_icmp.h>
128 #include <netinet/ip_var.h>
129 #include <netinet/in_pcb.h>
130 #include <netinet/icmp_var.h>
131
132 #ifdef IPSEC
133 #include <netinet6/ipsec.h>
134 #include <netkey/key.h>
135 #endif
136
137 #include <machine/stdarg.h>
138
139 /*
140 * ICMP routines: error generation, receive packet processing, and
141 * routines to turnaround packets back to the originator, and
142 * host table maintenance routines.
143 */
144
145 int icmpmaskrepl = 0;
146 #ifdef ICMPPRINTFS
147 int icmpprintfs = 0;
148 #endif
149 int icmpreturndatabytes = 8;
150
151 #if 0
152 static int ip_next_mtu __P((int, int));
153 #else
154 /*static*/ int ip_next_mtu __P((int, int));
155 #endif
156
157 extern struct timeval icmperrratelim;
158 extern int icmperrppslim;
159 static int icmperrpps_count = 0;
160 static struct timeval icmperrppslim_last;
161
162 static void icmp_mtudisc __P((struct icmp *));
163 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
164
165 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
166
167 /*
168 * Generate an error packet of type error
169 * in response to bad packet ip.
170 */
171 void
172 icmp_error(n, type, code, dest, destifp)
173 struct mbuf *n;
174 int type, code;
175 n_long dest;
176 struct ifnet *destifp;
177 {
178 struct ip *oip = mtod(n, struct ip *), *nip;
179 unsigned oiplen = oip->ip_hl << 2;
180 struct icmp *icp;
181 struct mbuf *m;
182 unsigned icmplen;
183
184 #ifdef ICMPPRINTFS
185 if (icmpprintfs)
186 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
187 #endif
188 if (type != ICMP_REDIRECT)
189 icmpstat.icps_error++;
190 /*
191 * Don't send error if the original packet was encrypted.
192 * Don't send error if not the first fragment of message.
193 * Don't error if the old packet protocol was ICMP
194 * error message, only known informational types.
195 */
196 if (n->m_flags & M_DECRYPTED)
197 goto freeit;
198 if (oip->ip_off &~ (IP_MF|IP_DF))
199 goto freeit;
200 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
201 n->m_len >= oiplen + ICMP_MINLEN &&
202 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
203 icmpstat.icps_oldicmp++;
204 goto freeit;
205 }
206 /* Don't send error in response to a multicast or broadcast packet */
207 if (n->m_flags & (M_BCAST|M_MCAST))
208 goto freeit;
209
210 /*
211 * First, do a rate limitation check.
212 */
213 if (icmp_ratelimit(&oip->ip_src, type, code)) {
214 /* XXX stat */
215 goto freeit;
216 }
217
218 /*
219 * Now, formulate icmp message
220 */
221 m = m_gethdr(M_DONTWAIT, MT_HEADER);
222 if (m == NULL)
223 goto freeit;
224 icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
225 m->m_len = icmplen + ICMP_MINLEN;
226 MH_ALIGN(m, m->m_len);
227 icp = mtod(m, struct icmp *);
228 if ((u_int)type > ICMP_MAXTYPE)
229 panic("icmp_error");
230 icmpstat.icps_outhist[type]++;
231 icp->icmp_type = type;
232 if (type == ICMP_REDIRECT)
233 icp->icmp_gwaddr.s_addr = dest;
234 else {
235 icp->icmp_void = 0;
236 /*
237 * The following assignments assume an overlay with the
238 * zeroed icmp_void field.
239 */
240 if (type == ICMP_PARAMPROB) {
241 icp->icmp_pptr = code;
242 code = 0;
243 } else if (type == ICMP_UNREACH &&
244 code == ICMP_UNREACH_NEEDFRAG && destifp)
245 icp->icmp_nextmtu = htons(destifp->if_mtu);
246 }
247
248 HTONS(oip->ip_off);
249 HTONS(oip->ip_len);
250 icp->icmp_code = code;
251 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
252 nip = &icp->icmp_ip;
253
254 /*
255 * Now, copy old ip header (without options)
256 * in front of icmp message.
257 */
258 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
259 panic("icmp len");
260 m->m_data -= sizeof(struct ip);
261 m->m_len += sizeof(struct ip);
262 m->m_pkthdr.len = m->m_len;
263 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
264 nip = mtod(m, struct ip *);
265 /* ip_v set in ip_output */
266 nip->ip_hl = sizeof(struct ip) >> 2;
267 nip->ip_tos = 0;
268 nip->ip_len = m->m_len;
269 /* ip_id set in ip_output */
270 nip->ip_off = 0;
271 /* ip_ttl set in icmp_reflect */
272 nip->ip_p = IPPROTO_ICMP;
273 nip->ip_src = oip->ip_src;
274 nip->ip_dst = oip->ip_dst;
275 icmp_reflect(m);
276
277 freeit:
278 m_freem(n);
279 }
280
281 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
282 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
283 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
284 struct sockaddr_in icmpmask = { 8, 0 };
285
286 /*
287 * Process a received ICMP message.
288 */
289 void
290 #if __STDC__
291 icmp_input(struct mbuf *m, ...)
292 #else
293 icmp_input(m, va_alist)
294 struct mbuf *m;
295 va_dcl
296 #endif
297 {
298 int proto;
299 struct icmp *icp;
300 struct ip *ip = mtod(m, struct ip *);
301 int icmplen;
302 int i;
303 struct in_ifaddr *ia;
304 void *(*ctlfunc) __P((int, struct sockaddr *, void *));
305 int code;
306 int hlen;
307 va_list ap;
308
309 va_start(ap, m);
310 hlen = va_arg(ap, int);
311 proto = va_arg(ap, int);
312 va_end(ap);
313
314 /*
315 * Locate icmp structure in mbuf, and check
316 * that not corrupted and of at least minimum length.
317 */
318 icmplen = ip->ip_len - hlen;
319 #ifdef ICMPPRINTFS
320 if (icmpprintfs)
321 printf("icmp_input from %x to %x, len %d\n",
322 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
323 icmplen);
324 #endif
325 if (icmplen < ICMP_MINLEN) {
326 icmpstat.icps_tooshort++;
327 goto freeit;
328 }
329 i = hlen + min(icmplen, ICMP_ADVLENMIN);
330 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
331 icmpstat.icps_tooshort++;
332 return;
333 }
334 ip = mtod(m, struct ip *);
335 m->m_len -= hlen;
336 m->m_data += hlen;
337 icp = mtod(m, struct icmp *);
338 if (in_cksum(m, icmplen)) {
339 icmpstat.icps_checksum++;
340 goto freeit;
341 }
342 m->m_len += hlen;
343 m->m_data -= hlen;
344
345 #ifdef ICMPPRINTFS
346 /*
347 * Message type specific processing.
348 */
349 if (icmpprintfs)
350 printf("icmp_input, type %d code %d\n", icp->icmp_type,
351 icp->icmp_code);
352 #endif
353 #ifdef IPSEC
354 /* drop it if it does not match the policy */
355 if (ipsec4_in_reject(m, NULL)) {
356 ipsecstat.in_polvio++;
357 goto freeit;
358 }
359 #endif
360 if (icp->icmp_type > ICMP_MAXTYPE)
361 goto raw;
362 icmpstat.icps_inhist[icp->icmp_type]++;
363 code = icp->icmp_code;
364 switch (icp->icmp_type) {
365
366 case ICMP_UNREACH:
367 switch (code) {
368 case ICMP_UNREACH_NET:
369 case ICMP_UNREACH_HOST:
370 case ICMP_UNREACH_PROTOCOL:
371 case ICMP_UNREACH_PORT:
372 case ICMP_UNREACH_SRCFAIL:
373 code += PRC_UNREACH_NET;
374 break;
375
376 case ICMP_UNREACH_NEEDFRAG:
377 code = PRC_MSGSIZE;
378 break;
379
380 case ICMP_UNREACH_NET_UNKNOWN:
381 case ICMP_UNREACH_NET_PROHIB:
382 case ICMP_UNREACH_TOSNET:
383 code = PRC_UNREACH_NET;
384 break;
385
386 case ICMP_UNREACH_HOST_UNKNOWN:
387 case ICMP_UNREACH_ISOLATED:
388 case ICMP_UNREACH_HOST_PROHIB:
389 case ICMP_UNREACH_TOSHOST:
390 code = PRC_UNREACH_HOST;
391 break;
392
393 default:
394 goto badcode;
395 }
396 goto deliver;
397
398 case ICMP_TIMXCEED:
399 if (code > 1)
400 goto badcode;
401 code += PRC_TIMXCEED_INTRANS;
402 goto deliver;
403
404 case ICMP_PARAMPROB:
405 if (code > 1)
406 goto badcode;
407 code = PRC_PARAMPROB;
408 goto deliver;
409
410 case ICMP_SOURCEQUENCH:
411 if (code)
412 goto badcode;
413 code = PRC_QUENCH;
414 goto deliver;
415
416 deliver:
417 /*
418 * Problem with datagram; advise higher level routines.
419 */
420 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
421 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
422 icmpstat.icps_badlen++;
423 goto freeit;
424 }
425 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
426 goto badcode;
427 NTOHS(icp->icmp_ip.ip_len);
428 #ifdef ICMPPRINTFS
429 if (icmpprintfs)
430 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
431 #endif
432 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
433 if (code == PRC_MSGSIZE && ip_mtudisc)
434 icmp_mtudisc(icp);
435 /*
436 * XXX if the packet contains [IPv4 AH TCP], we can't make a
437 * notification to TCP layer.
438 */
439 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
440 if (ctlfunc)
441 (*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
442 break;
443
444 badcode:
445 icmpstat.icps_badcode++;
446 break;
447
448 case ICMP_ECHO:
449 icp->icmp_type = ICMP_ECHOREPLY;
450 goto reflect;
451
452 case ICMP_TSTAMP:
453 if (icmplen < ICMP_TSLEN) {
454 icmpstat.icps_badlen++;
455 break;
456 }
457 icp->icmp_type = ICMP_TSTAMPREPLY;
458 icp->icmp_rtime = iptime();
459 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
460 goto reflect;
461
462 case ICMP_MASKREQ:
463 if (icmpmaskrepl == 0)
464 break;
465 /*
466 * We are not able to respond with all ones broadcast
467 * unless we receive it over a point-to-point interface.
468 */
469 if (icmplen < ICMP_MASKLEN) {
470 icmpstat.icps_badlen++;
471 break;
472 }
473 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
474 in_nullhost(ip->ip_dst))
475 icmpdst.sin_addr = ip->ip_src;
476 else
477 icmpdst.sin_addr = ip->ip_dst;
478 ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
479 m->m_pkthdr.rcvif));
480 if (ia == 0)
481 break;
482 icp->icmp_type = ICMP_MASKREPLY;
483 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
484 if (in_nullhost(ip->ip_src)) {
485 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
486 ip->ip_src = ia->ia_broadaddr.sin_addr;
487 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
488 ip->ip_src = ia->ia_dstaddr.sin_addr;
489 }
490 reflect:
491 icmpstat.icps_reflect++;
492 icmpstat.icps_outhist[icp->icmp_type]++;
493 icmp_reflect(m);
494 return;
495
496 case ICMP_REDIRECT:
497 if (code > 3)
498 goto badcode;
499 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
500 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
501 icmpstat.icps_badlen++;
502 break;
503 }
504 /*
505 * Short circuit routing redirects to force
506 * immediate change in the kernel's routing
507 * tables. The message is also handed to anyone
508 * listening on a raw socket (e.g. the routing
509 * daemon for use in updating its tables).
510 */
511 icmpgw.sin_addr = ip->ip_src;
512 icmpdst.sin_addr = icp->icmp_gwaddr;
513 #ifdef ICMPPRINTFS
514 if (icmpprintfs)
515 printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
516 icp->icmp_gwaddr);
517 #endif
518 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
519 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
520 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
521 sintosa(&icmpgw), (struct rtentry **)0);
522 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
523 #ifdef IPSEC
524 key_sa_routechange((struct sockaddr *)&icmpsrc);
525 #endif
526 break;
527
528 /*
529 * No kernel processing for the following;
530 * just fall through to send to raw listener.
531 */
532 case ICMP_ECHOREPLY:
533 case ICMP_ROUTERADVERT:
534 case ICMP_ROUTERSOLICIT:
535 case ICMP_TSTAMPREPLY:
536 case ICMP_IREQREPLY:
537 case ICMP_MASKREPLY:
538 default:
539 break;
540 }
541
542 raw:
543 rip_input(m, hlen, proto);
544 return;
545
546 freeit:
547 m_freem(m);
548 return;
549 }
550
551 /*
552 * Reflect the ip packet back to the source
553 */
554 void
555 icmp_reflect(m)
556 struct mbuf *m;
557 {
558 struct ip *ip = mtod(m, struct ip *);
559 struct in_ifaddr *ia;
560 struct ifaddr *ifa;
561 struct sockaddr_in *sin = 0;
562 struct in_addr t;
563 struct mbuf *opts = 0;
564 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
565
566 if (!in_canforward(ip->ip_src) &&
567 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
568 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
569 m_freem(m); /* Bad return address */
570 goto done; /* ip_output() will check for broadcast */
571 }
572 t = ip->ip_dst;
573 ip->ip_dst = ip->ip_src;
574 /*
575 * If the incoming packet was addressed directly to us, use
576 * dst as the src for the reply. Otherwise (broadcast or
577 * anonymous), use an address which corresponds to the
578 * incoming interface, with a preference for the address which
579 * corresponds to the route to the destination of the ICMP.
580 */
581
582 /* Look for packet addressed to us */
583 INADDR_TO_IA(t, ia);
584
585 /* look for packet sent to broadcast address */
586 if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
587 for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
588 ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
589 if (ifa->ifa_addr->sa_family != AF_INET)
590 continue;
591 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
592 ia = ifatoia(ifa);
593 break;
594 }
595 }
596 }
597
598 if (ia)
599 sin = &ia->ia_addr;
600
601 icmpdst.sin_addr = t;
602
603 /* if the packet is addressed somewhere else, compute the
604 source address for packets routed back to the source, and
605 use that, if it's an address on the interface which
606 received the packet */
607 if (sin == (struct sockaddr_in *)0) {
608 struct sockaddr_in sin_dst;
609 struct route icmproute;
610 int errornum;
611
612 sin_dst.sin_family = AF_INET;
613 sin_dst.sin_len = sizeof(struct sockaddr_in);
614 sin_dst.sin_addr = ip->ip_dst;
615 bzero(&icmproute, sizeof(icmproute));
616 errornum = 0;
617 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
618 /* errornum is never used */
619 if (icmproute.ro_rt)
620 RTFREE(icmproute.ro_rt);
621 /* check to make sure sin is a source address on rcvif */
622 if (sin) {
623 t = sin->sin_addr;
624 sin = (struct sockaddr_in *)0;
625 INADDR_TO_IA(t, ia);
626 while (ia) {
627 if (ia->ia_ifp == m->m_pkthdr.rcvif) {
628 sin = &ia->ia_addr;
629 break;
630 }
631 NEXT_IA_WITH_SAME_ADDR(ia);
632 }
633 }
634 }
635
636 /* if it was not addressed to us, but the route doesn't go out
637 the source interface, pick an address on the source
638 interface. This can happen when routing is asymmetric, or
639 when the incoming packet was encapsulated */
640 if (sin == (struct sockaddr_in *)0) {
641 for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
642 ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
643 if (ifa->ifa_addr->sa_family != AF_INET)
644 continue;
645 sin = &(ifatoia(ifa)->ia_addr);
646 break;
647 }
648 }
649
650 /*
651 * The following happens if the packet was not addressed to us,
652 * and was received on an interface with no IP address:
653 * We find the first AF_INET address on the first non-loopback
654 * interface.
655 */
656 if (sin == (struct sockaddr_in *)0)
657 for (ia = in_ifaddr.tqh_first; ia != NULL;
658 ia = ia->ia_list.tqe_next) {
659 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
660 continue;
661 sin = &ia->ia_addr;
662 break;
663 }
664
665 /*
666 * If we still didn't find an address, punt. We could have an
667 * interface up (and receiving packets) with no address.
668 */
669 if (sin == (struct sockaddr_in *)0) {
670 m_freem(m);
671 goto done;
672 }
673
674 ip->ip_src = sin->sin_addr;
675 ip->ip_ttl = MAXTTL;
676
677 if (optlen > 0) {
678 u_char *cp;
679 int opt, cnt;
680 u_int len;
681
682 /*
683 * Retrieve any source routing from the incoming packet;
684 * add on any record-route or timestamp options.
685 */
686 cp = (u_char *) (ip + 1);
687 if ((opts = ip_srcroute()) == 0 &&
688 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
689 opts->m_len = sizeof(struct in_addr);
690 *mtod(opts, struct in_addr *) = zeroin_addr;
691 }
692 if (opts) {
693 #ifdef ICMPPRINTFS
694 if (icmpprintfs)
695 printf("icmp_reflect optlen %d rt %d => ",
696 optlen, opts->m_len);
697 #endif
698 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
699 opt = cp[IPOPT_OPTVAL];
700 if (opt == IPOPT_EOL)
701 break;
702 if (opt == IPOPT_NOP)
703 len = 1;
704 else {
705 if (cnt < IPOPT_OLEN + sizeof(*cp))
706 break;
707 len = cp[IPOPT_OLEN];
708 if (len < IPOPT_OLEN + sizeof(*cp) ||
709 len > cnt)
710 break;
711 }
712 /*
713 * Should check for overflow, but it "can't happen"
714 */
715 if (opt == IPOPT_RR || opt == IPOPT_TS ||
716 opt == IPOPT_SECURITY) {
717 bcopy((caddr_t)cp,
718 mtod(opts, caddr_t) + opts->m_len, len);
719 opts->m_len += len;
720 }
721 }
722 /* Terminate & pad, if necessary */
723 if ((cnt = opts->m_len % 4) != 0) {
724 for (; cnt < 4; cnt++) {
725 *(mtod(opts, caddr_t) + opts->m_len) =
726 IPOPT_EOL;
727 opts->m_len++;
728 }
729 }
730 #ifdef ICMPPRINTFS
731 if (icmpprintfs)
732 printf("%d\n", opts->m_len);
733 #endif
734 }
735 /*
736 * Now strip out original options by copying rest of first
737 * mbuf's data back, and adjust the IP length.
738 */
739 ip->ip_len -= optlen;
740 ip->ip_hl = sizeof(struct ip) >> 2;
741 m->m_len -= optlen;
742 if (m->m_flags & M_PKTHDR)
743 m->m_pkthdr.len -= optlen;
744 optlen += sizeof(struct ip);
745 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
746 (unsigned)(m->m_len - sizeof(struct ip)));
747 }
748 m->m_flags &= ~(M_BCAST|M_MCAST);
749 icmp_send(m, opts);
750 done:
751 if (opts)
752 (void)m_free(opts);
753 }
754
755 /*
756 * Send an icmp packet back to the ip level,
757 * after supplying a checksum.
758 */
759 void
760 icmp_send(m, opts)
761 struct mbuf *m;
762 struct mbuf *opts;
763 {
764 struct ip *ip = mtod(m, struct ip *);
765 int hlen;
766 struct icmp *icp;
767
768 hlen = ip->ip_hl << 2;
769 m->m_data += hlen;
770 m->m_len -= hlen;
771 icp = mtod(m, struct icmp *);
772 icp->icmp_cksum = 0;
773 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
774 m->m_data -= hlen;
775 m->m_len += hlen;
776 #ifdef ICMPPRINTFS
777 if (icmpprintfs)
778 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
779 #endif
780 #ifdef IPSEC
781 /* Don't lookup socket */
782 ipsec_setsocket(m, NULL);
783 #endif
784 (void) ip_output(m, opts, NULL, 0, NULL);
785 }
786
787 n_time
788 iptime()
789 {
790 struct timeval atv;
791 u_long t;
792
793 microtime(&atv);
794 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
795 return (htonl(t));
796 }
797
798 int
799 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
800 int *name;
801 u_int namelen;
802 void *oldp;
803 size_t *oldlenp;
804 void *newp;
805 size_t newlen;
806 {
807 int arg, error, s;
808
809 /* All sysctl names at this level are terminal. */
810 if (namelen != 1)
811 return (ENOTDIR);
812
813 switch (name[0])
814 {
815 case ICMPCTL_MASKREPL:
816 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
817 break;
818 case ICMPCTL_ERRRATELIMIT:
819 /*
820 * The sysctl specifies the rate in usec-between-icmp,
821 * so we must convert from/to a timeval.
822 */
823 arg = (icmperrratelim.tv_sec * 1000000) +
824 icmperrratelim.tv_usec;
825 error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
826 if (error)
827 break;
828 if (arg >= 0) {
829 s = splsoftnet();
830 icmperrratelim.tv_sec = arg / 1000000;
831 icmperrratelim.tv_usec = arg % 1000000;
832 splx(s);
833 } else
834 error = EINVAL;
835 break;
836 case ICMPCTL_RETURNDATABYTES:
837 arg = icmpreturndatabytes;
838 error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
839 if (error)
840 break;
841 if ((arg >= 8) || (arg <= 512))
842 icmpreturndatabytes = arg;
843 else
844 error = EINVAL;
845 break;
846 case ICMPCTL_ERRPPSLIMIT:
847 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
848 break;
849 default:
850 error = ENOPROTOOPT;
851 break;
852 }
853 return error;
854 }
855
856 static void
857 icmp_mtudisc(icp)
858 struct icmp *icp;
859 {
860 struct rtentry *rt;
861 struct sockaddr *dst = sintosa(&icmpsrc);
862 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
863 int error;
864
865 /* Table of common MTUs: */
866
867 static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
868 4352, 2002, 1492, 1006, 508, 296, 68, 0};
869
870 rt = rtalloc1(dst, 1);
871 if (rt == 0)
872 return;
873
874 /* If we didn't get a host route, allocate one */
875
876 if ((rt->rt_flags & RTF_HOST) == 0) {
877 struct rtentry *nrt;
878
879 error = rtrequest((int) RTM_ADD, dst,
880 (struct sockaddr *) rt->rt_gateway,
881 (struct sockaddr *) 0,
882 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
883 if (error) {
884 rtfree(rt);
885 rtfree(nrt);
886 return;
887 }
888 nrt->rt_rmx = rt->rt_rmx;
889 rtfree(rt);
890 rt = nrt;
891 }
892 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
893 if (error) {
894 rtfree(rt);
895 return;
896 }
897
898 if (mtu == 0) {
899 int i = 0;
900
901 mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
902 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
903 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
904 mtu -= (icp->icmp_ip.ip_hl << 2);
905
906 /* If we still can't guess a value, try the route */
907
908 if (mtu == 0) {
909 mtu = rt->rt_rmx.rmx_mtu;
910
911 /* If no route mtu, default to the interface mtu */
912
913 if (mtu == 0)
914 mtu = rt->rt_ifp->if_mtu;
915 }
916
917 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
918 if (mtu > mtu_table[i]) {
919 mtu = mtu_table[i];
920 break;
921 }
922 }
923
924 /*
925 * XXX: RTV_MTU is overloaded, since the admin can set it
926 * to turn off PMTU for a route, and the kernel can
927 * set it to indicate a serious problem with PMTU
928 * on a route. We should be using a separate flag
929 * for the kernel to indicate this.
930 */
931
932 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
933 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
934 rt->rt_rmx.rmx_locks |= RTV_MTU;
935 else if (rt->rt_rmx.rmx_mtu > mtu ||
936 rt->rt_rmx.rmx_mtu == 0)
937 rt->rt_rmx.rmx_mtu = mtu;
938 }
939
940 if (rt)
941 rtfree(rt);
942 }
943
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Returns 0 when there is no such plateau, and also when MTU is
 * negative.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int ntab = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/*
	 * A negative value matches no table entry, not even the
	 * terminating 0, and would send the scan below past the end of
	 * the table.
	 */
	if (mtu < 0)
		return 0;

	/* Find the first plateau not above mtu; the final 0 always matches. */
	for (i = 0; i < ntab; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* next larger plateau, if there is one */
		if (i == 0)
			return 0;
		return mtutab[i - 1];
	}

	/* next smaller plateau */
	if (mtutab[i] == 0)
		return 0;
	if (mtu > mtutab[i])
		return mtutab[i];
	return mtutab[i + 1];
}
981
982 static void
983 icmp_mtudisc_timeout(rt, r)
984 struct rtentry *rt;
985 struct rttimer *r;
986 {
987 if (rt == NULL)
988 panic("icmp_mtudisc_timeout: bad route to timeout");
989 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
990 (RTF_DYNAMIC | RTF_HOST)) {
991 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
992 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
993 } else {
994 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
995 rt->rt_rmx.rmx_mtu = 0;
996 }
997 }
998 }
999
1000 /*
1001 * Perform rate limit check.
1002 * Returns 0 if it is okay to send the icmp packet.
1003 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1004 * limitation.
1005 *
1006 * XXX per-destination/type check necessary?
1007 */
1008 static int
1009 icmp_ratelimit(dst, type, code)
1010 const struct in_addr *dst;
1011 const int type; /* not used at this moment */
1012 const int code; /* not used at this moment */
1013 {
1014 static struct timeval icmperrratelim_last;
1015 struct in_ifaddr *ia;
1016
1017 /*
1018 * Don't rate-limit if it's for us!
1019 */
1020 INADDR_TO_IA(*dst, ia);
1021 if (ia != NULL)
1022 return 0;
1023
1024 /* PPS limit */
1025 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1026 icmperrppslim)) {
1027 /* The packet is subject to rate limit */
1028 return 1;
1029 }
1030
1031 /*
1032 * ratecheck() returns true if it is okay to send. We return
1033 * true if it is not okay to send.
1034 */
1035 if (!ratecheck(&icmperrratelim_last, &icmperrratelim)) {
1036 /* The packet is subject to rate limit */
1037 return 1;
1038 }
1039
1040 /*okay to send*/
1041 return 0;
1042 }
1043