altq_subr.c revision 1.33.46.1.2.1 1 /* $NetBSD: altq_subr.c,v 1.33.46.1.2.1 2023/11/15 02:19:00 thorpej Exp $ */
2 /* $KAME: altq_subr.c,v 1.24 2005/04/13 03:44:25 suz Exp $ */
3
4 /*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.33.46.1.2.1 2023/11/15 02:19:00 thorpej Exp $");
32
33 #ifdef _KERNEL_OPT
34 #include "opt_altq.h"
35 #include "opt_inet.h"
36 #include "pf.h"
37 #endif
38
39 #define __IFQ_PRIVATE
40
41 #include <sys/param.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/kernel.h>
49 #include <sys/kmem.h>
50 #include <sys/errno.h>
51 #include <sys/syslog.h>
52 #include <sys/sysctl.h>
53 #include <sys/queue.h>
54
55 #include <net/if.h>
56 #include <net/if_dl.h>
57 #include <net/if_types.h>
58
59 #include <netinet/in.h>
60 #include <netinet/in_systm.h>
61 #include <netinet/ip.h>
62 #ifdef INET6
63 #include <netinet/ip6.h>
64 #endif
65 #include <netinet/tcp.h>
66 #include <netinet/udp.h>
67
68 #if NPF > 0
69 #include <net/pfvar.h>
70 #endif
71 #include <altq/altq.h>
72 #ifdef ALTQ3_COMPAT
73 #include <altq/altq_conf.h>
74 #endif
75
76 /*
77 * internal function prototypes
78 */
79 static void tbr_timeout(void *);
80 int (*altq_input)(struct mbuf *, int) = NULL;
81 static int tbr_timer = 0; /* token bucket regulator timer */
82 static struct callout tbr_callout;
83
84 #ifdef ALTQ3_CLFIER_COMPAT
85 static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
86 #ifdef INET6
87 static int extract_ports6(struct mbuf *, struct ip6_hdr *,
88 struct flowinfo_in6 *);
89 #endif
90 static int apply_filter4(u_int32_t, struct flow_filter *,
91 struct flowinfo_in *);
92 static int apply_ppfilter4(u_int32_t, struct flow_filter *,
93 struct flowinfo_in *);
94 #ifdef INET6
95 static int apply_filter6(u_int32_t, struct flow_filter6 *,
96 struct flowinfo_in6 *);
97 #endif
98 static int apply_tosfilter4(u_int32_t, struct flow_filter *,
99 struct flowinfo_in *);
100 static u_long get_filt_handle(struct acc_classifier *, int);
101 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
102 static u_int32_t filt2fibmask(struct flow_filter *);
103
104 static void ip4f_cache(struct ip *, struct flowinfo_in *);
105 static int ip4f_lookup(struct ip *, struct flowinfo_in *);
106 static int ip4f_init(void);
107 static struct ip4_frag *ip4f_alloc(void);
108 static void ip4f_free(struct ip4_frag *);
109 #endif /* ALTQ3_CLFIER_COMPAT */
110
111 /*
112 * alternate queueing support routines
113 */
114
115 /*
116 * Allocate an ifaltq and associate it with the specified ifqueue and
117 * ifnet.
118 */
119 void
120 altq_alloc(struct ifqueue *ifq, struct ifnet *ifp)
121 {
122 if (ifq->ifq_altq == NULL) {
123 ifq->ifq_altq = kmem_zalloc(sizeof(*ifq->ifq_altq), KM_SLEEP);
124 ifq->ifq_altq->altq_ifq = ifq;
125
126 /*
127 * This allows the ALTQ_*() mcaros to work with either
128 * an ifqueue or an ifaltq argument. Yes, it's a hack,
129 * but it results in less code churn.
130 */
131 ifq->ifq_altq->ifq_altq = ifq->ifq_altq;
132 }
133 if (ifp != NULL) {
134 ifq->ifq_altq->altq_ifp = ifp;
135 }
136 }
137
138 /*
139 * Free the ifaltq structure associated with an ifqueue.
140 */
141 void
142 altq_free(struct ifqueue *ifq)
143 {
144 if (ifq->ifq_altq != NULL) {
145 /*
146 * No need to pre-flight these calls; both can handle
147 * the not-enabled / not-attached scenarios.
148 */
149 altq_disable(ifq->ifq_altq);
150 altq_detach(ifq->ifq_altq);
151
152 ifq->ifq_altq->altq_ifp = NULL;
153 kmem_free(ifq->ifq_altq, sizeof(*ifq->ifq_altq));
154 ifq->ifq_altq = NULL;
155 }
156 }
157
158 /*
159 * Mark's a device's send queue as being ready for (as in
160 * "knowledgeable about") ALTQ processing.
161 */
162 void
163 altq_set_ready(struct ifqueue *ifq)
164 {
	/* Ensure the ifaltq exists; the ifnet pointer may be set later. */
165 altq_alloc(ifq, NULL);
166 ifq->ifq_altq->altq_flags |= ALTQF_READY;
167 }
168
169 /* look up the queue state by the interface name and the queueing type. */
170 void *
171 altq_lookup(char *name, int type)
172 {
173 struct ifnet *ifp;
174
175 if ((ifp = ifunit(name)) != NULL) {
176 struct ifaltq *altq = ifp->if_snd.ifq_altq;
177 if (type != ALTQT_NONE && altq->altq_type == type)
178 return (altq->altq_disc);
179 }
180
181 return NULL;
182 }
183
/*
 * altq_attach: install a queueing discipline's method table and state
 * on an ifaltq.  Returns ENXIO if the queue was never marked
 * ALTQ-ready; under ALTQ3_COMPAT an altq3 caller (clfier != NULL)
 * additionally gets EBUSY/EEXIST if a discipline is enabled/attached.
 */
184 int
185 altq_attach(struct ifaltq *altq, int type, void *discipline,
186 int (*enqueue)(struct ifaltq *, struct mbuf *),
187 struct mbuf *(*dequeue)(struct ifaltq *, int),
188 int (*request)(struct ifaltq *, int, void *),
189 void *clfier, void *(*classify)(void *, struct mbuf *, int))
190 {
191 struct ifqueue *ifq = altq->altq_ifq;
192 int error = 0;
193
194 mutex_enter(ifq->ifq_lock);
195
196 if (!ALTQ_IS_READY(ifq)) {
197 error = ENXIO;
198 goto out;
199 }
200
201 #ifdef ALTQ3_COMPAT
202 /*
203 * pfaltq can override the existing discipline, but altq3 cannot.
204 * check these if clfier is not NULL (which implies altq3).
205 */
206 if (clfier != NULL) {
207 if (ALTQ_IS_ENABLED(ifq)) {
208 error = EBUSY;
209 goto out;
210 }
211 if (ALTQ_IS_ATTACHED(ifq)) {
212 error = EEXIST;
213 goto out;
214 }
215 }
216 #endif
	/* Install the discipline's state and method table. */
217 altq->altq_type = type;
218 altq->altq_disc = discipline;
219 altq->altq_enqueue = enqueue;
220 altq->altq_dequeue = dequeue;
221 altq->altq_request = request;
222 altq->altq_clfier = clfier;
223 altq->altq_classify = classify;
	/* Keep only the flags that survive an attach. */
224 altq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
225 #ifdef ALTQ3_COMPAT
226 #ifdef ALTQ_KLD
227 altq_module_incref(type);
228 #endif
229 #endif
230 out:
231 mutex_exit(ifq->ifq_lock);
232 return error;
233 }
234
235 int
236 altq_detach(struct ifaltq *altq)
237 {
238 struct ifqueue *ifq = altq->altq_ifq;
239 int error = 0;
240
241 mutex_enter(ifq->ifq_lock);
242
243 if (!ALTQ_IS_READY(ifq)) {
244 error = ENXIO;
245 goto out;
246 }
247 if (ALTQ_IS_ENABLED(ifq)) {
248 error = EBUSY;
249 goto out;
250 }
251 if (!ALTQ_IS_ATTACHED(ifq)) {
252 goto out;
253 }
254
255 #ifdef ALTQ3_COMPAT
256 #ifdef ALTQ_KLD
257 altq_module_declref(ifq->altq_type);
258 #endif
259 #endif
260
261 altq->altq_type = ALTQT_NONE;
262 altq->altq_disc = NULL;
263 altq->altq_enqueue = NULL;
264 altq->altq_dequeue = NULL;
265 altq->altq_request = NULL;
266 altq->altq_clfier = NULL;
267 altq->altq_classify = NULL;
268 altq->altq_flags &= ALTQF_CANTCHANGE;
269 out:
270 mutex_exit(ifq->ifq_lock);
271 return error;
272 }
273
/*
 * altq_enable: turn on ALTQ processing for the queue.  The default
 * queue is purged first so the discipline starts empty; enabling an
 * already-enabled queue is a no-op returning 0.
 */
274 int
275 altq_enable(struct ifaltq *altq)
276 {
277 struct ifqueue *ifq = altq->altq_ifq;
278 struct mbuf *m = NULL;
279 int error = 0;
280
281 mutex_enter(ifq->ifq_lock);
282
283 if (!ALTQ_IS_READY(ifq)) {
284 error = ENXIO;
285 goto out;
286 }
287 if (ALTQ_IS_ENABLED(ifq)) {
288 goto out;
289 }
290
	/* Drain the queue under the lock; packets are freed after unlock. */
291 m = ifq_purge_locked(ifq);
292 ASSERT(ALTQ_GET_LEN(altq) == 0);
293 altq->altq_flags |= ALTQF_ENABLED;
294 if (altq->altq_clfier != NULL)
295 altq->altq_flags |= ALTQF_CLASSIFY;
296 out:
297 mutex_exit(ifq->ifq_lock);
	/* Free the purged chain outside the ifq lock. */
298 if (m != NULL) {
299 ifq_purge_free(m);
300 }
301 return error;
302 }
303
/*
 * altq_disable: turn off ALTQ processing, purging any queued packets.
 * Always returns 0; disabling a not-enabled queue is a no-op.
 */
304 int
305 altq_disable(struct ifaltq *altq)
306 {
307 struct ifqueue *ifq = altq->altq_ifq;
308 struct mbuf *m = NULL;
309
310 mutex_enter(ifq->ifq_lock);
311
312 if (!ALTQ_IS_ENABLED(ifq)) {
313 goto out;
314 }
315
	/* Drain under the lock; the chain is freed after unlock. */
316 m = ifq_purge_locked(ifq);
317 ASSERT(ALTQ_GET_LEN(altq) == 0);
318 altq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
319 out:
320 mutex_exit(ifq->ifq_lock);
321 if (m != NULL) {
322 ifq_purge_free(m);
323 }
324 return 0;
325 }
326
327 #ifdef ALTQ_DEBUG
/*
 * Report a failed ALTQ assertion (ALTQ_DEBUG builds) and panic.
 */
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
336 #endif
337
338 /*
339 * internal representation of token bucket parameters
340 * rate: byte_per_unittime << 32
341 * (((bits_per_sec) / 8) << 32) / machclk_freq
342 * depth: byte << 32
343 *
344 */
345 #define TBR_SHIFT 32
346 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
347 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
348
/*
 * tbr_dequeue: dequeue routine gated by the token bucket regulator.
 * Tokens are replenished lazily from the machine clock; a dequeue is
 * refused (NULL) while the token count is non-positive, except for a
 * REMOVE that immediately follows a POLL (the packet was already
 * committed to the caller).
 */
349 struct mbuf *
350 tbr_dequeue(struct ifaltq *ifq, int op)
351 {
352 struct tb_regulator *tbr;
353 struct mbuf *m;
354 int64_t interval;
355 u_int64_t now;
356
357 tbr = ifq->altq_tbr;
358 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
359 /* if this is a remove after poll, bypass tbr check */
360 } else {
361 /* update token only when it is negative */
362 if (tbr->tbr_token <= 0) {
363 now = read_machclk();
364 interval = now - tbr->tbr_last;
365 if (interval >= tbr->tbr_filluptime)
366 tbr->tbr_token = tbr->tbr_depth;
367 else {
368 tbr->tbr_token += interval * tbr->tbr_rate;
	/* Clamp to bucket depth. */
369 if (tbr->tbr_token > tbr->tbr_depth)
370 tbr->tbr_token = tbr->tbr_depth;
371 }
372 tbr->tbr_last = now;
373 }
374 /* if token is still negative, don't allow dequeue */
375 if (tbr->tbr_token <= 0)
376 return (NULL);
377 }
378
	/* Pull from the discipline if enabled, else the default queue. */
379 if (ALTQ_IS_ENABLED(ifq))
380 m = (*ifq->altq_dequeue)(ifq, op);
381 else {
382 if (op == ALTDQ_POLL)
383 IF_POLL(ifq->altq_ifq, m);
384 else
385 IF_DEQUEUE(ifq->altq_ifq, m);
386 }
387
	/* Charge the packet's length against the bucket on actual removal. */
388 if (m != NULL && op == ALTDQ_REMOVE)
389 tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
390 tbr->tbr_lastop = op;
391 return (m);
392 }
393
394 /*
395 * set a token bucket regulator.
396 * if the specified rate is zero, the token bucket regulator is deleted.
397 */
398 int
399 tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
400 {
401 struct tb_regulator *tbr, *otbr;
402
	/* Lazily initialize the emulated machine clock. */
403 if (machclk_freq == 0)
404 init_machclk();
405 if (machclk_freq == 0) {
406 printf("tbr_set: no CPU clock available!\n");
407 return (ENXIO);
408 }
409
410 if (profile->rate == 0) {
411 /* delete this tbr */
412 if ((tbr = ifq->altq_tbr) == NULL)
413 return (ENOENT);
414 ifq->altq_tbr = NULL;
415 free(tbr, M_DEVBUF);
416 return (0);
417 }
418
419 tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_WAITOK|M_ZERO);
420 if (tbr == NULL)
421 return (ENOMEM);
422
	/*
	 * Convert bits/sec to the scaled internal form:
	 * bytes-per-machclk-tick << TBR_SHIFT.
	 */
423 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
424 tbr->tbr_depth = TBR_SCALE(profile->depth);
425 if (tbr->tbr_rate > 0)
426 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
427 else
428 tbr->tbr_filluptime = 0xffffffffffffffffLL;
429 tbr->tbr_token = tbr->tbr_depth;
430 tbr->tbr_last = read_machclk();
431 tbr->tbr_lastop = ALTDQ_REMOVE;
432
433 otbr = ifq->altq_tbr;
434 ifq->altq_tbr = tbr; /* set the new tbr */
435
	/*
	 * First TBR on the system starts the kick-the-drivers timer;
	 * replacing an existing one just frees the old state.
	 */
436 if (otbr != NULL) {
437 free(otbr, M_DEVBUF);
438 } else {
439 if (tbr_timer == 0) {
440 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
441 tbr_timer = 1;
442 }
443 }
444 return (0);
445 }
446
447 /*
448 * tbr_timeout goes through the interface list, and kicks the drivers
449 * if necessary.
450 */
451 static void
452 tbr_timeout(void *arg)
453 {
454 struct ifnet *ifp;
455 int active, s;
456
457 active = 0;
	/* Walk the interface list under pserialize; take a psref on each
	 * TBR-enabled interface before dropping the read section. */
458 s = pserialize_read_enter();
459 IFNET_READER_FOREACH(ifp) {
460 struct psref psref;
461 if (!TBR_IS_ENABLED(&ifp->if_snd))
462 continue;
463 if_acquire(ifp, &psref);
464 pserialize_read_exit(s);
465
466 active++;
467 if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL) {
468 int _s = splnet();
469 if_start_lock(ifp);
470 splx(_s);
471 }
472
	/* Re-enter the read section before releasing our reference. */
473 s = pserialize_read_enter();
474 if_release(ifp, &psref);
475 }
476 pserialize_read_exit(s);
477
	/* Re-arm only while at least one TBR-enabled interface exists. */
478 if (active > 0)
479 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
480 else
481 tbr_timer = 0; /* don't need tbr_timer anymore */
482 }
483
484 /*
485 * get token bucket regulator profile
486 */
487 int
488 tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
489 {
490 struct tb_regulator *tbr;
491
492 if ((tbr = ifq->altq_tbr) == NULL) {
493 profile->rate = 0;
494 profile->depth = 0;
495 } else {
496 profile->rate =
497 (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
498 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
499 }
500 return (0);
501 }
502
503 #if NPF > 0
504 /*
505 * attach a discipline to the interface. if one already exists, it is
506 * overridden.
507 */
508 int
509 altq_pfattach(struct pf_altq *a)
510 {
511 int error = 0;
512
	/* Dispatch on scheduler type; ENXIO for types not compiled in. */
513 switch (a->scheduler) {
514 case ALTQT_NONE:
515 break;
516 #ifdef ALTQ_CBQ
517 case ALTQT_CBQ:
518 error = cbq_pfattach(a);
519 break;
520 #endif
521 #ifdef ALTQ_PRIQ
522 case ALTQT_PRIQ:
523 error = priq_pfattach(a);
524 break;
525 #endif
526 #ifdef ALTQ_HFSC
527 case ALTQT_HFSC:
528 error = hfsc_pfattach(a);
529 break;
530 #endif
531 default:
532 error = ENXIO;
533 }
534
535 return (error);
536 }
537
538 /*
539 * detach a discipline from the interface.
540 * it is possible that the discipline was already overridden by another
541 * discipline.
542 */
543 int
544 altq_pfdetach(struct pf_altq *a)
545 {
546 struct ifnet *ifp;
547 int s, error = 0;
548
549 if ((ifp = ifunit(a->ifname)) == NULL)
550 return (EINVAL);
551
552 /* if this discipline is no longer referenced, just return */
553 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
554 return (0);
555
	/* Disable first, then detach, both at splnet. */
556 s = splnet();
557 if (ALTQ_IS_ENABLED(&ifp->if_snd))
558 error = altq_disable(&ifp->if_snd);
559 if (error == 0)
560 error = altq_detach(&ifp->if_snd);
561 splx(s);
562
563 return (error);
564 }
565
566 /*
567 * add a discipline or a queue
568 */
569 int
570 altq_add(struct pf_altq *a)
571 {
572 int error = 0;
573
	/* A non-empty qname means "add a queue", not a discipline. */
574 if (a->qname[0] != 0)
575 return (altq_add_queue(a));
576
577 if (machclk_freq == 0)
578 init_machclk();
579 if (machclk_freq == 0)
580 panic("altq_add: no CPU clock");
581
	/* Dispatch on scheduler type; ENXIO for types not compiled in. */
582 switch (a->scheduler) {
583 #ifdef ALTQ_CBQ
584 case ALTQT_CBQ:
585 error = cbq_add_altq(a);
586 break;
587 #endif
588 #ifdef ALTQ_PRIQ
589 case ALTQT_PRIQ:
590 error = priq_add_altq(a);
591 break;
592 #endif
593 #ifdef ALTQ_HFSC
594 case ALTQT_HFSC:
595 error = hfsc_add_altq(a);
596 break;
597 #endif
598 default:
599 error = ENXIO;
600 }
601
602 return (error);
603 }
604
605 /*
606 * remove a discipline or a queue
607 */
608 int
609 altq_remove(struct pf_altq *a)
610 {
611 int error = 0;
612
	/* A non-empty qname means "remove a queue", not the discipline. */
613 if (a->qname[0] != 0)
614 return (altq_remove_queue(a));
615
616 switch (a->scheduler) {
617 #ifdef ALTQ_CBQ
618 case ALTQT_CBQ:
619 error = cbq_remove_altq(a);
620 break;
621 #endif
622 #ifdef ALTQ_PRIQ
623 case ALTQT_PRIQ:
624 error = priq_remove_altq(a);
625 break;
626 #endif
627 #ifdef ALTQ_HFSC
628 case ALTQT_HFSC:
629 error = hfsc_remove_altq(a);
630 break;
631 #endif
632 default:
633 error = ENXIO;
634 }
635
636 return (error);
637 }
638
639 /*
640 * add a queue to the discipline
641 */
642 int
643 altq_add_queue(struct pf_altq *a)
644 {
645 int error = 0;
646
	/* Dispatch on scheduler type; ENXIO for types not compiled in. */
647 switch (a->scheduler) {
648 #ifdef ALTQ_CBQ
649 case ALTQT_CBQ:
650 error = cbq_add_queue(a);
651 break;
652 #endif
653 #ifdef ALTQ_PRIQ
654 case ALTQT_PRIQ:
655 error = priq_add_queue(a);
656 break;
657 #endif
658 #ifdef ALTQ_HFSC
659 case ALTQT_HFSC:
660 error = hfsc_add_queue(a);
661 break;
662 #endif
663 default:
664 error = ENXIO;
665 }
666
667 return (error);
668 }
669
670 /*
671 * remove a queue from the discipline
672 */
673 int
674 altq_remove_queue(struct pf_altq *a)
675 {
676 int error = 0;
677
	/* Dispatch on scheduler type; ENXIO for types not compiled in. */
678 switch (a->scheduler) {
679 #ifdef ALTQ_CBQ
680 case ALTQT_CBQ:
681 error = cbq_remove_queue(a);
682 break;
683 #endif
684 #ifdef ALTQ_PRIQ
685 case ALTQT_PRIQ:
686 error = priq_remove_queue(a);
687 break;
688 #endif
689 #ifdef ALTQ_HFSC
690 case ALTQT_HFSC:
691 error = hfsc_remove_queue(a);
692 break;
693 #endif
694 default:
695 error = ENXIO;
696 }
697
698 return (error);
699 }
700
701 /*
702 * get queue statistics
703 */
704 int
705 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
706 {
707 int error = 0;
708
	/* Dispatch on scheduler type; ENXIO for types not compiled in. */
709 switch (a->scheduler) {
710 #ifdef ALTQ_CBQ
711 case ALTQT_CBQ:
712 error = cbq_getqstats(a, ubuf, nbytes);
713 break;
714 #endif
715 #ifdef ALTQ_PRIQ
716 case ALTQT_PRIQ:
717 error = priq_getqstats(a, ubuf, nbytes);
718 break;
719 #endif
720 #ifdef ALTQ_HFSC
721 case ALTQT_HFSC:
722 error = hfsc_getqstats(a, ubuf, nbytes);
723 break;
724 #endif
725 default:
726 error = ENXIO;
727 }
728
729 return (error);
730 }
731 #endif /* NPF > 0 */
732
733 /*
734 * read and write diffserv field in IPv4 or IPv6 header
735 */
/*
 * read_dsfield: return the diffserv/TOS byte of the packet whose IP
 * header is cached in pktattr, or 0 if the attribute is missing,
 * stale, or the IP version does not match.
 */
736 u_int8_t
737 read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
738 {
739 struct mbuf *m0;
740 u_int8_t ds_field = 0;
741
742 if (pktattr == NULL ||
743 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
744 return ((u_int8_t)0);
745
746 /* verify that pattr_hdr is within the mbuf data */
747 for (m0 = m; m0 != NULL; m0 = m0->m_next)
748 if (((char *)pktattr->pattr_hdr >= m0->m_data) &&
749 ((char *)pktattr->pattr_hdr < m0->m_data + m0->m_len))
750 break;
751 if (m0 == NULL) {
752 /* ick, pattr_hdr is stale */
753 pktattr->pattr_af = AF_UNSPEC;
754 #ifdef ALTQ_DEBUG
755 printf("read_dsfield: can't locate header!\n");
756 #endif
757 return ((u_int8_t)0);
758 }
759
760 if (pktattr->pattr_af == AF_INET) {
761 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
762
763 if (ip->ip_v != 4)
764 return ((u_int8_t)0); /* version mismatch! */
765 ds_field = ip->ip_tos;
766 }
767 #ifdef INET6
768 else if (pktattr->pattr_af == AF_INET6) {
769 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
770 u_int32_t flowlabel;
771
772 flowlabel = ntohl(ip6->ip6_flow);
773 if ((flowlabel >> 28) != 6)
774 return ((u_int8_t)0); /* version mismatch! */
	/* Traffic class occupies bits 20-27 of the ip6_flow word. */
775 ds_field = (flowlabel >> 20) & 0xff;
776 }
777 #endif
778 return (ds_field);
779 }
780
/*
 * write_dsfield: overwrite the diffserv field of the cached IP header,
 * preserving the two CU bits for IPv4 and incrementally updating the
 * IPv4 header checksum.  Silently returns on stale/mismatched state.
 */
781 void
782 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
783 {
784 struct mbuf *m0;
785
786 if (pktattr == NULL ||
787 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
788 return;
789
790 /* verify that pattr_hdr is within the mbuf data */
791 for (m0 = m; m0 != NULL; m0 = m0->m_next)
792 if (((char *)pktattr->pattr_hdr >= m0->m_data) &&
793 ((char *)pktattr->pattr_hdr < m0->m_data + m0->m_len))
794 break;
795 if (m0 == NULL) {
796 /* ick, pattr_hdr is stale */
797 pktattr->pattr_af = AF_UNSPEC;
798 #ifdef ALTQ_DEBUG
799 printf("write_dsfield: can't locate header!\n");
800 #endif
801 return;
802 }
803
804 if (pktattr->pattr_af == AF_INET) {
805 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
806 u_int8_t old;
807 int32_t sum;
808
809 if (ip->ip_v != 4)
810 return; /* version mismatch! */
811 old = ip->ip_tos;
812 dsfield |= old & 3; /* leave CU bits */
813 if (old == dsfield)
814 return;
815 ip->ip_tos = dsfield;
816 /*
817 * update checksum (from RFC1624)
818 * HC' = ~(~HC + ~m + m')
819 */
820 sum = ~ntohs(ip->ip_sum) & 0xffff;
821 sum += 0xff00 + (~old & 0xff) + dsfield;
822 sum = (sum >> 16) + (sum & 0xffff);
823 sum += (sum >> 16); /* add carry */
824
825 ip->ip_sum = htons(~sum & 0xffff);
826 }
827 #ifdef INET6
828 else if (pktattr->pattr_af == AF_INET6) {
829 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
830 u_int32_t flowlabel;
831
832 flowlabel = ntohl(ip6->ip6_flow);
833 if ((flowlabel >> 28) != 6)
834 return; /* version mismatch! */
	/* Replace traffic-class bits 20-27; keep version and flow label. */
835 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
836 ip6->ip6_flow = htonl(flowlabel);
837 }
838 #endif
839 return;
840 }
841
/* Scale-down applied to the bintime-derived counter in read_machclk(). */
842 #define BINTIME_SHIFT 2
843
/* Emulated machine-clock frequency (set by init_machclk) and its per-tick
 * increment; zero until init_machclk() runs. */
844 u_int32_t machclk_freq = 0;
845 u_int32_t machclk_per_tick = 0;
846
/*
 * init_machclk: initialize the TBR callout and set up the emulated
 * machine clock (2^30 Hz derived from bintime(9)).
 */
847 void
848 init_machclk(void)
849 {
850
851 callout_init(&tbr_callout, 0);
852
853 /*
854 * Always emulate 1GiHz counter using bintime(9)
855 * since it has enough resolution via timecounter(9).
856 * Using machine dependent cpu_counter() is not MP safe
857 * and it won't work even on UP with Speedstep etc.
858 */
859 machclk_freq = 1024 * 1024 * 1024; /* 2^30 to emulate ~1GHz */
860 machclk_per_tick = machclk_freq / hz;
861 #ifdef ALTQ_DEBUG
862 printf("altq: emulate %uHz CPU clock\n", machclk_freq);
863 #endif
864 }
865
866 u_int64_t
867 read_machclk(void)
868 {
869 struct bintime bt;
870 u_int64_t val;
871
872 binuptime(&bt);
873 val = (((u_int64_t)bt.sec << 32) + (bt.frac >> 32)) >> BINTIME_SHIFT;
874 return (val);
875 }
876
877 #ifdef ALTQ3_CLFIER_COMPAT
878
879 #ifndef IPPROTO_ESP
880 #define IPPROTO_ESP 50 /* encapsulating security payload */
881 #endif
882 #ifndef IPPROTO_AH
883 #define IPPROTO_AH 51 /* authentication header */
884 #endif
885
886 /*
887 * extract flow information from a given packet.
888 * filt_mask shows flowinfo fields required.
889 * we assume the ip header is in one mbuf, and addresses and ports are
890 * in network byte order.
891 */
892 int
893 altq_extractflow(struct mbuf *m, int af, struct flowinfo *flow,
894 u_int32_t filt_bmask)
895 {
896
897 switch (af) {
898 case PF_INET: {
899 struct flowinfo_in *fin;
900 struct ip *ip;
901
902 ip = mtod(m, struct ip *);
903
904 if (ip->ip_v != 4)
905 break;
906
907 fin = (struct flowinfo_in *)flow;
908 fin->fi_len = sizeof(struct flowinfo_in);
909 fin->fi_family = AF_INET;
910
911 fin->fi_proto = ip->ip_p;
912 fin->fi_tos = ip->ip_tos;
913
914 fin->fi_src.s_addr = ip->ip_src.s_addr;
915 fin->fi_dst.s_addr = ip->ip_dst.s_addr;
916
917 if (filt_bmask & FIMB4_PORTS)
918 /* if port info is required, extract port numbers */
919 extract_ports4(m, ip, fin);
920 else {
921 fin->fi_sport = 0;
922 fin->fi_dport = 0;
923 fin->fi_gpi = 0;
924 }
925 return (1);
926 }
927
928 #ifdef INET6
929 case PF_INET6: {
930 struct flowinfo_in6 *fin6;
931 struct ip6_hdr *ip6;
932
933 ip6 = mtod(m, struct ip6_hdr *);
934 /* should we check the ip version? */
935
936 fin6 = (struct flowinfo_in6 *)flow;
937 fin6->fi6_len = sizeof(struct flowinfo_in6);
938 fin6->fi6_family = AF_INET6;
939
940 fin6->fi6_proto = ip6->ip6_nxt;
941 fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
942
943 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
944 fin6->fi6_src = ip6->ip6_src;
945 fin6->fi6_dst = ip6->ip6_dst;
946
947 if ((filt_bmask & FIMB6_PORTS) ||
948 ((filt_bmask & FIMB6_PROTO)
949 && ip6->ip6_nxt > IPPROTO_IPV6))
950 /*
951 * if port info is required, or proto is required
952 * but there are option headers, extract port
953 * and protocol numbers.
954 */
955 extract_ports6(m, ip6, fin6);
956 else {
957 fin6->fi6_sport = 0;
958 fin6->fi6_dport = 0;
959 fin6->fi6_gpi = 0;
960 }
961 return (1);
962 }
963 #endif /* INET6 */
964
965 default:
966 break;
967 }
968
969 /* failed */
970 flow->fi_len = sizeof(struct flowinfo);
971 flow->fi_family = AF_UNSPEC;
972 return (0);
973 }
974
975 /*
976 * helper routine to extract port numbers
977 */
978 /* structure for ipsec and ipv6 option header template */
/* Overlay for the leading 8 bytes of IPv6 option headers and IPsec
 * AH/ESP headers; used while walking extension-header chains. */
979 struct _opt6 {
980 u_int8_t opt6_nxt; /* next header */
981 u_int8_t opt6_hlen; /* header extension length */
982 u_int16_t _pad;
983 u_int32_t ah_spi; /* security parameter index
984 for authentication header */
985 };
986
987 /*
988 * extract port numbers from a ipv4 packet.
989 */
/*
 * extract_ports4: fill fi_sport/fi_dport/fi_gpi of an IPv4 flowinfo by
 * walking past the IP header (and, with ALTQ_IPSEC, AH headers) in the
 * mbuf chain.  Non-first fragments are resolved via the fragment
 * cache.  Returns 1 on success, 0 when the header cannot be located.
 */
990 static int
991 extract_ports4(struct mbuf *m, struct ip *ip, struct flowinfo_in *fin)
992 {
993 struct mbuf *m0;
994 u_short ip_off;
995 u_int8_t proto;
996 int off;
997
998 fin->fi_sport = 0;
999 fin->fi_dport = 0;
1000 fin->fi_gpi = 0;
1001
1002 ip_off = ntohs(ip->ip_off);
1003 /* if it is a fragment, try cached fragment info */
1004 if (ip_off & IP_OFFMASK) {
1005 ip4f_lookup(ip, fin);
1006 return (1);
1007 }
1008
1009 /* locate the mbuf containing the protocol header */
1010 for (m0 = m; m0 != NULL; m0 = m0->m_next)
1011 if (((char *)ip >= m0->m_data) &&
1012 ((char *)ip < m0->m_data + m0->m_len))
1013 break;
1014 if (m0 == NULL) {
1015 #ifdef ALTQ_DEBUG
1016 printf("extract_ports4: can't locate header! ip=%p\n", ip);
1017 #endif
1018 return (0);
1019 }
	/* Offset of the transport header within this mbuf. */
1020 off = ((char *)ip - m0->m_data) + (ip->ip_hl << 2);
1021 proto = ip->ip_p;
1022
1023 #ifdef ALTQ_IPSEC
1024 again:
1025 #endif
	/* Advance to the mbuf actually holding offset 'off'. */
1026 while (off >= m0->m_len) {
1027 off -= m0->m_len;
1028 m0 = m0->m_next;
1029 if (m0 == NULL)
1030 return (0); /* bogus ip_hl! */
1031 }
	/* Need at least 4 contiguous bytes (ports or SPI). */
1032 if (m0->m_len < off + 4)
1033 return (0);
1034
1035 switch (proto) {
1036 case IPPROTO_TCP:
1037 case IPPROTO_UDP: {
1038 struct udphdr *udp;
1039
	/* TCP and UDP both start with src/dst port pairs. */
1040 udp = (struct udphdr *)(mtod(m0, char *) + off);
1041 fin->fi_sport = udp->uh_sport;
1042 fin->fi_dport = udp->uh_dport;
1043 fin->fi_proto = proto;
1044 }
1045 break;
1046
1047 #ifdef ALTQ_IPSEC
1048 case IPPROTO_ESP:
1049 if (fin->fi_gpi == 0){
1050 u_int32_t *gpi;
1051
1052 gpi = (u_int32_t *)(mtod(m0, char *) + off);
1053 fin->fi_gpi = *gpi;
1054 }
1055 fin->fi_proto = proto;
1056 break;
1057
1058 case IPPROTO_AH: {
1059 /* get next header and header length */
1060 struct _opt6 *opt6;
1061
1062 opt6 = (struct _opt6 *)(mtod(m0, char *) + off);
1063 proto = opt6->opt6_nxt;
1064 off += 8 + (opt6->opt6_hlen * 4);
1065 if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
1066 fin->fi_gpi = opt6->ah_spi;
1067 }
1068 /* goto the next header */
1069 goto again;
1070 #endif /* ALTQ_IPSEC */
1071
1072 default:
1073 fin->fi_proto = proto;
1074 return (0);
1075 }
1076
1077 /* if this is a first fragment, cache it. */
1078 if (ip_off & IP_MF)
1079 ip4f_cache(ip, fin);
1080
1081 return (1);
1082 }
1083
1084 #ifdef INET6
/*
 * extract_ports6: IPv6 counterpart of extract_ports4.  Walks the
 * extension-header chain (hop-by-hop, routing, dst-opts, AH) to find
 * the transport header and fills fi6_sport/fi6_dport/fi6_gpi.
 * Returns 1 on success, 0 on failure (including fragments, which are
 * not supported).
 */
1085 static int
1086 extract_ports6(struct mbuf *m, struct ip6_hdr *ip6, struct flowinfo_in6 *fin6)
1087 {
1088 struct mbuf *m0;
1089 int off;
1090 u_int8_t proto;
1091
1092 fin6->fi6_gpi = 0;
1093 fin6->fi6_sport = 0;
1094 fin6->fi6_dport = 0;
1095
1096 /* locate the mbuf containing the protocol header */
1097 for (m0 = m; m0 != NULL; m0 = m0->m_next)
1098 if (((char *)ip6 >= m0->m_data) &&
1099 ((char *)ip6 < m0->m_data + m0->m_len))
1100 break;
1101 if (m0 == NULL) {
1102 #ifdef ALTQ_DEBUG
1103 printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
1104 #endif
1105 return (0);
1106 }
1107 off = ((char *)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
1108
1109 proto = ip6->ip6_nxt;
1110 do {
	/* Advance to the mbuf actually holding offset 'off'. */
1111 while (off >= m0->m_len) {
1112 off -= m0->m_len;
1113 m0 = m0->m_next;
1114 if (m0 == NULL)
1115 return (0);
1116 }
1117 if (m0->m_len < off + 4)
1118 return (0);
1119
1120 switch (proto) {
1121 case IPPROTO_TCP:
1122 case IPPROTO_UDP: {
1123 struct udphdr *udp;
1124
1125 udp = (struct udphdr *)(mtod(m0, char *) + off);
1126 fin6->fi6_sport = udp->uh_sport;
1127 fin6->fi6_dport = udp->uh_dport;
1128 fin6->fi6_proto = proto;
1129 }
1130 return (1);
1131
1132 case IPPROTO_ESP:
1133 if (fin6->fi6_gpi == 0) {
1134 u_int32_t *gpi;
1135
1136 gpi = (u_int32_t *)(mtod(m0, char *) + off);
1137 fin6->fi6_gpi = *gpi;
1138 }
1139 fin6->fi6_proto = proto;
1140 return (1);
1141
1142 case IPPROTO_AH: {
1143 /* get next header and header length */
1144 struct _opt6 *opt6;
1145
1146 opt6 = (struct _opt6 *)(mtod(m0, char *) + off);
1147 if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
1148 fin6->fi6_gpi = opt6->ah_spi;
1149 proto = opt6->opt6_nxt;
	/* AH length is in 4-byte units, excluding the first 8 bytes. */
1150 off += 8 + (opt6->opt6_hlen * 4);
1151 /* goto the next header */
1152 break;
1153 }
1154
1155 case IPPROTO_HOPOPTS:
1156 case IPPROTO_ROUTING:
1157 case IPPROTO_DSTOPTS: {
1158 /* get next header and header length */
1159 struct _opt6 *opt6;
1160
1161 opt6 = (struct _opt6 *)(mtod(m0, char *) + off);
1162 proto = opt6->opt6_nxt;
	/* Option-header length is in 8-byte units, not counting the first 8. */
1163 off += (opt6->opt6_hlen + 1) * 8;
1164 /* goto the next header */
1165 break;
1166 }
1167
1168 case IPPROTO_FRAGMENT:
1169 /* ipv6 fragmentations are not supported yet */
1170 default:
1171 fin6->fi6_proto = proto;
1172 return (0);
1173 }
1174 } while (1);
1175 /*NOTREACHED*/
1176 }
1177 #endif /* INET6 */
1178
1179 /*
1180 * altq common classifier
1181 */
/*
 * acc_add_filter: register a flow filter with the common classifier.
 * Canonicalizes address masks (0 address -> wildcard, unset mask ->
 * full mask), picks a hash bucket, and inserts the filter in
 * descending rule-number order.  Returns the new filter handle via
 * *phandle.
 */
1182 int
1183 acc_add_filter(struct acc_classifier *classifier, struct flow_filter *filter,
1184 void *class, u_long *phandle)
1185 {
1186 struct acc_filter *afp, *prev, *tmp;
1187 int i, s;
1188
1189 #ifdef INET6
1190 if (filter->ff_flow.fi_family != AF_INET &&
1191 filter->ff_flow.fi_family != AF_INET6)
1192 return (EINVAL);
1193 #else
1194 if (filter->ff_flow.fi_family != AF_INET)
1195 return (EINVAL);
1196 #endif
1197
1198 afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK|M_ZERO);
1199 if (afp == NULL)
1200 return (ENOMEM);
1201
1202 afp->f_filter = *filter;
1203 afp->f_class = class;
1204
1205 i = ACC_WILDCARD_INDEX;
1206 if (filter->ff_flow.fi_family == AF_INET) {
1207 struct flow_filter *filter4 = &afp->f_filter;
1208
1209 /*
1210 * if address is 0, it's a wildcard. if address mask
1211 * isn't set, use full mask.
1212 */
1213 if (filter4->ff_flow.fi_dst.s_addr == 0)
1214 filter4->ff_mask.mask_dst.s_addr = 0;
1215 else if (filter4->ff_mask.mask_dst.s_addr == 0)
1216 filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
1217 if (filter4->ff_flow.fi_src.s_addr == 0)
1218 filter4->ff_mask.mask_src.s_addr = 0;
1219 else if (filter4->ff_mask.mask_src.s_addr == 0)
1220 filter4->ff_mask.mask_src.s_addr = 0xffffffff;
1221
1222 /* clear extra bits in addresses */
1223 filter4->ff_flow.fi_dst.s_addr &=
1224 filter4->ff_mask.mask_dst.s_addr;
1225 filter4->ff_flow.fi_src.s_addr &=
1226 filter4->ff_mask.mask_src.s_addr;
1227
1228 /*
1229 * if dst address is a wildcard, use hash-entry
1230 * ACC_WILDCARD_INDEX.
1231 */
1232 if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
1233 i = ACC_WILDCARD_INDEX;
1234 else
1235 i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
1236 }
1237 #ifdef INET6
1238 else if (filter->ff_flow.fi_family == AF_INET6) {
1239 struct flow_filter6 *filter6 =
1240 (struct flow_filter6 *)&afp->f_filter;
1241 #ifndef IN6MASK0 /* taken from kame ipv6 */
1242 #define IN6MASK0 {{{ 0, 0, 0, 0 }}}
1243 #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
1244 const struct in6_addr in6mask0 = IN6MASK0;
1245 const struct in6_addr in6mask128 = IN6MASK128;
1246 #endif
1247
1248 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
1249 filter6->ff_mask6.mask6_dst = in6mask0;
1250 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
1251 filter6->ff_mask6.mask6_dst = in6mask128;
1252 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
1253 filter6->ff_mask6.mask6_src = in6mask0;
1254 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
1255 filter6->ff_mask6.mask6_src = in6mask128;
1256
1257 /* clear extra bits in addresses */
1258 for (i = 0; i < 16; i++)
1259 filter6->ff_flow6.fi6_dst.s6_addr[i] &=
1260 filter6->ff_mask6.mask6_dst.s6_addr[i];
1261 for (i = 0; i < 16; i++)
1262 filter6->ff_flow6.fi6_src.s6_addr[i] &=
1263 filter6->ff_mask6.mask6_src.s6_addr[i];
1264
	/* i is reused below as the hash bucket index. */
1265 if (filter6->ff_flow6.fi6_flowlabel == 0)
1266 i = ACC_WILDCARD_INDEX;
1267 else
1268 i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
1269 }
1270 #endif /* INET6 */
1271
1272 afp->f_handle = get_filt_handle(classifier, i);
1273
1274 /* update filter bitmask */
1275 afp->f_fbmask = filt2fibmask(filter);
1276 classifier->acc_fbmask |= afp->f_fbmask;
1277
1278 /*
1279 * add this filter to the filter list.
1280 * filters are ordered from the highest rule number.
1281 */
1282 s = splnet();
1283 prev = NULL;
1284 LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
1285 if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
1286 prev = tmp;
1287 else
1288 break;
1289 }
1290 if (prev == NULL)
1291 LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
1292 else
1293 LIST_INSERT_AFTER(prev, afp, f_chain);
1294 splx(s);
1295
1296 *phandle = afp->f_handle;
1297 return (0);
1298 }
1299
1300 int
1301 acc_delete_filter(struct acc_classifier *classifier, u_long handle)
1302 {
1303 struct acc_filter *afp;
1304 int s;
1305
1306 if ((afp = filth_to_filtp(classifier, handle)) == NULL)
1307 return (EINVAL);
1308
1309 s = splnet();
1310 LIST_REMOVE(afp, f_chain);
1311 splx(s);
1312
1313 free(afp, M_DEVBUF);
1314
1315 /* todo: update filt_bmask */
1316
1317 return (0);
1318 }
1319
1320 /*
1321 * delete filters referencing to the specified class.
1322 * if the all flag is not 0, delete all the filters.
1323 */
1324 int
1325 acc_discard_filters(struct acc_classifier *classifier, void *class, int all)
1326 {
1327 struct acc_filter *afp;
1328 int i, s;
1329
1330 s = splnet();
1331 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
1332 do {
1333 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1334 if (all || afp->f_class == class) {
1335 LIST_REMOVE(afp, f_chain);
1336 free(afp, M_DEVBUF);
1337 /* start again from the head */
1338 break;
1339 }
1340 } while (afp != NULL);
1341 }
1342 splx(s);
1343
1344 if (all)
1345 classifier->acc_fbmask = 0;
1346
1347 return (0);
1348 }
1349
/*
 * Classify a packet: extract the flow info from mbuf "m" (address
 * family "af") and walk the filter hash buckets for a match.
 * Returns the matched filter's class, or NULL when no filter matches.
 * Filters within a bucket are ordered by descending rule number (see
 * the insertion code in acc_add_filter), so the first hit wins.
 */
void *
acc_classify(void *clfier, struct mbuf *m, int af)
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
1446
/*
 * Full IPv4 filter match: every field selected by "fbmask" must match
 * the packet's flow info "pkt" for the filter "filt" to apply.
 * Addresses and the tos byte are compared after masking the packet
 * value with the filter's mask; ports, protocol, and gpi are compared
 * exactly.  Returns 1 on match, 0 otherwise.
 */
static int
apply_filter4(u_int32_t fbmask, struct flow_filter *filt,
    struct flowinfo_in *pkt)
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}
1475
1476 /*
1477 * filter matching function optimized for a common case that checks
1478 * only protocol and port numbers
1479 */
1480 static int
1481 apply_ppfilter4(u_int32_t fbmask, struct flow_filter *filt,
1482 struct flowinfo_in *pkt)
1483 {
1484 if (filt->ff_flow.fi_family != AF_INET)
1485 return (0);
1486 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1487 return (0);
1488 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1489 return (0);
1490 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1491 return (0);
1492 /* match */
1493 return (1);
1494 }
1495
1496 /*
1497 * filter matching function only for tos field.
1498 */
1499 static int
1500 apply_tosfilter4(u_int32_t fbmask, struct flow_filter *filt,
1501 struct flowinfo_in *pkt)
1502 {
1503 if (filt->ff_flow.fi_family != AF_INET)
1504 return (0);
1505 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1506 (pkt->fi_tos & filt->ff_mask.mask_tos))
1507 return (0);
1508 /* match */
1509 return (1);
1510 }
1511
#ifdef INET6
/*
 * Full IPv6 filter match, analogous to apply_filter4: every field
 * selected by "fbmask" must match for filter "filt" to apply to the
 * packet flow info "pkt".  The 128-bit addresses are compared 32 bits
 * at a time after masking the packet address with the filter's mask;
 * the traffic class is masked likewise.  Returns 1 on match, 0 on
 * mismatch.
 */
static int
apply_filter6(u_int32_t fbmask, struct flow_filter6 *filt,
    struct flowinfo_in6 *pkt)
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		/* compare the masked source address word by word */
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		/* compare the masked destination address word by word */
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */
1558
1559 /*
1560 * filter handle:
1561 * bit 20-28: index to the filter hash table
1562 * bit 0-19: unique id in the hash bucket.
1563 */
1564 static u_long
1565 get_filt_handle(struct acc_classifier *classifier, int i)
1566 {
1567 static u_long handle_number = 1;
1568 u_long handle;
1569 struct acc_filter *afp;
1570
1571 while (1) {
1572 handle = handle_number++ & 0x000fffff;
1573
1574 if (LIST_EMPTY(&classifier->acc_filters[i]))
1575 break;
1576
1577 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1578 if ((afp->f_handle & 0x000fffff) == handle)
1579 break;
1580 if (afp == NULL)
1581 break;
1582 /* this handle is already used, try again */
1583 }
1584
1585 return ((i << 20) | handle);
1586 }
1587
1588 /* convert filter handle to filter pointer */
1589 static struct acc_filter *
1590 filth_to_filtp(struct acc_classifier *classifier, u_long handle)
1591 {
1592 struct acc_filter *afp;
1593 int i;
1594
1595 i = ACC_GET_HINDEX(handle);
1596
1597 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1598 if (afp->f_handle == handle)
1599 return (afp);
1600
1601 return (NULL);
1602 }
1603
1604 /* create flowinfo bitmask */
1605 static u_int32_t
1606 filt2fibmask(struct flow_filter *filt)
1607 {
1608 u_int32_t mask = 0;
1609 #ifdef INET6
1610 struct flow_filter6 *filt6;
1611 #endif
1612
1613 switch (filt->ff_flow.fi_family) {
1614 case AF_INET:
1615 if (filt->ff_flow.fi_proto != 0)
1616 mask |= FIMB4_PROTO;
1617 if (filt->ff_flow.fi_tos != 0)
1618 mask |= FIMB4_TOS;
1619 if (filt->ff_flow.fi_dst.s_addr != 0)
1620 mask |= FIMB4_DADDR;
1621 if (filt->ff_flow.fi_src.s_addr != 0)
1622 mask |= FIMB4_SADDR;
1623 if (filt->ff_flow.fi_sport != 0)
1624 mask |= FIMB4_SPORT;
1625 if (filt->ff_flow.fi_dport != 0)
1626 mask |= FIMB4_DPORT;
1627 if (filt->ff_flow.fi_gpi != 0)
1628 mask |= FIMB4_GPI;
1629 break;
1630 #ifdef INET6
1631 case AF_INET6:
1632 filt6 = (struct flow_filter6 *)filt;
1633
1634 if (filt6->ff_flow6.fi6_proto != 0)
1635 mask |= FIMB6_PROTO;
1636 if (filt6->ff_flow6.fi6_tclass != 0)
1637 mask |= FIMB6_TCLASS;
1638 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
1639 mask |= FIMB6_DADDR;
1640 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
1641 mask |= FIMB6_SADDR;
1642 if (filt6->ff_flow6.fi6_sport != 0)
1643 mask |= FIMB6_SPORT;
1644 if (filt6->ff_flow6.fi6_dport != 0)
1645 mask |= FIMB6_DPORT;
1646 if (filt6->ff_flow6.fi6_gpi != 0)
1647 mask |= FIMB6_GPI;
1648 if (filt6->ff_flow6.fi6_flowlabel != 0)
1649 mask |= FIMB6_FLABEL;
1650 break;
1651 #endif /* INET6 */
1652 }
1653 return (mask);
1654 }
1655
1656
1657 /*
1658 * helper functions to handle IPv4 fragments.
1659 * currently only in-sequence fragments are handled.
1660 * - fragment info is cached in a LRU list.
1661 * - when a first fragment is found, cache its flow info.
1662 * - when a non-first fragment is found, lookup the cache.
1663 */
1664
1665 struct ip4_frag {
1666 TAILQ_ENTRY(ip4_frag) ip4f_chain;
1667 char ip4f_valid;
1668 u_short ip4f_id;
1669 struct flowinfo_in ip4f_info;
1670 };
1671
1672 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
1673
1674 #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */
1675
1676
/*
 * Cache the flow info of a first fragment so that later fragments of
 * the same datagram (which carry no transport header) can be matched
 * by ip4f_lookup().  The entry is keyed by (id, src, dst, proto).
 */
static void
ip4f_cache(struct ip *ip, struct flowinfo_in *fin)
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	/*
	 * ip4f_alloc() recycles the LRU tail; the list is non-empty here
	 * because ip4f_init() returns < 0 only when nothing was allocated.
	 */
	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi = fin->fi_gpi;
}
1700
/*
 * Look up the fragment cache for a non-first fragment and, on a hit,
 * copy the cached port numbers and gpi into "fin".  Returns 1 on a
 * cache hit, 0 otherwise.  The scan stops at the first invalid entry:
 * valid entries are kept at the head of the LRU list (ip4f_alloc
 * inserts at the head, ip4f_free moves freed entries to the tail).
 */
static int
ip4f_lookup(struct ip *ip, struct flowinfo_in *fin)
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi   = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}
1729
1730 static int
1731 ip4f_init(void)
1732 {
1733 struct ip4_frag *fp;
1734 int i;
1735
1736 TAILQ_INIT(&ip4f_list);
1737 for (i=0; i<IP4F_TABSIZE; i++) {
1738 fp = malloc(sizeof(struct ip4_frag), M_DEVBUF, M_NOWAIT);
1739 if (fp == NULL) {
1740 printf("ip4f_init: can't alloc %dth entry!\n", i);
1741 if (i == 0)
1742 return (-1);
1743 return (0);
1744 }
1745 fp->ip4f_valid = 0;
1746 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1747 }
1748 return (0);
1749 }
1750
/*
 * Grab a cache entry for a new fragment: recycle the least-recently
 * used entry (list tail), mark it valid, and move it to the head.
 * Never returns NULL; callers must ensure the list is non-empty
 * (ip4f_cache calls ip4f_init first).
 */
static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}
1763
1764 static void
1765 ip4f_free(struct ip4_frag *fp)
1766 {
1767 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1768 fp->ip4f_valid = 0;
1769 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1770 }
1771
1772 #endif /* ALTQ3_CLFIER_COMPAT */
1773