Home | History | Annotate | Line # | Download | only in cxgb
      1 /**************************************************************************
      2 
      3 Copyright (c) 2007, Chelsio Inc.
      4 All rights reserved.
      5 
      6 Redistribution and use in source and binary forms, with or without
      7 modification, are permitted provided that the following conditions are met:
      8 
      9  1. Redistributions of source code must retain the above copyright notice,
     10     this list of conditions and the following disclaimer.
     11 
     12 2. Neither the name of the Chelsio Corporation nor the names of its
     13     contributors may be used to endorse or promote products derived from
     14     this software without specific prior written permission.
     15 
     16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26 POSSIBILITY OF SUCH DAMAGE.
     27 
     28 ***************************************************************************/
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: cxgb_lro.c,v 1.2 2011/05/18 01:01:59 dyoung Exp $");
     32 
     33 
     34 #include <sys/param.h>
     35 #include <sys/systm.h>
     36 #include <sys/kernel.h>
     37 #include <sys/conf.h>
     38 #include <sys/bus.h>
     39 #include <sys/queue.h>
     40 
     41 #include <netinet/in_systm.h>
     42 #include <netinet/in.h>
     43 #include <netinet/ip.h>
     44 #include <netinet/tcp.h>
     45 
     46 
     47 #ifdef CONFIG_DEFINED
     48 #include <dev/pci/cxgb/cxgb_include.h>
     49 
     50 #include <machine/in_cksum.h>
     51 #endif
     52 
     53 #include "cxgb_include.h"
     54 
/*
 * Mbuf flag that lro_flush_session() sets on the head mbuf of a session
 * before handing it to t3_rx_eth(). Only defined here when the mbuf
 * headers do not already provide it.
 */
#ifndef M_LRO
#define M_LRO    0x0200
#endif

/*
 * MBUF_HEADER_CHECK(m): DEBUG-only sanity check of the first mbuf of a
 * packet. Panics when the mbuf is empty, is not a packet header, or is
 * shorter than an Ethernet header. Expands to nothing in non-DEBUG builds.
 *
 * The macro argument is parenthesized so that any pointer expression can be
 * passed safely. Note that the second test is already covered by the first
 * one (which also panics when M_PKTHDR is clear); it is kept for its more
 * specific message.
 */
#ifdef DEBUG
#define MBUF_HEADER_CHECK(m) do { \
    if (((m)->m_len == 0) || ((m)->m_pkthdr.len == 0)   \
        || (((m)->m_flags & M_PKTHDR) == 0))              \
        panic("lro_flush_session - mbuf len=%d pktlen=%d flags=0x%x\n", \
            (m)->m_len, (m)->m_pkthdr.len, (m)->m_flags); \
    if (((m)->m_flags & M_PKTHDR) == 0)               \
        panic("first mbuf is not packet header - flags=0x%x\n", \
            (m)->m_flags);  \
    if (((m)->m_len < ETHER_HDR_LEN) || ((m)->m_pkthdr.len < ETHER_HDR_LEN)) \
        panic("packet too small len=%d pktlen=%d\n", \
            (m)->m_len, (m)->m_pkthdr.len);\
} while (0)
#else
#define MBUF_HEADER_CHECK(m)
#endif

/*
 * Byte offset from the start of the mbuf data to the IP header: a 2-byte
 * lead-in (presumably the Ethernet pad, matching the value 2 passed to
 * t3_rx_eth() when a session is flushed -- TODO confirm), the CPL receive
 * header and the Ethernet header.
 */
#define IPH_OFFSET (2 + sizeof (struct cpl_rx_pkt) + ETHER_HDR_LEN)

/*
 * Map an RSS hash to a starting slot in the session table. The mask form
 * assumes MAX_LRO_SES is a power of two. The argument is parenthesized so
 * that expressions such as (a | b) are masked as a whole.
 */
#define LRO_SESSION_IDX_HINT_HASH(hash) ((hash) & (MAX_LRO_SES - 1))

/*
 * Advance a session-table index by one slot, wrapping at MAX_LRO_SES.
 * idx must be a modifiable lvalue; the whole expansion is a single
 * parenthesized expression so it can be used wherever an expression is
 * allowed (statement, for-loop increment, ...).
 */
#define LRO_IDX_INC(idx) ((idx) = ((idx) + 1) & (MAX_LRO_SES - 1))
     79 
     80 static __inline int
     81 lro_match(struct mbuf *m, struct ip *ih, struct tcphdr *th)
     82 {
     83     struct ip *sih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET);
     84     struct tcphdr *sth = (struct tcphdr *) (sih + 1);
     85 
     86     return (th->th_sport == sth->th_sport &&
     87         th->th_dport == sth->th_dport &&
     88         ih->ip_src.s_addr == sih->ip_src.s_addr &&
     89         ih->ip_dst.s_addr == sih->ip_dst.s_addr);
     90 }
     91 
     92 static __inline struct t3_lro_session *
     93 lro_lookup(struct lro_state *l, int idx, struct ip *ih, struct tcphdr *th)
     94 {
     95     struct t3_lro_session *s = NULL;
     96     int active = l->nactive;
     97 
     98     while (active) {
     99         s = &l->sess[idx];
    100         if (s->head) {
    101             if (lro_match(s->head, ih, th))
    102                 break;
    103             active--;
    104         }
    105         LRO_IDX_INC(idx);
    106     }
    107 
    108     return (s);
    109 }
    110 
    111 static __inline int
    112 can_lro_packet(struct cpl_rx_pkt *cpl, unsigned int rss_hi)
    113 {
    114     struct ether_header *eh = (struct ether_header *)(cpl + 1);
    115     struct ip *ih = (struct ip *)(eh + 1);
    116 
    117     /*
    118      * XXX VLAN support?
    119      */
    120     if (__predict_false(G_HASHTYPE(ntohl(rss_hi)) != RSS_HASH_4_TUPLE ||
    121              (*((uint8_t *)cpl + 1) & 0x90) != 0x10 ||
    122              cpl->csum != 0xffff || eh->ether_type != ntohs(ETHERTYPE_IP) ||
    123              ih->ip_hl != (sizeof (*ih) >> 2))) {
    124         return 0;
    125     }
    126 
    127     return 1;
    128 }
    129 
    130 static int
    131 can_lro_tcpsegment(struct tcphdr *th)
    132 {
    133     int olen = (th->th_off << 2) - sizeof (*th);
    134     u8 control_bits = *((u8 *)th + 13);
    135 
    136     if (__predict_false((control_bits & 0xB7) != 0x10))
    137         goto no_lro;
    138 
    139     if (olen) {
    140         uint32_t *ptr = (u32 *)(th + 1);
    141         if (__predict_false(olen != TCPOLEN_TSTAMP_APPA ||
    142                  *ptr != ntohl((TCPOPT_NOP << 24) |
    143                        (TCPOPT_NOP << 16) |
    144                        (TCPOPT_TIMESTAMP << 8) |
    145                         TCPOLEN_TIMESTAMP)))
    146             goto no_lro;
    147     }
    148 
    149     return 1;
    150 
    151  no_lro:
    152     return 0;
    153 }
    154 
    155 static __inline void
    156 lro_new_session_init(struct t3_lro_session *s, struct mbuf *m)
    157 {
    158     struct ip *ih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET);
    159     struct tcphdr *th = (struct tcphdr *) (ih + 1);
    160     int ip_len = ntohs(ih->ip_len);
    161 
    162     DPRINTF("%s(s=%p, m=%p)\n", __func__, s, m);
    163 
    164     s->head = m;
    165 
    166     MBUF_HEADER_CHECK(m);
    167     s->ip_len = ip_len;
    168     s->seq = ntohl(th->th_seq) + ip_len - sizeof(*ih) - (th->th_off << 2);
    169 
    170 }
    171 
    172 static void
    173 lro_flush_session(struct sge_qset *qs, struct t3_lro_session *s, struct mbuf *m)
    174 {
    175     struct lro_state *l = &qs->lro;
    176     struct mbuf *sm = s->head;
    177     struct ip *ih = (struct ip *)(mtod(sm, uint8_t *) + IPH_OFFSET);
    178 
    179 
    180     DPRINTF("%s(qs=%p, s=%p, ", __func__,
    181         qs, s);
    182 
    183     if (m)
    184         DPRINTF("m=%p)\n", m);
    185     else
    186         DPRINTF("m=NULL)\n");
    187 
    188     ih->ip_len = htons(s->ip_len);
    189     ih->ip_sum = 0;
    190     ih->ip_sum = in_cksum_hdr(ih);
    191 
    192     MBUF_HEADER_CHECK(sm);
    193 
    194     sm->m_flags |= M_LRO;
    195     t3_rx_eth(qs->port->adapter, &qs->rspq, sm, 2);
    196 
    197     if (m) {
    198         s->head = m;
    199         lro_new_session_init(s, m);
    200     } else {
    201         s->head = NULL;
    202         l->nactive--;
    203     }
    204 
    205     qs->port_stats[SGE_PSTATS_LRO_FLUSHED]++;
    206 }
    207 
    208 static __inline struct t3_lro_session *
    209 lro_new_session(struct sge_qset *qs, struct mbuf *m, uint32_t rss_hash)
    210 {
    211     struct lro_state *l = &qs->lro;
    212     int idx = LRO_SESSION_IDX_HINT_HASH(rss_hash);
    213     struct t3_lro_session *s = &l->sess[idx];
    214 
    215     DPRINTF("%s(qs=%p,  m=%p, rss_hash=0x%x)\n", __func__,
    216         qs, m, rss_hash);
    217 
    218     if (__predict_true(!s->head))
    219         goto done;
    220 
    221     if (l->nactive > MAX_LRO_SES)
    222         panic("MAX_LRO_PER_QSET exceeded");
    223 
    224     if (l->nactive == MAX_LRO_SES) {
    225         lro_flush_session(qs, s, m);
    226         qs->port_stats[SGE_PSTATS_LRO_X_STREAMS]++;
    227         return s;
    228     }
    229 
    230     while (1) {
    231         LRO_IDX_INC(idx);
    232         s = &l->sess[idx];
    233         if (!s->head)
    234             break;
    235     }
    236 done:
    237     lro_new_session_init(s, m);
    238     l->nactive++;
    239 
    240     return s;
    241 }
    242 
    243 static __inline int
    244 lro_update_session(struct t3_lro_session *s, struct mbuf *m)
    245 {
    246     struct mbuf *sm = s->head;
    247     struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(sm, uint8_t *) + 2);
    248     struct cpl_rx_pkt *ncpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + 2);
    249     struct ip *nih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET);
    250     struct tcphdr *th, *nth = (struct tcphdr *)(nih + 1);
    251     uint32_t seq = ntohl(nth->th_seq);
    252     int plen, tcpiphlen, olen = (nth->th_off << 2) - sizeof (*nth);
    253 
    254 
    255     DPRINTF("%s(s=%p,  m=%p)\n", __func__, s, m);
    256     if (cpl->vlan_valid && cpl->vlan != ncpl->vlan) {
    257         return -1;
    258     }
    259     if (__predict_false(seq != s->seq)) {
    260         DPRINTF("sequence mismatch\n");
    261         return -1;
    262     }
    263 
    264     MBUF_HEADER_CHECK(sm);
    265     th = (struct tcphdr *)(mtod(sm, uint8_t *) + IPH_OFFSET + sizeof (struct ip));
    266 
    267     if (olen) {
    268         uint32_t *ptr = (uint32_t *)(th + 1);
    269         uint32_t *nptr = (uint32_t *)(nth + 1);
    270 
    271         if (__predict_false(ntohl(*(ptr + 1)) > ntohl(*(nptr + 1)) ||
    272                  !*(nptr + 2))) {
    273             return -1;
    274         }
    275         *(ptr + 1) = *(nptr + 1);
    276         *(ptr + 2) = *(nptr + 2);
    277     }
    278     th->th_ack = nth->th_ack;
    279     th->th_win = nth->th_win;
    280 
    281     tcpiphlen = (nth->th_off << 2) + sizeof (*nih);
    282     plen = ntohs(nih->ip_len) - tcpiphlen;
    283     s->seq += plen;
    284     s->ip_len += plen;
    285     sm->m_pkthdr.len += plen;
    286 
    287     /*
    288      * XXX FIX ME
    289      *
    290      *
    291      */
    292 
    293 #if 0
    294     /* XXX this I *do not* understand */
    295     if (plen > skb_shinfo(s->skb)->gso_size)
    296         skb_shinfo(s->skb)->gso_size = plen;
    297 #endif
    298     DPRINTF("m_adj(%d)\n", (int)(IPH_OFFSET + tcpiphlen));
    299     m_adj(m, IPH_OFFSET + tcpiphlen);
    300 #if 0
    301     if (__predict_false(!skb_shinfo(s->skb)->frag_list))
    302         skb_shinfo(s->skb)->frag_list = skb;
    303 
    304 #endif
    305 
    306 #if 0
    307 
    308     /*
    309      * XXX we really need to be able to
    310      * support vectors of buffers in FreeBSD
    311      */
    312     int nr = skb_shinfo(s->skb)->nr_frags;
    313     skb_shinfo(s->skb)->frags[nr].page = frag->page;
    314     skb_shinfo(s->skb)->frags[nr].page_offset =
    315         frag->page_offset + IPH_OFFSET + tcpiphlen;
    316     skb_shinfo(s->skb)->frags[nr].size = plen;
    317     skb_shinfo(s->skb)->nr_frags = ++nr;
    318 
    319 #endif
    320     return (0);
    321 }
    322 
    323 void
    324 t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
    325     int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro)
    326 {
    327     struct sge_qset *qs = rspq_to_qset(rq);
    328     struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
    329     struct ether_header *eh = (struct ether_header *)(cpl + 1);
    330     struct ip *ih;
    331     struct tcphdr *th;
    332     struct t3_lro_session *s = NULL;
    333 
    334     if (lro == 0)
    335         goto no_lro;
    336 
    337     if (!can_lro_packet(cpl, rss_csum))
    338         goto no_lro;
    339 
    340     ih = (struct ip *)(eh + 1);
    341     th = (struct tcphdr *)(ih + 1);
    342 
    343     s = lro_lookup(&qs->lro,
    344         LRO_SESSION_IDX_HINT_HASH(rss_hash), ih, th);
    345 
    346     if (__predict_false(!can_lro_tcpsegment(th))) {
    347         goto no_lro;
    348     } else if (__predict_false(!s)) {
    349         s = lro_new_session(qs, m, rss_hash);
    350     } else {
    351         if (lro_update_session(s, m)) {
    352             lro_flush_session(qs, s, m);
    353         }
    354 #ifdef notyet
    355         if (__predict_false(s->head->m_pkthdr.len + pi->ifp->if_mtu > 65535)) {
    356             lro_flush_session(qs, s, NULL);
    357         }
    358 #endif
    359     }
    360 
    361     qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
    362     return;
    363 no_lro:
    364     if (s)
    365         lro_flush_session(qs, s, NULL);
    366 
    367     if (m->m_len == 0 || m->m_pkthdr.len == 0 || (m->m_flags & M_PKTHDR) == 0)
    368         DPRINTF("rx_eth_lro mbuf len=%d pktlen=%d flags=0x%x\n",
    369             m->m_len, m->m_pkthdr.len, m->m_flags);
    370 
    371     t3_rx_eth(adap, rq, m, ethpad);
    372 }
    373 
    374 void
    375 t3_lro_flush(adapter_t *adap, struct sge_qset *qs, struct lro_state *state)
    376 {
    377     unsigned int idx = state->active_idx;
    378 
    379     while (state->nactive) {
    380         struct t3_lro_session *s = &state->sess[idx];
    381 
    382         if (s->head)
    383             lro_flush_session(qs, s, NULL);
    384         LRO_IDX_INC(idx);
    385     }
    386 }
    387