1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: cxgb_lro.c,v 1.2 2011/05/18 01:01:59 dyoung Exp $"); 32 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/conf.h> 38 #include <sys/bus.h> 39 #include <sys/queue.h> 40 41 #include <netinet/in_systm.h> 42 #include <netinet/in.h> 43 #include <netinet/ip.h> 44 #include <netinet/tcp.h> 45 46 47 #ifdef CONFIG_DEFINED 48 #include <dev/pci/cxgb/cxgb_include.h> 49 50 #include <machine/in_cksum.h> 51 #endif 52 53 #include "cxgb_include.h" 54 55 #ifndef M_LRO 56 #define M_LRO 0x0200 57 #endif 58 59 #ifdef DEBUG 60 #define MBUF_HEADER_CHECK(m) do { \ 61 if ((m->m_len == 0) || (m->m_pkthdr.len == 0) \ 62 || ((m->m_flags & M_PKTHDR) == 0)) \ 63 panic("lro_flush_session - mbuf len=%d pktlen=%d flags=0x%x\n", \ 64 m->m_len, m->m_pkthdr.len, m->m_flags); \ 65 if ((m->m_flags & M_PKTHDR) == 0) \ 66 panic("first mbuf is not packet header - flags=0x%x\n", \ 67 m->m_flags); \ 68 if ((m->m_len < ETHER_HDR_LEN) || (m->m_pkthdr.len < ETHER_HDR_LEN)) \ 69 panic("packet too small len=%d pktlen=%d\n", \ 70 m->m_len, m->m_pkthdr.len);\ 71 } while (0) 72 #else 73 #define MBUF_HEADER_CHECK(m) 74 #endif 75 76 #define IPH_OFFSET (2 + sizeof (struct cpl_rx_pkt) + ETHER_HDR_LEN) 77 #define LRO_SESSION_IDX_HINT_HASH(hash) (hash & (MAX_LRO_SES - 1)) 78 #define LRO_IDX_INC(idx) idx = (idx + 1) & (MAX_LRO_SES - 1) 79 80 static __inline int 81 lro_match(struct mbuf *m, struct ip *ih, struct tcphdr *th) 82 { 83 struct ip *sih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET); 84 struct tcphdr *sth = (struct tcphdr *) (sih + 1); 85 86 return (th->th_sport == sth->th_sport && 87 th->th_dport == sth->th_dport && 88 ih->ip_src.s_addr == sih->ip_src.s_addr && 89 ih->ip_dst.s_addr == sih->ip_dst.s_addr); 90 } 91 92 static __inline struct t3_lro_session * 93 lro_lookup(struct lro_state *l, int idx, struct ip *ih, struct tcphdr *th) 94 { 95 struct t3_lro_session *s = NULL; 96 int active = l->nactive; 97 98 while (active) { 99 s = &l->sess[idx]; 100 if (s->head) { 101 if (lro_match(s->head, ih, th)) 102 break; 103 active--; 104 } 105 LRO_IDX_INC(idx); 106 } 107 108 return (s); 109 } 110 111 static __inline int 112 can_lro_packet(struct cpl_rx_pkt *cpl, unsigned int rss_hi) 113 { 114 struct ether_header *eh = (struct ether_header *)(cpl + 1); 115 struct ip *ih = (struct ip *)(eh + 1); 116 117 /* 118 * XXX VLAN support? 119 */ 120 if (__predict_false(G_HASHTYPE(ntohl(rss_hi)) != RSS_HASH_4_TUPLE || 121 (*((uint8_t *)cpl + 1) & 0x90) != 0x10 || 122 cpl->csum != 0xffff || eh->ether_type != ntohs(ETHERTYPE_IP) || 123 ih->ip_hl != (sizeof (*ih) >> 2))) { 124 return 0; 125 } 126 127 return 1; 128 } 129 130 static int 131 can_lro_tcpsegment(struct tcphdr *th) 132 { 133 int olen = (th->th_off << 2) - sizeof (*th); 134 u8 control_bits = *((u8 *)th + 13); 135 136 if (__predict_false((control_bits & 0xB7) != 0x10)) 137 goto no_lro; 138 139 if (olen) { 140 uint32_t *ptr = (u32 *)(th + 1); 141 if (__predict_false(olen != TCPOLEN_TSTAMP_APPA || 142 *ptr != ntohl((TCPOPT_NOP << 24) | 143 (TCPOPT_NOP << 16) | 144 (TCPOPT_TIMESTAMP << 8) | 145 TCPOLEN_TIMESTAMP))) 146 goto no_lro; 147 } 148 149 return 1; 150 151 no_lro: 152 return 0; 153 } 154 155 static __inline void 156 lro_new_session_init(struct t3_lro_session *s, struct mbuf *m) 157 { 158 struct ip *ih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET); 159 struct tcphdr *th = (struct tcphdr *) (ih + 1); 160 int ip_len = ntohs(ih->ip_len); 161 162 DPRINTF("%s(s=%p, m=%p)\n", __func__, s, m); 163 164 s->head = m; 165 166 MBUF_HEADER_CHECK(m); 167 s->ip_len = ip_len; 168 s->seq = ntohl(th->th_seq) + ip_len - sizeof(*ih) - (th->th_off << 2); 169 170 } 171 172 static void 173 lro_flush_session(struct sge_qset *qs, struct t3_lro_session *s, struct mbuf *m) 174 { 175 struct lro_state *l = &qs->lro; 176 struct mbuf *sm = s->head; 177 struct ip *ih = (struct ip *)(mtod(sm, uint8_t *) + IPH_OFFSET); 178 179 180 DPRINTF("%s(qs=%p, s=%p, ", __func__, 181 qs, s); 182 183 if (m) 184 DPRINTF("m=%p)\n", m); 185 else 186 DPRINTF("m=NULL)\n"); 187 188 ih->ip_len = htons(s->ip_len); 189 ih->ip_sum = 0; 190 ih->ip_sum = in_cksum_hdr(ih); 191 192 MBUF_HEADER_CHECK(sm); 193 194 sm->m_flags |= M_LRO; 195 t3_rx_eth(qs->port->adapter, &qs->rspq, sm, 2); 196 197 if (m) { 198 s->head = m; 199 lro_new_session_init(s, m); 200 } else { 201 s->head = NULL; 202 l->nactive--; 203 } 204 205 qs->port_stats[SGE_PSTATS_LRO_FLUSHED]++; 206 } 207 208 static __inline struct t3_lro_session * 209 lro_new_session(struct sge_qset *qs, struct mbuf *m, uint32_t rss_hash) 210 { 211 struct lro_state *l = &qs->lro; 212 int idx = LRO_SESSION_IDX_HINT_HASH(rss_hash); 213 struct t3_lro_session *s = &l->sess[idx]; 214 215 DPRINTF("%s(qs=%p, m=%p, rss_hash=0x%x)\n", __func__, 216 qs, m, rss_hash); 217 218 if (__predict_true(!s->head)) 219 goto done; 220 221 if (l->nactive > MAX_LRO_SES) 222 panic("MAX_LRO_PER_QSET exceeded"); 223 224 if (l->nactive == MAX_LRO_SES) { 225 lro_flush_session(qs, s, m); 226 qs->port_stats[SGE_PSTATS_LRO_X_STREAMS]++; 227 return s; 228 } 229 230 while (1) { 231 LRO_IDX_INC(idx); 232 s = &l->sess[idx]; 233 if (!s->head) 234 break; 235 } 236 done: 237 lro_new_session_init(s, m); 238 l->nactive++; 239 240 return s; 241 } 242 243 static __inline int 244 lro_update_session(struct t3_lro_session *s, struct mbuf *m) 245 { 246 struct mbuf *sm = s->head; 247 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(sm, uint8_t *) + 2); 248 struct cpl_rx_pkt *ncpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + 2); 249 struct ip *nih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET); 250 struct tcphdr *th, *nth = (struct tcphdr *)(nih + 1); 251 uint32_t seq = ntohl(nth->th_seq); 252 int plen, tcpiphlen, olen = (nth->th_off << 2) - sizeof (*nth); 253 254 255 DPRINTF("%s(s=%p, m=%p)\n", __func__, s, m); 256 if (cpl->vlan_valid && cpl->vlan != ncpl->vlan) { 257 return -1; 258 } 259 if (__predict_false(seq != s->seq)) { 260 DPRINTF("sequence mismatch\n"); 261 return -1; 262 } 263 264 MBUF_HEADER_CHECK(sm); 265 th = (struct tcphdr *)(mtod(sm, uint8_t *) + IPH_OFFSET + sizeof (struct ip)); 266 267 if (olen) { 268 uint32_t *ptr = (uint32_t *)(th + 1); 269 uint32_t *nptr = (uint32_t *)(nth + 1); 270 271 if (__predict_false(ntohl(*(ptr + 1)) > ntohl(*(nptr + 1)) || 272 !*(nptr + 2))) { 273 return -1; 274 } 275 *(ptr + 1) = *(nptr + 1); 276 *(ptr + 2) = *(nptr + 2); 277 } 278 th->th_ack = nth->th_ack; 279 th->th_win = nth->th_win; 280 281 tcpiphlen = (nth->th_off << 2) + sizeof (*nih); 282 plen = ntohs(nih->ip_len) - tcpiphlen; 283 s->seq += plen; 284 s->ip_len += plen; 285 sm->m_pkthdr.len += plen; 286 287 /* 288 * XXX FIX ME 289 * 290 * 291 */ 292 293 #if 0 294 /* XXX this I *do not* understand */ 295 if (plen > skb_shinfo(s->skb)->gso_size) 296 skb_shinfo(s->skb)->gso_size = plen; 297 #endif 298 DPRINTF("m_adj(%d)\n", (int)(IPH_OFFSET + tcpiphlen)); 299 m_adj(m, IPH_OFFSET + tcpiphlen); 300 #if 0 301 if (__predict_false(!skb_shinfo(s->skb)->frag_list)) 302 skb_shinfo(s->skb)->frag_list = skb; 303 304 #endif 305 306 #if 0 307 308 /* 309 * XXX we really need to be able to 310 * support vectors of buffers in FreeBSD 311 */ 312 int nr = skb_shinfo(s->skb)->nr_frags; 313 skb_shinfo(s->skb)->frags[nr].page = frag->page; 314 skb_shinfo(s->skb)->frags[nr].page_offset = 315 frag->page_offset + IPH_OFFSET + tcpiphlen; 316 skb_shinfo(s->skb)->frags[nr].size = plen; 317 skb_shinfo(s->skb)->nr_frags = ++nr; 318 319 #endif 320 return (0); 321 } 322 323 void 324 t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m, 325 int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro) 326 { 327 struct sge_qset *qs = rspq_to_qset(rq); 328 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 329 struct ether_header *eh = (struct ether_header *)(cpl + 1); 330 struct ip *ih; 331 struct tcphdr *th; 332 struct t3_lro_session *s = NULL; 333 334 if (lro == 0) 335 goto no_lro; 336 337 if (!can_lro_packet(cpl, rss_csum)) 338 goto no_lro; 339 340 ih = (struct ip *)(eh + 1); 341 th = (struct tcphdr *)(ih + 1); 342 343 s = lro_lookup(&qs->lro, 344 LRO_SESSION_IDX_HINT_HASH(rss_hash), ih, th); 345 346 if (__predict_false(!can_lro_tcpsegment(th))) { 347 goto no_lro; 348 } else if (__predict_false(!s)) { 349 s = lro_new_session(qs, m, rss_hash); 350 } else { 351 if (lro_update_session(s, m)) { 352 lro_flush_session(qs, s, m); 353 } 354 #ifdef notyet 355 if (__predict_false(s->head->m_pkthdr.len + pi->ifp->if_mtu > 65535)) { 356 lro_flush_session(qs, s, NULL); 357 } 358 #endif 359 } 360 361 qs->port_stats[SGE_PSTATS_LRO_QUEUED]++; 362 return; 363 no_lro: 364 if (s) 365 lro_flush_session(qs, s, NULL); 366 367 if (m->m_len == 0 || m->m_pkthdr.len == 0 || (m->m_flags & M_PKTHDR) == 0) 368 DPRINTF("rx_eth_lro mbuf len=%d pktlen=%d flags=0x%x\n", 369 m->m_len, m->m_pkthdr.len, m->m_flags); 370 371 t3_rx_eth(adap, rq, m, ethpad); 372 } 373 374 void 375 t3_lro_flush(adapter_t *adap, struct sge_qset *qs, struct lro_state *state) 376 { 377 unsigned int idx = state->active_idx; 378 379 while (state->nactive) { 380 struct t3_lro_session *s = &state->sess[idx]; 381 382 if (s->head) 383 lro_flush_session(qs, s, NULL); 384 LRO_IDX_INC(idx); 385 } 386 } 387