Home | History | Annotate | Line # | Download | only in cxgb
cxgb_l2t.c revision 1.1.2.2
      1  1.1.2.2  uebayasi /**************************************************************************
      2  1.1.2.2  uebayasi 
      3  1.1.2.2  uebayasi Copyright (c) 2007, Chelsio Inc.
      4  1.1.2.2  uebayasi All rights reserved.
      5  1.1.2.2  uebayasi 
      6  1.1.2.2  uebayasi Redistribution and use in source and binary forms, with or without
      7  1.1.2.2  uebayasi modification, are permitted provided that the following conditions are met:
      8  1.1.2.2  uebayasi 
      9  1.1.2.2  uebayasi  1. Redistributions of source code must retain the above copyright notice,
     10  1.1.2.2  uebayasi     this list of conditions and the following disclaimer.
     11  1.1.2.2  uebayasi 
     12  1.1.2.2  uebayasi  2. Neither the name of the Chelsio Corporation nor the names of its
     13  1.1.2.2  uebayasi     contributors may be used to endorse or promote products derived from
     14  1.1.2.2  uebayasi     this software without specific prior written permission.
     15  1.1.2.2  uebayasi 
     16  1.1.2.2  uebayasi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     17  1.1.2.2  uebayasi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  1.1.2.2  uebayasi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  1.1.2.2  uebayasi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     20  1.1.2.2  uebayasi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  1.1.2.2  uebayasi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  1.1.2.2  uebayasi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  1.1.2.2  uebayasi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  1.1.2.2  uebayasi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  1.1.2.2  uebayasi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  1.1.2.2  uebayasi POSSIBILITY OF SUCH DAMAGE.
     27  1.1.2.2  uebayasi 
     28  1.1.2.2  uebayasi ***************************************************************************/
     29  1.1.2.2  uebayasi 
     30  1.1.2.2  uebayasi #include <sys/cdefs.h>
     31  1.1.2.2  uebayasi __KERNEL_RCSID(0, "$NetBSD: cxgb_l2t.c,v 1.1.2.2 2010/04/30 14:43:44 uebayasi Exp $");
     32  1.1.2.2  uebayasi 
     33  1.1.2.2  uebayasi #include <sys/param.h>
     34  1.1.2.2  uebayasi #include <sys/systm.h>
     35  1.1.2.2  uebayasi #include <sys/kernel.h>
     36  1.1.2.2  uebayasi #include <sys/lock.h>
     37  1.1.2.2  uebayasi #include <sys/mutex.h>
     38  1.1.2.2  uebayasi 
     39  1.1.2.2  uebayasi #include <sys/socket.h>
     40  1.1.2.2  uebayasi #include <sys/socketvar.h>
     41  1.1.2.2  uebayasi #include <net/if.h>
     42  1.1.2.2  uebayasi #include <netinet/in.h>
     43  1.1.2.2  uebayasi #include <netinet/in_var.h>
     44  1.1.2.2  uebayasi #include <netinet/if_inarp.h>
     45  1.1.2.2  uebayasi #include <net/if_dl.h>
     46  1.1.2.2  uebayasi #include <net/route.h>
     47  1.1.2.2  uebayasi #include <netinet/in.h>
     48  1.1.2.2  uebayasi 
     49  1.1.2.2  uebayasi #ifdef CONFIG_DEFINED
     50  1.1.2.2  uebayasi #include <cxgb_include.h>
     51  1.1.2.2  uebayasi #else
     52  1.1.2.2  uebayasi #include "cxgb_include.h"
     53  1.1.2.2  uebayasi #endif
     54  1.1.2.2  uebayasi 
/* VLAN id value stored in an entry that has no VLAN tag. */
#define VLAN_NONE 0xfff
/* View a pointer as a link-level socket address. */
#define SDL(s) ((struct sockaddr_dl *)(s))
/* Link-level (Ethernet) address bytes reached through LLADDR(). */
#define RT_ENADDR(rt)  ((u_char *)LLADDR(SDL((rt))))
/* Shorthand for the expiry field inside the route metrics. */
#define rt_expire rt_rmx.rmx_expire
     59  1.1.2.2  uebayasi 
     60  1.1.2.2  uebayasi /*
     61  1.1.2.2  uebayasi  * Module locking notes:  There is a RW lock protecting the L2 table as a
     62  1.1.2.2  uebayasi  * whole plus a spinlock per L2T entry.  Entry lookups and allocations happen
     63  1.1.2.2  uebayasi  * under the protection of the table lock, individual entry changes happen
     64  1.1.2.2  uebayasi  * while holding that entry's spinlock.  The table lock nests outside the
     65  1.1.2.2  uebayasi  * entry locks.  Allocations of new entries take the table lock as writers so
     66  1.1.2.2  uebayasi  * no other lookups can happen while allocating new entries.  Entry updates
     67  1.1.2.2  uebayasi  * take the table lock as readers so multiple entries can be updated in
     68  1.1.2.2  uebayasi  * parallel.  An L2T entry can be dropped by decrementing its reference count
     69  1.1.2.2  uebayasi  * and therefore can happen in parallel with entry allocation but no entry
     70  1.1.2.2  uebayasi  * can change state or increment its ref count during allocation as both of
     71  1.1.2.2  uebayasi  * these perform lookups.
     72  1.1.2.2  uebayasi  */
     73  1.1.2.2  uebayasi 
     74  1.1.2.2  uebayasi static inline unsigned int
     75  1.1.2.2  uebayasi vlan_prio(const struct l2t_entry *e)
     76  1.1.2.2  uebayasi {
     77  1.1.2.2  uebayasi     return e->vlan >> 13;
     78  1.1.2.2  uebayasi }
     79  1.1.2.2  uebayasi 
     80  1.1.2.2  uebayasi static inline unsigned int
     81  1.1.2.2  uebayasi arp_hash(u32 key, int ifindex, const struct l2t_data *d)
     82  1.1.2.2  uebayasi {
     83  1.1.2.2  uebayasi     return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
     84  1.1.2.2  uebayasi }
     85  1.1.2.2  uebayasi 
     86  1.1.2.2  uebayasi static inline void
     87  1.1.2.2  uebayasi neigh_replace(struct l2t_entry *e, struct rtentry *rt)
     88  1.1.2.2  uebayasi {
     89  1.1.2.2  uebayasi     RT_LOCK(rt);
     90  1.1.2.2  uebayasi     RT_ADDREF(rt);
     91  1.1.2.2  uebayasi     RT_UNLOCK(rt);
     92  1.1.2.2  uebayasi 
     93  1.1.2.2  uebayasi     if (e->neigh) {
     94  1.1.2.2  uebayasi         RT_LOCK(e->neigh);
     95  1.1.2.2  uebayasi         RT_REMREF(e->neigh);
     96  1.1.2.2  uebayasi         RT_UNLOCK(e->neigh);
     97  1.1.2.2  uebayasi     }
     98  1.1.2.2  uebayasi     e->neigh = rt;
     99  1.1.2.2  uebayasi }
    100  1.1.2.2  uebayasi 
    101  1.1.2.2  uebayasi /*
    102  1.1.2.2  uebayasi  * Set up an L2T entry and send any packets waiting in the arp queue.  The
    103  1.1.2.2  uebayasi  * supplied mbuf is used for the CPL_L2T_WRITE_REQ.  Must be called with the
    104  1.1.2.2  uebayasi  * entry locked.
    105  1.1.2.2  uebayasi  */
    106  1.1.2.2  uebayasi static int
    107  1.1.2.2  uebayasi setup_l2e_send_pending(struct toedev *dev, struct mbuf *m,
    108  1.1.2.2  uebayasi             struct l2t_entry *e)
    109  1.1.2.2  uebayasi {
    110  1.1.2.2  uebayasi     struct cpl_l2t_write_req *req;
    111  1.1.2.2  uebayasi 
    112  1.1.2.2  uebayasi     if (!m) {
    113  1.1.2.2  uebayasi         if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
    114  1.1.2.2  uebayasi             return (ENOMEM);
    115  1.1.2.2  uebayasi     }
    116  1.1.2.2  uebayasi     /*
    117  1.1.2.2  uebayasi      * XXX MH_ALIGN
    118  1.1.2.2  uebayasi      */
    119  1.1.2.2  uebayasi     req = mtod(m, struct cpl_l2t_write_req *);
    120  1.1.2.2  uebayasi     req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
    121  1.1.2.2  uebayasi     OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
    122  1.1.2.2  uebayasi     req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
    123  1.1.2.2  uebayasi                 V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
    124  1.1.2.2  uebayasi                 V_L2T_W_PRIO(vlan_prio(e)));
    125  1.1.2.2  uebayasi 
    126  1.1.2.2  uebayasi     memcpy(e->dmac, RT_ENADDR(e->neigh), sizeof(e->dmac));
    127  1.1.2.2  uebayasi     memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
    128  1.1.2.2  uebayasi     m_set_priority(m, CPL_PRIORITY_CONTROL);
    129  1.1.2.2  uebayasi     while (e->arpq_head) {
    130  1.1.2.2  uebayasi         m = e->arpq_head;
    131  1.1.2.2  uebayasi         e->arpq_head = m->m_next;
    132  1.1.2.2  uebayasi         m->m_next = NULL;
    133  1.1.2.2  uebayasi     }
    134  1.1.2.2  uebayasi     e->arpq_tail = NULL;
    135  1.1.2.2  uebayasi     e->state = L2T_STATE_VALID;
    136  1.1.2.2  uebayasi 
    137  1.1.2.2  uebayasi     return 0;
    138  1.1.2.2  uebayasi }
    139  1.1.2.2  uebayasi 
    140  1.1.2.2  uebayasi /*
    141  1.1.2.2  uebayasi  * Add a packet to the an L2T entry's queue of packets awaiting resolution.
    142  1.1.2.2  uebayasi  * Must be called with the entry's lock held.
    143  1.1.2.2  uebayasi  */
    144  1.1.2.2  uebayasi static inline void
    145  1.1.2.2  uebayasi arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
    146  1.1.2.2  uebayasi {
    147  1.1.2.2  uebayasi     m->m_next = NULL;
    148  1.1.2.2  uebayasi     if (e->arpq_head)
    149  1.1.2.2  uebayasi         e->arpq_tail->m_next = m;
    150  1.1.2.2  uebayasi     else
    151  1.1.2.2  uebayasi         e->arpq_head = m;
    152  1.1.2.2  uebayasi     e->arpq_tail = m;
    153  1.1.2.2  uebayasi }
    154  1.1.2.2  uebayasi 
    155  1.1.2.2  uebayasi int
    156  1.1.2.2  uebayasi t3_l2t_send_slow(struct toedev *dev, struct mbuf *m,
    157  1.1.2.2  uebayasi              struct l2t_entry *e)
    158  1.1.2.2  uebayasi {
    159  1.1.2.2  uebayasi     struct rtentry *rt;
    160  1.1.2.2  uebayasi     struct mbuf *m0;
    161  1.1.2.2  uebayasi 
    162  1.1.2.2  uebayasi     if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
    163  1.1.2.2  uebayasi         return (ENOMEM);
    164  1.1.2.2  uebayasi 
    165  1.1.2.2  uebayasi     rt = e->neigh;
    166  1.1.2.2  uebayasi 
    167  1.1.2.2  uebayasi again:
    168  1.1.2.2  uebayasi     switch (e->state) {
    169  1.1.2.2  uebayasi     case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
    170  1.1.2.2  uebayasi         arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt));
    171  1.1.2.2  uebayasi         mtx_lock(&e->lock);
    172  1.1.2.2  uebayasi         if (e->state == L2T_STATE_STALE)
    173  1.1.2.2  uebayasi             e->state = L2T_STATE_VALID;
    174  1.1.2.2  uebayasi         mtx_unlock(&e->lock);
    175  1.1.2.2  uebayasi     case L2T_STATE_VALID:     /* fast-path, send the packet on */
    176  1.1.2.2  uebayasi     case L2T_STATE_RESOLVING:
    177  1.1.2.2  uebayasi         mtx_lock(&e->lock);
    178  1.1.2.2  uebayasi         if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
    179  1.1.2.2  uebayasi             mtx_unlock(&e->lock);
    180  1.1.2.2  uebayasi             goto again;
    181  1.1.2.2  uebayasi         }
    182  1.1.2.2  uebayasi         arpq_enqueue(e, m);
    183  1.1.2.2  uebayasi         mtx_unlock(&e->lock);
    184  1.1.2.2  uebayasi 
    185  1.1.2.2  uebayasi         if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
    186  1.1.2.2  uebayasi             return (ENOMEM);
    187  1.1.2.2  uebayasi         /*
    188  1.1.2.2  uebayasi          * Only the first packet added to the arpq should kick off
    189  1.1.2.2  uebayasi          * resolution.  However, because the m_gethdr below can fail,
    190  1.1.2.2  uebayasi          * we allow each packet added to the arpq to retry resolution
    191  1.1.2.2  uebayasi          * as a way of recovering from transient memory exhaustion.
    192  1.1.2.2  uebayasi          * A better way would be to use a work request to retry L2T
    193  1.1.2.2  uebayasi          * entries when there's no memory.
    194  1.1.2.2  uebayasi          */
    195  1.1.2.2  uebayasi         if (arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt)) == 0) {
    196  1.1.2.2  uebayasi 
    197  1.1.2.2  uebayasi             mtx_lock(&e->lock);
    198  1.1.2.2  uebayasi             if (e->arpq_head)
    199  1.1.2.2  uebayasi                 setup_l2e_send_pending(dev, m, e);
    200  1.1.2.2  uebayasi             else
    201  1.1.2.2  uebayasi                 m_freem(m);
    202  1.1.2.2  uebayasi             mtx_unlock(&e->lock);
    203  1.1.2.2  uebayasi         }
    204  1.1.2.2  uebayasi     }
    205  1.1.2.2  uebayasi     return 0;
    206  1.1.2.2  uebayasi }
    207  1.1.2.2  uebayasi 
    208  1.1.2.2  uebayasi void
    209  1.1.2.2  uebayasi t3_l2t_send_event(struct toedev *dev, struct l2t_entry *e)
    210  1.1.2.2  uebayasi {
    211  1.1.2.2  uebayasi     struct rtentry *rt;
    212  1.1.2.2  uebayasi     struct mbuf *m0;
    213  1.1.2.2  uebayasi 
    214  1.1.2.2  uebayasi     if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
    215  1.1.2.2  uebayasi         return;
    216  1.1.2.2  uebayasi 
    217  1.1.2.2  uebayasi     rt = e->neigh;
    218  1.1.2.2  uebayasi again:
    219  1.1.2.2  uebayasi     switch (e->state) {
    220  1.1.2.2  uebayasi     case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
    221  1.1.2.2  uebayasi         arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt));
    222  1.1.2.2  uebayasi         mtx_lock(&e->lock);
    223  1.1.2.2  uebayasi         if (e->state == L2T_STATE_STALE) {
    224  1.1.2.2  uebayasi             e->state = L2T_STATE_VALID;
    225  1.1.2.2  uebayasi         }
    226  1.1.2.2  uebayasi         mtx_unlock(&e->lock);
    227  1.1.2.2  uebayasi         return;
    228  1.1.2.2  uebayasi     case L2T_STATE_VALID:     /* fast-path, send the packet on */
    229  1.1.2.2  uebayasi         return;
    230  1.1.2.2  uebayasi     case L2T_STATE_RESOLVING:
    231  1.1.2.2  uebayasi         mtx_lock(&e->lock);
    232  1.1.2.2  uebayasi         if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
    233  1.1.2.2  uebayasi             mtx_unlock(&e->lock);
    234  1.1.2.2  uebayasi             goto again;
    235  1.1.2.2  uebayasi         }
    236  1.1.2.2  uebayasi         mtx_unlock(&e->lock);
    237  1.1.2.2  uebayasi 
    238  1.1.2.2  uebayasi         if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
    239  1.1.2.2  uebayasi             return;
    240  1.1.2.2  uebayasi         /*
    241  1.1.2.2  uebayasi          * Only the first packet added to the arpq should kick off
    242  1.1.2.2  uebayasi          * resolution.  However, because the alloc_skb below can fail,
    243  1.1.2.2  uebayasi          * we allow each packet added to the arpq to retry resolution
    244  1.1.2.2  uebayasi          * as a way of recovering from transient memory exhaustion.
    245  1.1.2.2  uebayasi          * A better way would be to use a work request to retry L2T
    246  1.1.2.2  uebayasi          * entries when there's no memory.
    247  1.1.2.2  uebayasi          */
    248  1.1.2.2  uebayasi         arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt));
    249  1.1.2.2  uebayasi 
    250  1.1.2.2  uebayasi     }
    251  1.1.2.2  uebayasi     return;
    252  1.1.2.2  uebayasi }
    253  1.1.2.2  uebayasi /*
    254  1.1.2.2  uebayasi  * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
    255  1.1.2.2  uebayasi  */
    256  1.1.2.2  uebayasi static struct l2t_entry *
    257  1.1.2.2  uebayasi alloc_l2e(struct l2t_data *d)
    258  1.1.2.2  uebayasi {
    259  1.1.2.2  uebayasi     struct l2t_entry *end, *e, **p;
    260  1.1.2.2  uebayasi 
    261  1.1.2.2  uebayasi     if (!atomic_load_acq_int(&d->nfree))
    262  1.1.2.2  uebayasi         return NULL;
    263  1.1.2.2  uebayasi 
    264  1.1.2.2  uebayasi     /* there's definitely a free entry */
    265  1.1.2.2  uebayasi     for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
    266  1.1.2.2  uebayasi         if (atomic_load_acq_int(&e->refcnt) == 0)
    267  1.1.2.2  uebayasi             goto found;
    268  1.1.2.2  uebayasi 
    269  1.1.2.2  uebayasi     for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ;
    270  1.1.2.2  uebayasi found:
    271  1.1.2.2  uebayasi     d->rover = e + 1;
    272  1.1.2.2  uebayasi     atomic_add_int(&d->nfree, -1);
    273  1.1.2.2  uebayasi 
    274  1.1.2.2  uebayasi     /*
    275  1.1.2.2  uebayasi      * The entry we found may be an inactive entry that is
    276  1.1.2.2  uebayasi      * presently in the hash table.  We need to remove it.
    277  1.1.2.2  uebayasi      */
    278  1.1.2.2  uebayasi     if (e->state != L2T_STATE_UNUSED) {
    279  1.1.2.2  uebayasi         int hash = arp_hash(e->addr, e->ifindex, d);
    280  1.1.2.2  uebayasi 
    281  1.1.2.2  uebayasi         for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
    282  1.1.2.2  uebayasi             if (*p == e) {
    283  1.1.2.2  uebayasi                 *p = e->next;
    284  1.1.2.2  uebayasi                 break;
    285  1.1.2.2  uebayasi             }
    286  1.1.2.2  uebayasi         e->state = L2T_STATE_UNUSED;
    287  1.1.2.2  uebayasi     }
    288  1.1.2.2  uebayasi     return e;
    289  1.1.2.2  uebayasi }
    290  1.1.2.2  uebayasi 
    291  1.1.2.2  uebayasi /*
    292  1.1.2.2  uebayasi  * Called when an L2T entry has no more users.  The entry is left in the hash
    293  1.1.2.2  uebayasi  * table since it is likely to be reused but we also bump nfree to indicate
    294  1.1.2.2  uebayasi  * that the entry can be reallocated for a different neighbor.  We also drop
    295  1.1.2.2  uebayasi  * the existing neighbor reference in case the neighbor is going away and is
    296  1.1.2.2  uebayasi  * waiting on our reference.
    297  1.1.2.2  uebayasi  *
    298  1.1.2.2  uebayasi  * Because entries can be reallocated to other neighbors once their ref count
    299  1.1.2.2  uebayasi  * drops to 0 we need to take the entry's lock to avoid races with a new
    300  1.1.2.2  uebayasi  * incarnation.
    301  1.1.2.2  uebayasi  */
    302  1.1.2.2  uebayasi void
    303  1.1.2.2  uebayasi t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
    304  1.1.2.2  uebayasi {
    305  1.1.2.2  uebayasi     mtx_lock(&e->lock);
    306  1.1.2.2  uebayasi     if (atomic_load_acq_int(&e->refcnt) == 0) {  /* hasn't been recycled */
    307  1.1.2.2  uebayasi         if (e->neigh) {
    308  1.1.2.2  uebayasi             RT_LOCK(e->neigh);
    309  1.1.2.2  uebayasi             RT_REMREF(e->neigh);
    310  1.1.2.2  uebayasi             RT_UNLOCK(e->neigh);
    311  1.1.2.2  uebayasi             e->neigh = NULL;
    312  1.1.2.2  uebayasi         }
    313  1.1.2.2  uebayasi     }
    314  1.1.2.2  uebayasi     mtx_unlock(&e->lock);
    315  1.1.2.2  uebayasi     atomic_add_int(&d->nfree, 1);
    316  1.1.2.2  uebayasi }
    317  1.1.2.2  uebayasi 
    318  1.1.2.2  uebayasi /*
    319  1.1.2.2  uebayasi  * Update an L2T entry that was previously used for the same next hop as neigh.
    320  1.1.2.2  uebayasi  * Must be called with softirqs disabled.
    321  1.1.2.2  uebayasi  */
    322  1.1.2.2  uebayasi static inline void
    323  1.1.2.2  uebayasi reuse_entry(struct l2t_entry *e, struct rtentry *neigh)
    324  1.1.2.2  uebayasi {
    325  1.1.2.2  uebayasi     struct llinfo_arp *la;
    326  1.1.2.2  uebayasi 
    327  1.1.2.2  uebayasi     la = (struct llinfo_arp *)neigh->rt_llinfo;
    328  1.1.2.2  uebayasi 
    329  1.1.2.2  uebayasi     mtx_lock(&e->lock);                /* avoid race with t3_l2t_free */
    330  1.1.2.2  uebayasi     if (neigh != e->neigh)
    331  1.1.2.2  uebayasi         neigh_replace(e, neigh);
    332  1.1.2.2  uebayasi 
    333  1.1.2.2  uebayasi     if (memcmp(e->dmac, RT_ENADDR(neigh), sizeof(e->dmac)) ||
    334  1.1.2.2  uebayasi         (neigh->rt_expire > time_uptime))
    335  1.1.2.2  uebayasi         e->state = L2T_STATE_RESOLVING;
    336  1.1.2.2  uebayasi     else if (la->la_hold == NULL)
    337  1.1.2.2  uebayasi         e->state = L2T_STATE_VALID;
    338  1.1.2.2  uebayasi     else
    339  1.1.2.2  uebayasi         e->state = L2T_STATE_STALE;
    340  1.1.2.2  uebayasi     mtx_unlock(&e->lock);
    341  1.1.2.2  uebayasi }
    342  1.1.2.2  uebayasi 
    343  1.1.2.2  uebayasi struct l2t_entry *
    344  1.1.2.2  uebayasi t3_l2t_get(struct toedev *dev, struct rtentry *neigh,
    345  1.1.2.2  uebayasi                  unsigned int smt_idx)
    346  1.1.2.2  uebayasi {
    347  1.1.2.2  uebayasi     struct l2t_entry *e;
    348  1.1.2.2  uebayasi     struct l2t_data *d = L2DATA(dev);
    349  1.1.2.2  uebayasi     u32 addr = *(u32 *)neigh->_rt_key;
    350  1.1.2.2  uebayasi     int ifidx = neigh->rt_ifp->if_index;
    351  1.1.2.2  uebayasi     int hash = arp_hash(addr, ifidx, d);
    352  1.1.2.2  uebayasi 
    353  1.1.2.2  uebayasi     rw_wlock(&d->lock);
    354  1.1.2.2  uebayasi     for (e = d->l2tab[hash].first; e; e = e->next)
    355  1.1.2.2  uebayasi         if (e->addr == addr && e->ifindex == ifidx &&
    356  1.1.2.2  uebayasi             e->smt_idx == smt_idx) {
    357  1.1.2.2  uebayasi             l2t_hold(d, e);
    358  1.1.2.2  uebayasi             if (atomic_load_acq_int(&e->refcnt) == 1)
    359  1.1.2.2  uebayasi                 reuse_entry(e, neigh);
    360  1.1.2.2  uebayasi             goto done;
    361  1.1.2.2  uebayasi         }
    362  1.1.2.2  uebayasi 
    363  1.1.2.2  uebayasi     /* Need to allocate a new entry */
    364  1.1.2.2  uebayasi     e = alloc_l2e(d);
    365  1.1.2.2  uebayasi     if (e) {
    366  1.1.2.2  uebayasi         mtx_lock(&e->lock);          /* avoid race with t3_l2t_free */
    367  1.1.2.2  uebayasi         e->next = d->l2tab[hash].first;
    368  1.1.2.2  uebayasi         d->l2tab[hash].first = e;
    369  1.1.2.2  uebayasi         e->state = L2T_STATE_RESOLVING;
    370  1.1.2.2  uebayasi         e->addr = addr;
    371  1.1.2.2  uebayasi         e->ifindex = ifidx;
    372  1.1.2.2  uebayasi         e->smt_idx = smt_idx;
    373  1.1.2.2  uebayasi         atomic_store_rel_int(&e->refcnt, 1);
    374  1.1.2.2  uebayasi         neigh_replace(e, neigh);
    375  1.1.2.2  uebayasi #ifdef notyet
    376  1.1.2.2  uebayasi         /*
    377  1.1.2.2  uebayasi          * XXX need to add accessor function for vlan tag
    378  1.1.2.2  uebayasi          */
    379  1.1.2.2  uebayasi         if (neigh->rt_ifp->if_vlantrunk)
    380  1.1.2.2  uebayasi             e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
    381  1.1.2.2  uebayasi         else
    382  1.1.2.2  uebayasi #endif
    383  1.1.2.2  uebayasi             e->vlan = VLAN_NONE;
    384  1.1.2.2  uebayasi         mtx_unlock(&e->lock);
    385  1.1.2.2  uebayasi     }
    386  1.1.2.2  uebayasi done:
    387  1.1.2.2  uebayasi     rw_wunlock(&d->lock);
    388  1.1.2.2  uebayasi     return e;
    389  1.1.2.2  uebayasi }
    390  1.1.2.2  uebayasi 
    391  1.1.2.2  uebayasi /*
    392  1.1.2.2  uebayasi  * Called when address resolution fails for an L2T entry to handle packets
    393  1.1.2.2  uebayasi  * on the arpq head.  If a packet specifies a failure handler it is invoked,
    394  1.1.2.2  uebayasi  * otherwise the packets is sent to the TOE.
    395  1.1.2.2  uebayasi  *
    396  1.1.2.2  uebayasi  * XXX: maybe we should abandon the latter behavior and just require a failure
    397  1.1.2.2  uebayasi  * handler.
    398  1.1.2.2  uebayasi  */
    399  1.1.2.2  uebayasi static void
    400  1.1.2.2  uebayasi handle_failed_resolution(struct toedev *dev, struct mbuf *arpq)
    401  1.1.2.2  uebayasi {
    402  1.1.2.2  uebayasi 
    403  1.1.2.2  uebayasi     while (arpq) {
    404  1.1.2.2  uebayasi         struct mbuf *m = arpq;
    405  1.1.2.2  uebayasi #ifdef notyet
    406  1.1.2.2  uebayasi         struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m);
    407  1.1.2.2  uebayasi #endif
    408  1.1.2.2  uebayasi         arpq = m->m_next;
    409  1.1.2.2  uebayasi         m->m_next = NULL;
    410  1.1.2.2  uebayasi #ifdef notyet
    411  1.1.2.2  uebayasi         if (cb->arp_failure_handler)
    412  1.1.2.2  uebayasi             cb->arp_failure_handler(dev, m);
    413  1.1.2.2  uebayasi         else
    414  1.1.2.2  uebayasi #endif
    415  1.1.2.2  uebayasi     }
    416  1.1.2.2  uebayasi 
    417  1.1.2.2  uebayasi }
    418  1.1.2.2  uebayasi 
    419  1.1.2.2  uebayasi #if defined(NETEVENT) || !defined(CONFIG_CHELSIO_T3_MODULE)
    420  1.1.2.2  uebayasi /*
    421  1.1.2.2  uebayasi  * Called when the host's ARP layer makes a change to some entry that is
    422  1.1.2.2  uebayasi  * loaded into the HW L2 table.
    423  1.1.2.2  uebayasi  */
    424  1.1.2.2  uebayasi void
    425  1.1.2.2  uebayasi t3_l2t_update(struct toedev *dev, struct rtentry *neigh)
    426  1.1.2.2  uebayasi {
    427  1.1.2.2  uebayasi     struct l2t_entry *e;
    428  1.1.2.2  uebayasi     struct mbuf *arpq = NULL;
    429  1.1.2.2  uebayasi     struct l2t_data *d = L2DATA(dev);
    430  1.1.2.2  uebayasi     u32 addr = *(u32 *)neigh->_rt_key;
    431  1.1.2.2  uebayasi     int ifidx = neigh->rt_ifp->if_index;
    432  1.1.2.2  uebayasi     int hash = arp_hash(addr, ifidx, d);
    433  1.1.2.2  uebayasi     struct llinfo_arp *la;
    434  1.1.2.2  uebayasi 
    435  1.1.2.2  uebayasi     rw_rlock(&d->lock);
    436  1.1.2.2  uebayasi     for (e = d->l2tab[hash].first; e; e = e->next)
    437  1.1.2.2  uebayasi         if (e->addr == addr && e->ifindex == ifidx) {
    438  1.1.2.2  uebayasi             mtx_lock(&e->lock);
    439  1.1.2.2  uebayasi             goto found;
    440  1.1.2.2  uebayasi         }
    441  1.1.2.2  uebayasi     rw_runlock(&d->lock);
    442  1.1.2.2  uebayasi     return;
    443  1.1.2.2  uebayasi 
    444  1.1.2.2  uebayasi found:
    445  1.1.2.2  uebayasi     rw_runlock(&d->lock);
    446  1.1.2.2  uebayasi     if (atomic_load_acq_int(&e->refcnt)) {
    447  1.1.2.2  uebayasi         if (neigh != e->neigh)
    448  1.1.2.2  uebayasi             neigh_replace(e, neigh);
    449  1.1.2.2  uebayasi 
    450  1.1.2.2  uebayasi         la = (struct llinfo_arp *)neigh->rt_llinfo;
    451  1.1.2.2  uebayasi         if (e->state == L2T_STATE_RESOLVING) {
    452  1.1.2.2  uebayasi 
    453  1.1.2.2  uebayasi             if (la->la_asked >= 5 /* arp_maxtries */) {
    454  1.1.2.2  uebayasi                 arpq = e->arpq_head;
    455  1.1.2.2  uebayasi                 e->arpq_head = e->arpq_tail = NULL;
    456  1.1.2.2  uebayasi             } else if (la->la_hold == NULL)
    457  1.1.2.2  uebayasi                 setup_l2e_send_pending(dev, NULL, e);
    458  1.1.2.2  uebayasi         } else {
    459  1.1.2.2  uebayasi             e->state = (la->la_hold == NULL) ?
    460  1.1.2.2  uebayasi                 L2T_STATE_VALID : L2T_STATE_STALE;
    461  1.1.2.2  uebayasi             if (memcmp(e->dmac, RT_ENADDR(neigh), 6))
    462  1.1.2.2  uebayasi                 setup_l2e_send_pending(dev, NULL, e);
    463  1.1.2.2  uebayasi         }
    464  1.1.2.2  uebayasi     }
    465  1.1.2.2  uebayasi     mtx_unlock(&e->lock);
    466  1.1.2.2  uebayasi 
    467  1.1.2.2  uebayasi     if (arpq)
    468  1.1.2.2  uebayasi         handle_failed_resolution(dev, arpq);
    469  1.1.2.2  uebayasi }
    470  1.1.2.2  uebayasi #else
    471  1.1.2.2  uebayasi /*
    472  1.1.2.2  uebayasi  * Called from a kprobe, interrupts are off.
    473  1.1.2.2  uebayasi  */
    474  1.1.2.2  uebayasi void
    475  1.1.2.2  uebayasi t3_l2t_update(struct toedev *dev, struct rtentry *neigh)
    476  1.1.2.2  uebayasi {
    477  1.1.2.2  uebayasi     struct l2t_entry *e;
    478  1.1.2.2  uebayasi     struct l2t_data *d = L2DATA(dev);
    479  1.1.2.2  uebayasi     u32 addr = *(u32 *) rt_key(neigh);
    480  1.1.2.2  uebayasi     int ifidx = neigh->dev->ifindex;
    481  1.1.2.2  uebayasi     int hash = arp_hash(addr, ifidx, d);
    482  1.1.2.2  uebayasi 
    483  1.1.2.2  uebayasi     rw_rlock(&d->lock);
    484  1.1.2.2  uebayasi     for (e = d->l2tab[hash].first; e; e = e->next)
    485  1.1.2.2  uebayasi         if (e->addr == addr && e->ifindex == ifidx) {
    486  1.1.2.2  uebayasi             mtx_lock(&e->lock);
    487  1.1.2.2  uebayasi             if (atomic_load_acq_int(&e->refcnt)) {
    488  1.1.2.2  uebayasi                 if (neigh != e->neigh)
    489  1.1.2.2  uebayasi                     neigh_replace(e, neigh);
    490  1.1.2.2  uebayasi                 e->tdev = dev;
    491  1.1.2.2  uebayasi                 mod_timer(&e->update_timer, jiffies + 1);
    492  1.1.2.2  uebayasi             }
    493  1.1.2.2  uebayasi             mtx_unlock(&e->lock);
    494  1.1.2.2  uebayasi             break;
    495  1.1.2.2  uebayasi         }
    496  1.1.2.2  uebayasi     rw_runlock(&d->lock);
    497  1.1.2.2  uebayasi }
    498  1.1.2.2  uebayasi 
    499  1.1.2.2  uebayasi static void
    500  1.1.2.2  uebayasi update_timer_cb(unsigned long data)
    501  1.1.2.2  uebayasi {
    502  1.1.2.2  uebayasi     struct mbuf *arpq = NULL;
    503  1.1.2.2  uebayasi     struct l2t_entry *e = (struct l2t_entry *)data;
    504  1.1.2.2  uebayasi     struct rtentry *neigh = e->neigh;
    505  1.1.2.2  uebayasi     struct toedev *dev = e->tdev;
    506  1.1.2.2  uebayasi 
    507  1.1.2.2  uebayasi     barrier();
    508  1.1.2.2  uebayasi     if (!atomic_load_acq_int(&e->refcnt))
    509  1.1.2.2  uebayasi         return;
    510  1.1.2.2  uebayasi 
    511  1.1.2.2  uebayasi     rw_rlock(&neigh->lock);
    512  1.1.2.2  uebayasi     mtx_lock(&e->lock);
    513  1.1.2.2  uebayasi 
    514  1.1.2.2  uebayasi     if (atomic_load_acq_int(&e->refcnt)) {
    515  1.1.2.2  uebayasi         if (e->state == L2T_STATE_RESOLVING) {
    516  1.1.2.2  uebayasi             if (neigh->nud_state & NUD_FAILED) {
    517  1.1.2.2  uebayasi                 arpq = e->arpq_head;
    518  1.1.2.2  uebayasi                 e->arpq_head = e->arpq_tail = NULL;
    519  1.1.2.2  uebayasi             } else if (neigh_is_connected(neigh) && e->arpq_head)
    520  1.1.2.2  uebayasi                 setup_l2e_send_pending(dev, NULL, e);
    521  1.1.2.2  uebayasi         } else {
    522  1.1.2.2  uebayasi             e->state = neigh_is_connected(neigh) ?
    523  1.1.2.2  uebayasi                 L2T_STATE_VALID : L2T_STATE_STALE;
    524  1.1.2.2  uebayasi             if (memcmp(e->dmac, RT_ENADDR(neigh), sizeof(e->dmac)))
    525  1.1.2.2  uebayasi                 setup_l2e_send_pending(dev, NULL, e);
    526  1.1.2.2  uebayasi         }
    527  1.1.2.2  uebayasi     }
    528  1.1.2.2  uebayasi     mtx_unlock(&e->lock);
    529  1.1.2.2  uebayasi     rw_runlock(&neigh->lock);
    530  1.1.2.2  uebayasi 
    531  1.1.2.2  uebayasi     if (arpq)
    532  1.1.2.2  uebayasi         handle_failed_resolution(dev, arpq);
    533  1.1.2.2  uebayasi }
    534  1.1.2.2  uebayasi #endif
    535  1.1.2.2  uebayasi 
    536  1.1.2.2  uebayasi struct l2t_data *
    537  1.1.2.2  uebayasi t3_init_l2t(unsigned int l2t_capacity)
    538  1.1.2.2  uebayasi {
    539  1.1.2.2  uebayasi     struct l2t_data *d;
    540  1.1.2.2  uebayasi     int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
    541  1.1.2.2  uebayasi 
    542  1.1.2.2  uebayasi     d = cxgb_alloc_mem(size);
    543  1.1.2.2  uebayasi     if (!d)
    544  1.1.2.2  uebayasi         return NULL;
    545  1.1.2.2  uebayasi 
    546  1.1.2.2  uebayasi     d->nentries = l2t_capacity;
    547  1.1.2.2  uebayasi     d->rover = &d->l2tab[1];    /* entry 0 is not used */
    548  1.1.2.2  uebayasi     atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
    549  1.1.2.2  uebayasi     rw_init(&d->lock, "L2T");
    550  1.1.2.2  uebayasi 
    551  1.1.2.2  uebayasi     for (i = 0; i < l2t_capacity; ++i) {
    552  1.1.2.2  uebayasi         d->l2tab[i].idx = i;
    553  1.1.2.2  uebayasi         d->l2tab[i].state = L2T_STATE_UNUSED;
    554  1.1.2.2  uebayasi         mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF);
    555  1.1.2.2  uebayasi         atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
    556  1.1.2.2  uebayasi #ifndef NETEVENT
    557  1.1.2.2  uebayasi #ifdef CONFIG_CHELSIO_T3_MODULE
    558  1.1.2.2  uebayasi         setup_timer(&d->l2tab[i].update_timer, update_timer_cb,
    559  1.1.2.2  uebayasi                 (unsigned long)&d->l2tab[i]);
    560  1.1.2.2  uebayasi #endif
    561  1.1.2.2  uebayasi #endif
    562  1.1.2.2  uebayasi     }
    563  1.1.2.2  uebayasi     return d;
    564  1.1.2.2  uebayasi }
    565  1.1.2.2  uebayasi 
    566  1.1.2.2  uebayasi void
    567  1.1.2.2  uebayasi t3_free_l2t(struct l2t_data *d)
    568  1.1.2.2  uebayasi {
    569  1.1.2.2  uebayasi #ifndef NETEVENT
    570  1.1.2.2  uebayasi #ifdef CONFIG_CHELSIO_T3_MODULE
    571  1.1.2.2  uebayasi     int i;
    572  1.1.2.2  uebayasi 
    573  1.1.2.2  uebayasi     /* Stop all L2T timers */
    574  1.1.2.2  uebayasi     for (i = 0; i < d->nentries; ++i)
    575  1.1.2.2  uebayasi         del_timer_sync(&d->l2tab[i].update_timer);
    576  1.1.2.2  uebayasi #endif
    577  1.1.2.2  uebayasi #endif
    578  1.1.2.2  uebayasi     cxgb_free_mem(d);
    579  1.1.2.2  uebayasi }
    580  1.1.2.2  uebayasi 
    581  1.1.2.2  uebayasi #ifdef CONFIG_PROC_FS
    582  1.1.2.2  uebayasi #include <linux/module.h>
    583  1.1.2.2  uebayasi #include <linux/proc_fs.h>
    584  1.1.2.2  uebayasi #include <linux/seq_file.h>
    585  1.1.2.2  uebayasi 
    586  1.1.2.2  uebayasi static inline void *
    587  1.1.2.2  uebayasi l2t_get_idx(struct seq_file *seq, loff_t pos)
    588  1.1.2.2  uebayasi {
    589  1.1.2.2  uebayasi     struct l2t_data *d = seq->private;
    590  1.1.2.2  uebayasi 
    591  1.1.2.2  uebayasi     return pos >= d->nentries ? NULL : &d->l2tab[pos];
    592  1.1.2.2  uebayasi }
    593  1.1.2.2  uebayasi 
    594  1.1.2.2  uebayasi static void *
    595  1.1.2.2  uebayasi l2t_seq_start(struct seq_file *seq, loff_t *pos)
    596  1.1.2.2  uebayasi {
    597  1.1.2.2  uebayasi     return *pos ? l2t_get_idx(seq, *pos) : SEQ_START_TOKEN;
    598  1.1.2.2  uebayasi }
    599  1.1.2.2  uebayasi 
    600  1.1.2.2  uebayasi static void *
    601  1.1.2.2  uebayasi l2t_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    602  1.1.2.2  uebayasi {
    603  1.1.2.2  uebayasi     v = l2t_get_idx(seq, *pos + 1);
    604  1.1.2.2  uebayasi     if (v)
    605  1.1.2.2  uebayasi         ++*pos;
    606  1.1.2.2  uebayasi     return v;
    607  1.1.2.2  uebayasi }
    608  1.1.2.2  uebayasi 
    609  1.1.2.2  uebayasi static void
    610  1.1.2.2  uebayasi l2t_seq_stop(struct seq_file *seq, void *v)
    611  1.1.2.2  uebayasi {
    612  1.1.2.2  uebayasi }
    613  1.1.2.2  uebayasi 
    614  1.1.2.2  uebayasi static char
    615  1.1.2.2  uebayasi l2e_state(const struct l2t_entry *e)
    616  1.1.2.2  uebayasi {
    617  1.1.2.2  uebayasi     switch (e->state) {
    618  1.1.2.2  uebayasi     case L2T_STATE_VALID: return 'V';  /* valid, fast-path entry */
    619  1.1.2.2  uebayasi     case L2T_STATE_STALE: return 'S';  /* needs revalidation, but usable */
    620  1.1.2.2  uebayasi     case L2T_STATE_RESOLVING:
    621  1.1.2.2  uebayasi         return e->arpq_head ? 'A' : 'R';
    622  1.1.2.2  uebayasi     default:
    623  1.1.2.2  uebayasi         return 'U';
    624  1.1.2.2  uebayasi     }
    625  1.1.2.2  uebayasi }
    626  1.1.2.2  uebayasi 
    627  1.1.2.2  uebayasi static int
    628  1.1.2.2  uebayasi l2t_seq_show(struct seq_file *seq, void *v)
    629  1.1.2.2  uebayasi {
    630  1.1.2.2  uebayasi     if (v == SEQ_START_TOKEN)
    631  1.1.2.2  uebayasi         seq_puts(seq, "Index IP address      Ethernet address   VLAN  "
    632  1.1.2.2  uebayasi              "Prio  State   Users SMTIDX  Port\n");
    633  1.1.2.2  uebayasi     else {
    634  1.1.2.2  uebayasi         char ip[20];
    635  1.1.2.2  uebayasi         struct l2t_entry *e = v;
    636  1.1.2.2  uebayasi 
    637  1.1.2.2  uebayasi         mtx_lock(&e->lock);
    638  1.1.2.2  uebayasi         sprintf(ip, "%u.%u.%u.%u", NIPQUAD(e->addr));
    639  1.1.2.2  uebayasi         seq_printf(seq, "%-5u %-15s %02x:%02x:%02x:%02x:%02x:%02x  %4d"
    640  1.1.2.2  uebayasi                "  %3u     %c   %7u   %4u %s\n",
    641  1.1.2.2  uebayasi                e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
    642  1.1.2.2  uebayasi                e->dmac[3], e->dmac[4], e->dmac[5],
    643  1.1.2.2  uebayasi                e->vlan & EVL_VLID_MASK, vlan_prio(e),
    644  1.1.2.2  uebayasi                l2e_state(e), atomic_load_acq_int(&e->refcnt), e->smt_idx,
    645  1.1.2.2  uebayasi                e->neigh ? e->neigh->dev->name : "");
    646  1.1.2.2  uebayasi         mtx_unlock(&e->lock);
    647  1.1.2.2  uebayasi     }
    648  1.1.2.2  uebayasi     return 0;
    649  1.1.2.2  uebayasi }
    650  1.1.2.2  uebayasi 
    651  1.1.2.2  uebayasi #endif
    652