cxgb_l2t.c revision 1.1.2.2 1 1.1.2.2 uebayasi /**************************************************************************
2 1.1.2.2 uebayasi
3 1.1.2.2 uebayasi Copyright (c) 2007, Chelsio Inc.
4 1.1.2.2 uebayasi All rights reserved.
5 1.1.2.2 uebayasi
6 1.1.2.2 uebayasi Redistribution and use in source and binary forms, with or without
7 1.1.2.2 uebayasi modification, are permitted provided that the following conditions are met:
8 1.1.2.2 uebayasi
9 1.1.2.2 uebayasi 1. Redistributions of source code must retain the above copyright notice,
10 1.1.2.2 uebayasi this list of conditions and the following disclaimer.
11 1.1.2.2 uebayasi
12 1.1.2.2 uebayasi 2. Neither the name of the Chelsio Corporation nor the names of its
13 1.1.2.2 uebayasi contributors may be used to endorse or promote products derived from
14 1.1.2.2 uebayasi this software without specific prior written permission.
15 1.1.2.2 uebayasi
16 1.1.2.2 uebayasi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 1.1.2.2 uebayasi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 1.1.2.2 uebayasi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 1.1.2.2 uebayasi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 1.1.2.2 uebayasi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 1.1.2.2 uebayasi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 1.1.2.2 uebayasi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 1.1.2.2 uebayasi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 1.1.2.2 uebayasi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 1.1.2.2 uebayasi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 1.1.2.2 uebayasi POSSIBILITY OF SUCH DAMAGE.
27 1.1.2.2 uebayasi
28 1.1.2.2 uebayasi ***************************************************************************/
29 1.1.2.2 uebayasi
30 1.1.2.2 uebayasi #include <sys/cdefs.h>
31 1.1.2.2 uebayasi __KERNEL_RCSID(0, "$NetBSD: cxgb_l2t.c,v 1.1.2.2 2010/04/30 14:43:44 uebayasi Exp $");
32 1.1.2.2 uebayasi
33 1.1.2.2 uebayasi #include <sys/param.h>
34 1.1.2.2 uebayasi #include <sys/systm.h>
35 1.1.2.2 uebayasi #include <sys/kernel.h>
36 1.1.2.2 uebayasi #include <sys/lock.h>
37 1.1.2.2 uebayasi #include <sys/mutex.h>
38 1.1.2.2 uebayasi
39 1.1.2.2 uebayasi #include <sys/socket.h>
40 1.1.2.2 uebayasi #include <sys/socketvar.h>
41 1.1.2.2 uebayasi #include <net/if.h>
42 1.1.2.2 uebayasi #include <netinet/in.h>
43 1.1.2.2 uebayasi #include <netinet/in_var.h>
44 1.1.2.2 uebayasi #include <netinet/if_inarp.h>
45 1.1.2.2 uebayasi #include <net/if_dl.h>
46 1.1.2.2 uebayasi #include <net/route.h>
47 1.1.2.2 uebayasi #include <netinet/in.h>
48 1.1.2.2 uebayasi
49 1.1.2.2 uebayasi #ifdef CONFIG_DEFINED
50 1.1.2.2 uebayasi #include <cxgb_include.h>
51 1.1.2.2 uebayasi #else
52 1.1.2.2 uebayasi #include "cxgb_include.h"
53 1.1.2.2 uebayasi #endif
54 1.1.2.2 uebayasi
/* Value stored in l2t_entry.vlan when no VLAN tag is associated (see t3_l2t_get). */
#define VLAN_NONE 0xfff
/*
 * Reinterpret a pointer as a link-level socket address.  The argument is
 * parenthesized so that compound expressions (e.g. "p + 1") are cast as a
 * whole instead of having the cast bind to their first operand only.
 */
#define SDL(s) ((struct sockaddr_dl *)(s))
/* Byte pointer to the link-layer address obtained via LLADDR(SDL(rt)). */
#define RT_ENADDR(rt) ((u_char *)LLADDR(SDL((rt))))
/* Shorthand for the expiry field nested inside the route metrics. */
#define rt_expire rt_rmx.rmx_expire
59 1.1.2.2 uebayasi
60 1.1.2.2 uebayasi /*
61 1.1.2.2 uebayasi * Module locking notes: There is a RW lock protecting the L2 table as a
62 1.1.2.2 uebayasi * whole plus a spinlock per L2T entry. Entry lookups and allocations happen
63 1.1.2.2 uebayasi * under the protection of the table lock, individual entry changes happen
64 1.1.2.2 uebayasi * while holding that entry's spinlock. The table lock nests outside the
65 1.1.2.2 uebayasi * entry locks. Allocations of new entries take the table lock as writers so
66 1.1.2.2 uebayasi * no other lookups can happen while allocating new entries. Entry updates
67 1.1.2.2 uebayasi * take the table lock as readers so multiple entries can be updated in
68 1.1.2.2 uebayasi * parallel. An L2T entry can be dropped by decrementing its reference count
69 1.1.2.2 uebayasi * and therefore can happen in parallel with entry allocation but no entry
70 1.1.2.2 uebayasi * can change state or increment its ref count during allocation as both of
71 1.1.2.2 uebayasi * these perform lookups.
72 1.1.2.2 uebayasi */
73 1.1.2.2 uebayasi
74 1.1.2.2 uebayasi static inline unsigned int
75 1.1.2.2 uebayasi vlan_prio(const struct l2t_entry *e)
76 1.1.2.2 uebayasi {
77 1.1.2.2 uebayasi return e->vlan >> 13;
78 1.1.2.2 uebayasi }
79 1.1.2.2 uebayasi
80 1.1.2.2 uebayasi static inline unsigned int
81 1.1.2.2 uebayasi arp_hash(u32 key, int ifindex, const struct l2t_data *d)
82 1.1.2.2 uebayasi {
83 1.1.2.2 uebayasi return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
84 1.1.2.2 uebayasi }
85 1.1.2.2 uebayasi
86 1.1.2.2 uebayasi static inline void
87 1.1.2.2 uebayasi neigh_replace(struct l2t_entry *e, struct rtentry *rt)
88 1.1.2.2 uebayasi {
89 1.1.2.2 uebayasi RT_LOCK(rt);
90 1.1.2.2 uebayasi RT_ADDREF(rt);
91 1.1.2.2 uebayasi RT_UNLOCK(rt);
92 1.1.2.2 uebayasi
93 1.1.2.2 uebayasi if (e->neigh) {
94 1.1.2.2 uebayasi RT_LOCK(e->neigh);
95 1.1.2.2 uebayasi RT_REMREF(e->neigh);
96 1.1.2.2 uebayasi RT_UNLOCK(e->neigh);
97 1.1.2.2 uebayasi }
98 1.1.2.2 uebayasi e->neigh = rt;
99 1.1.2.2 uebayasi }
100 1.1.2.2 uebayasi
101 1.1.2.2 uebayasi /*
102 1.1.2.2 uebayasi * Set up an L2T entry and send any packets waiting in the arp queue. The
103 1.1.2.2 uebayasi * supplied mbuf is used for the CPL_L2T_WRITE_REQ. Must be called with the
104 1.1.2.2 uebayasi * entry locked.
105 1.1.2.2 uebayasi */
106 1.1.2.2 uebayasi static int
107 1.1.2.2 uebayasi setup_l2e_send_pending(struct toedev *dev, struct mbuf *m,
108 1.1.2.2 uebayasi struct l2t_entry *e)
109 1.1.2.2 uebayasi {
110 1.1.2.2 uebayasi struct cpl_l2t_write_req *req;
111 1.1.2.2 uebayasi
112 1.1.2.2 uebayasi if (!m) {
113 1.1.2.2 uebayasi if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
114 1.1.2.2 uebayasi return (ENOMEM);
115 1.1.2.2 uebayasi }
116 1.1.2.2 uebayasi /*
117 1.1.2.2 uebayasi * XXX MH_ALIGN
118 1.1.2.2 uebayasi */
119 1.1.2.2 uebayasi req = mtod(m, struct cpl_l2t_write_req *);
120 1.1.2.2 uebayasi req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
121 1.1.2.2 uebayasi OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
122 1.1.2.2 uebayasi req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
123 1.1.2.2 uebayasi V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
124 1.1.2.2 uebayasi V_L2T_W_PRIO(vlan_prio(e)));
125 1.1.2.2 uebayasi
126 1.1.2.2 uebayasi memcpy(e->dmac, RT_ENADDR(e->neigh), sizeof(e->dmac));
127 1.1.2.2 uebayasi memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
128 1.1.2.2 uebayasi m_set_priority(m, CPL_PRIORITY_CONTROL);
129 1.1.2.2 uebayasi while (e->arpq_head) {
130 1.1.2.2 uebayasi m = e->arpq_head;
131 1.1.2.2 uebayasi e->arpq_head = m->m_next;
132 1.1.2.2 uebayasi m->m_next = NULL;
133 1.1.2.2 uebayasi }
134 1.1.2.2 uebayasi e->arpq_tail = NULL;
135 1.1.2.2 uebayasi e->state = L2T_STATE_VALID;
136 1.1.2.2 uebayasi
137 1.1.2.2 uebayasi return 0;
138 1.1.2.2 uebayasi }
139 1.1.2.2 uebayasi
140 1.1.2.2 uebayasi /*
141 1.1.2.2 uebayasi * Add a packet to the an L2T entry's queue of packets awaiting resolution.
142 1.1.2.2 uebayasi * Must be called with the entry's lock held.
143 1.1.2.2 uebayasi */
144 1.1.2.2 uebayasi static inline void
145 1.1.2.2 uebayasi arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
146 1.1.2.2 uebayasi {
147 1.1.2.2 uebayasi m->m_next = NULL;
148 1.1.2.2 uebayasi if (e->arpq_head)
149 1.1.2.2 uebayasi e->arpq_tail->m_next = m;
150 1.1.2.2 uebayasi else
151 1.1.2.2 uebayasi e->arpq_head = m;
152 1.1.2.2 uebayasi e->arpq_tail = m;
153 1.1.2.2 uebayasi }
154 1.1.2.2 uebayasi
155 1.1.2.2 uebayasi int
156 1.1.2.2 uebayasi t3_l2t_send_slow(struct toedev *dev, struct mbuf *m,
157 1.1.2.2 uebayasi struct l2t_entry *e)
158 1.1.2.2 uebayasi {
159 1.1.2.2 uebayasi struct rtentry *rt;
160 1.1.2.2 uebayasi struct mbuf *m0;
161 1.1.2.2 uebayasi
162 1.1.2.2 uebayasi if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
163 1.1.2.2 uebayasi return (ENOMEM);
164 1.1.2.2 uebayasi
165 1.1.2.2 uebayasi rt = e->neigh;
166 1.1.2.2 uebayasi
167 1.1.2.2 uebayasi again:
168 1.1.2.2 uebayasi switch (e->state) {
169 1.1.2.2 uebayasi case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
170 1.1.2.2 uebayasi arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt));
171 1.1.2.2 uebayasi mtx_lock(&e->lock);
172 1.1.2.2 uebayasi if (e->state == L2T_STATE_STALE)
173 1.1.2.2 uebayasi e->state = L2T_STATE_VALID;
174 1.1.2.2 uebayasi mtx_unlock(&e->lock);
175 1.1.2.2 uebayasi case L2T_STATE_VALID: /* fast-path, send the packet on */
176 1.1.2.2 uebayasi case L2T_STATE_RESOLVING:
177 1.1.2.2 uebayasi mtx_lock(&e->lock);
178 1.1.2.2 uebayasi if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
179 1.1.2.2 uebayasi mtx_unlock(&e->lock);
180 1.1.2.2 uebayasi goto again;
181 1.1.2.2 uebayasi }
182 1.1.2.2 uebayasi arpq_enqueue(e, m);
183 1.1.2.2 uebayasi mtx_unlock(&e->lock);
184 1.1.2.2 uebayasi
185 1.1.2.2 uebayasi if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
186 1.1.2.2 uebayasi return (ENOMEM);
187 1.1.2.2 uebayasi /*
188 1.1.2.2 uebayasi * Only the first packet added to the arpq should kick off
189 1.1.2.2 uebayasi * resolution. However, because the m_gethdr below can fail,
190 1.1.2.2 uebayasi * we allow each packet added to the arpq to retry resolution
191 1.1.2.2 uebayasi * as a way of recovering from transient memory exhaustion.
192 1.1.2.2 uebayasi * A better way would be to use a work request to retry L2T
193 1.1.2.2 uebayasi * entries when there's no memory.
194 1.1.2.2 uebayasi */
195 1.1.2.2 uebayasi if (arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt)) == 0) {
196 1.1.2.2 uebayasi
197 1.1.2.2 uebayasi mtx_lock(&e->lock);
198 1.1.2.2 uebayasi if (e->arpq_head)
199 1.1.2.2 uebayasi setup_l2e_send_pending(dev, m, e);
200 1.1.2.2 uebayasi else
201 1.1.2.2 uebayasi m_freem(m);
202 1.1.2.2 uebayasi mtx_unlock(&e->lock);
203 1.1.2.2 uebayasi }
204 1.1.2.2 uebayasi }
205 1.1.2.2 uebayasi return 0;
206 1.1.2.2 uebayasi }
207 1.1.2.2 uebayasi
208 1.1.2.2 uebayasi void
209 1.1.2.2 uebayasi t3_l2t_send_event(struct toedev *dev, struct l2t_entry *e)
210 1.1.2.2 uebayasi {
211 1.1.2.2 uebayasi struct rtentry *rt;
212 1.1.2.2 uebayasi struct mbuf *m0;
213 1.1.2.2 uebayasi
214 1.1.2.2 uebayasi if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
215 1.1.2.2 uebayasi return;
216 1.1.2.2 uebayasi
217 1.1.2.2 uebayasi rt = e->neigh;
218 1.1.2.2 uebayasi again:
219 1.1.2.2 uebayasi switch (e->state) {
220 1.1.2.2 uebayasi case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
221 1.1.2.2 uebayasi arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt));
222 1.1.2.2 uebayasi mtx_lock(&e->lock);
223 1.1.2.2 uebayasi if (e->state == L2T_STATE_STALE) {
224 1.1.2.2 uebayasi e->state = L2T_STATE_VALID;
225 1.1.2.2 uebayasi }
226 1.1.2.2 uebayasi mtx_unlock(&e->lock);
227 1.1.2.2 uebayasi return;
228 1.1.2.2 uebayasi case L2T_STATE_VALID: /* fast-path, send the packet on */
229 1.1.2.2 uebayasi return;
230 1.1.2.2 uebayasi case L2T_STATE_RESOLVING:
231 1.1.2.2 uebayasi mtx_lock(&e->lock);
232 1.1.2.2 uebayasi if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
233 1.1.2.2 uebayasi mtx_unlock(&e->lock);
234 1.1.2.2 uebayasi goto again;
235 1.1.2.2 uebayasi }
236 1.1.2.2 uebayasi mtx_unlock(&e->lock);
237 1.1.2.2 uebayasi
238 1.1.2.2 uebayasi if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
239 1.1.2.2 uebayasi return;
240 1.1.2.2 uebayasi /*
241 1.1.2.2 uebayasi * Only the first packet added to the arpq should kick off
242 1.1.2.2 uebayasi * resolution. However, because the alloc_skb below can fail,
243 1.1.2.2 uebayasi * we allow each packet added to the arpq to retry resolution
244 1.1.2.2 uebayasi * as a way of recovering from transient memory exhaustion.
245 1.1.2.2 uebayasi * A better way would be to use a work request to retry L2T
246 1.1.2.2 uebayasi * entries when there's no memory.
247 1.1.2.2 uebayasi */
248 1.1.2.2 uebayasi arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt));
249 1.1.2.2 uebayasi
250 1.1.2.2 uebayasi }
251 1.1.2.2 uebayasi return;
252 1.1.2.2 uebayasi }
253 1.1.2.2 uebayasi /*
254 1.1.2.2 uebayasi * Allocate a free L2T entry. Must be called with l2t_data.lock held.
255 1.1.2.2 uebayasi */
256 1.1.2.2 uebayasi static struct l2t_entry *
257 1.1.2.2 uebayasi alloc_l2e(struct l2t_data *d)
258 1.1.2.2 uebayasi {
259 1.1.2.2 uebayasi struct l2t_entry *end, *e, **p;
260 1.1.2.2 uebayasi
261 1.1.2.2 uebayasi if (!atomic_load_acq_int(&d->nfree))
262 1.1.2.2 uebayasi return NULL;
263 1.1.2.2 uebayasi
264 1.1.2.2 uebayasi /* there's definitely a free entry */
265 1.1.2.2 uebayasi for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
266 1.1.2.2 uebayasi if (atomic_load_acq_int(&e->refcnt) == 0)
267 1.1.2.2 uebayasi goto found;
268 1.1.2.2 uebayasi
269 1.1.2.2 uebayasi for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ;
270 1.1.2.2 uebayasi found:
271 1.1.2.2 uebayasi d->rover = e + 1;
272 1.1.2.2 uebayasi atomic_add_int(&d->nfree, -1);
273 1.1.2.2 uebayasi
274 1.1.2.2 uebayasi /*
275 1.1.2.2 uebayasi * The entry we found may be an inactive entry that is
276 1.1.2.2 uebayasi * presently in the hash table. We need to remove it.
277 1.1.2.2 uebayasi */
278 1.1.2.2 uebayasi if (e->state != L2T_STATE_UNUSED) {
279 1.1.2.2 uebayasi int hash = arp_hash(e->addr, e->ifindex, d);
280 1.1.2.2 uebayasi
281 1.1.2.2 uebayasi for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
282 1.1.2.2 uebayasi if (*p == e) {
283 1.1.2.2 uebayasi *p = e->next;
284 1.1.2.2 uebayasi break;
285 1.1.2.2 uebayasi }
286 1.1.2.2 uebayasi e->state = L2T_STATE_UNUSED;
287 1.1.2.2 uebayasi }
288 1.1.2.2 uebayasi return e;
289 1.1.2.2 uebayasi }
290 1.1.2.2 uebayasi
291 1.1.2.2 uebayasi /*
292 1.1.2.2 uebayasi * Called when an L2T entry has no more users. The entry is left in the hash
293 1.1.2.2 uebayasi * table since it is likely to be reused but we also bump nfree to indicate
294 1.1.2.2 uebayasi * that the entry can be reallocated for a different neighbor. We also drop
295 1.1.2.2 uebayasi * the existing neighbor reference in case the neighbor is going away and is
296 1.1.2.2 uebayasi * waiting on our reference.
297 1.1.2.2 uebayasi *
298 1.1.2.2 uebayasi * Because entries can be reallocated to other neighbors once their ref count
299 1.1.2.2 uebayasi * drops to 0 we need to take the entry's lock to avoid races with a new
300 1.1.2.2 uebayasi * incarnation.
301 1.1.2.2 uebayasi */
302 1.1.2.2 uebayasi void
303 1.1.2.2 uebayasi t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
304 1.1.2.2 uebayasi {
305 1.1.2.2 uebayasi mtx_lock(&e->lock);
306 1.1.2.2 uebayasi if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */
307 1.1.2.2 uebayasi if (e->neigh) {
308 1.1.2.2 uebayasi RT_LOCK(e->neigh);
309 1.1.2.2 uebayasi RT_REMREF(e->neigh);
310 1.1.2.2 uebayasi RT_UNLOCK(e->neigh);
311 1.1.2.2 uebayasi e->neigh = NULL;
312 1.1.2.2 uebayasi }
313 1.1.2.2 uebayasi }
314 1.1.2.2 uebayasi mtx_unlock(&e->lock);
315 1.1.2.2 uebayasi atomic_add_int(&d->nfree, 1);
316 1.1.2.2 uebayasi }
317 1.1.2.2 uebayasi
318 1.1.2.2 uebayasi /*
319 1.1.2.2 uebayasi * Update an L2T entry that was previously used for the same next hop as neigh.
320 1.1.2.2 uebayasi * Must be called with softirqs disabled.
321 1.1.2.2 uebayasi */
322 1.1.2.2 uebayasi static inline void
323 1.1.2.2 uebayasi reuse_entry(struct l2t_entry *e, struct rtentry *neigh)
324 1.1.2.2 uebayasi {
325 1.1.2.2 uebayasi struct llinfo_arp *la;
326 1.1.2.2 uebayasi
327 1.1.2.2 uebayasi la = (struct llinfo_arp *)neigh->rt_llinfo;
328 1.1.2.2 uebayasi
329 1.1.2.2 uebayasi mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
330 1.1.2.2 uebayasi if (neigh != e->neigh)
331 1.1.2.2 uebayasi neigh_replace(e, neigh);
332 1.1.2.2 uebayasi
333 1.1.2.2 uebayasi if (memcmp(e->dmac, RT_ENADDR(neigh), sizeof(e->dmac)) ||
334 1.1.2.2 uebayasi (neigh->rt_expire > time_uptime))
335 1.1.2.2 uebayasi e->state = L2T_STATE_RESOLVING;
336 1.1.2.2 uebayasi else if (la->la_hold == NULL)
337 1.1.2.2 uebayasi e->state = L2T_STATE_VALID;
338 1.1.2.2 uebayasi else
339 1.1.2.2 uebayasi e->state = L2T_STATE_STALE;
340 1.1.2.2 uebayasi mtx_unlock(&e->lock);
341 1.1.2.2 uebayasi }
342 1.1.2.2 uebayasi
343 1.1.2.2 uebayasi struct l2t_entry *
344 1.1.2.2 uebayasi t3_l2t_get(struct toedev *dev, struct rtentry *neigh,
345 1.1.2.2 uebayasi unsigned int smt_idx)
346 1.1.2.2 uebayasi {
347 1.1.2.2 uebayasi struct l2t_entry *e;
348 1.1.2.2 uebayasi struct l2t_data *d = L2DATA(dev);
349 1.1.2.2 uebayasi u32 addr = *(u32 *)neigh->_rt_key;
350 1.1.2.2 uebayasi int ifidx = neigh->rt_ifp->if_index;
351 1.1.2.2 uebayasi int hash = arp_hash(addr, ifidx, d);
352 1.1.2.2 uebayasi
353 1.1.2.2 uebayasi rw_wlock(&d->lock);
354 1.1.2.2 uebayasi for (e = d->l2tab[hash].first; e; e = e->next)
355 1.1.2.2 uebayasi if (e->addr == addr && e->ifindex == ifidx &&
356 1.1.2.2 uebayasi e->smt_idx == smt_idx) {
357 1.1.2.2 uebayasi l2t_hold(d, e);
358 1.1.2.2 uebayasi if (atomic_load_acq_int(&e->refcnt) == 1)
359 1.1.2.2 uebayasi reuse_entry(e, neigh);
360 1.1.2.2 uebayasi goto done;
361 1.1.2.2 uebayasi }
362 1.1.2.2 uebayasi
363 1.1.2.2 uebayasi /* Need to allocate a new entry */
364 1.1.2.2 uebayasi e = alloc_l2e(d);
365 1.1.2.2 uebayasi if (e) {
366 1.1.2.2 uebayasi mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
367 1.1.2.2 uebayasi e->next = d->l2tab[hash].first;
368 1.1.2.2 uebayasi d->l2tab[hash].first = e;
369 1.1.2.2 uebayasi e->state = L2T_STATE_RESOLVING;
370 1.1.2.2 uebayasi e->addr = addr;
371 1.1.2.2 uebayasi e->ifindex = ifidx;
372 1.1.2.2 uebayasi e->smt_idx = smt_idx;
373 1.1.2.2 uebayasi atomic_store_rel_int(&e->refcnt, 1);
374 1.1.2.2 uebayasi neigh_replace(e, neigh);
375 1.1.2.2 uebayasi #ifdef notyet
376 1.1.2.2 uebayasi /*
377 1.1.2.2 uebayasi * XXX need to add accessor function for vlan tag
378 1.1.2.2 uebayasi */
379 1.1.2.2 uebayasi if (neigh->rt_ifp->if_vlantrunk)
380 1.1.2.2 uebayasi e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
381 1.1.2.2 uebayasi else
382 1.1.2.2 uebayasi #endif
383 1.1.2.2 uebayasi e->vlan = VLAN_NONE;
384 1.1.2.2 uebayasi mtx_unlock(&e->lock);
385 1.1.2.2 uebayasi }
386 1.1.2.2 uebayasi done:
387 1.1.2.2 uebayasi rw_wunlock(&d->lock);
388 1.1.2.2 uebayasi return e;
389 1.1.2.2 uebayasi }
390 1.1.2.2 uebayasi
391 1.1.2.2 uebayasi /*
392 1.1.2.2 uebayasi * Called when address resolution fails for an L2T entry to handle packets
393 1.1.2.2 uebayasi * on the arpq head. If a packet specifies a failure handler it is invoked,
394 1.1.2.2 uebayasi * otherwise the packets is sent to the TOE.
395 1.1.2.2 uebayasi *
396 1.1.2.2 uebayasi * XXX: maybe we should abandon the latter behavior and just require a failure
397 1.1.2.2 uebayasi * handler.
398 1.1.2.2 uebayasi */
399 1.1.2.2 uebayasi static void
400 1.1.2.2 uebayasi handle_failed_resolution(struct toedev *dev, struct mbuf *arpq)
401 1.1.2.2 uebayasi {
402 1.1.2.2 uebayasi
403 1.1.2.2 uebayasi while (arpq) {
404 1.1.2.2 uebayasi struct mbuf *m = arpq;
405 1.1.2.2 uebayasi #ifdef notyet
406 1.1.2.2 uebayasi struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m);
407 1.1.2.2 uebayasi #endif
408 1.1.2.2 uebayasi arpq = m->m_next;
409 1.1.2.2 uebayasi m->m_next = NULL;
410 1.1.2.2 uebayasi #ifdef notyet
411 1.1.2.2 uebayasi if (cb->arp_failure_handler)
412 1.1.2.2 uebayasi cb->arp_failure_handler(dev, m);
413 1.1.2.2 uebayasi else
414 1.1.2.2 uebayasi #endif
415 1.1.2.2 uebayasi }
416 1.1.2.2 uebayasi
417 1.1.2.2 uebayasi }
418 1.1.2.2 uebayasi
419 1.1.2.2 uebayasi #if defined(NETEVENT) || !defined(CONFIG_CHELSIO_T3_MODULE)
420 1.1.2.2 uebayasi /*
421 1.1.2.2 uebayasi * Called when the host's ARP layer makes a change to some entry that is
422 1.1.2.2 uebayasi * loaded into the HW L2 table.
423 1.1.2.2 uebayasi */
424 1.1.2.2 uebayasi void
425 1.1.2.2 uebayasi t3_l2t_update(struct toedev *dev, struct rtentry *neigh)
426 1.1.2.2 uebayasi {
427 1.1.2.2 uebayasi struct l2t_entry *e;
428 1.1.2.2 uebayasi struct mbuf *arpq = NULL;
429 1.1.2.2 uebayasi struct l2t_data *d = L2DATA(dev);
430 1.1.2.2 uebayasi u32 addr = *(u32 *)neigh->_rt_key;
431 1.1.2.2 uebayasi int ifidx = neigh->rt_ifp->if_index;
432 1.1.2.2 uebayasi int hash = arp_hash(addr, ifidx, d);
433 1.1.2.2 uebayasi struct llinfo_arp *la;
434 1.1.2.2 uebayasi
435 1.1.2.2 uebayasi rw_rlock(&d->lock);
436 1.1.2.2 uebayasi for (e = d->l2tab[hash].first; e; e = e->next)
437 1.1.2.2 uebayasi if (e->addr == addr && e->ifindex == ifidx) {
438 1.1.2.2 uebayasi mtx_lock(&e->lock);
439 1.1.2.2 uebayasi goto found;
440 1.1.2.2 uebayasi }
441 1.1.2.2 uebayasi rw_runlock(&d->lock);
442 1.1.2.2 uebayasi return;
443 1.1.2.2 uebayasi
444 1.1.2.2 uebayasi found:
445 1.1.2.2 uebayasi rw_runlock(&d->lock);
446 1.1.2.2 uebayasi if (atomic_load_acq_int(&e->refcnt)) {
447 1.1.2.2 uebayasi if (neigh != e->neigh)
448 1.1.2.2 uebayasi neigh_replace(e, neigh);
449 1.1.2.2 uebayasi
450 1.1.2.2 uebayasi la = (struct llinfo_arp *)neigh->rt_llinfo;
451 1.1.2.2 uebayasi if (e->state == L2T_STATE_RESOLVING) {
452 1.1.2.2 uebayasi
453 1.1.2.2 uebayasi if (la->la_asked >= 5 /* arp_maxtries */) {
454 1.1.2.2 uebayasi arpq = e->arpq_head;
455 1.1.2.2 uebayasi e->arpq_head = e->arpq_tail = NULL;
456 1.1.2.2 uebayasi } else if (la->la_hold == NULL)
457 1.1.2.2 uebayasi setup_l2e_send_pending(dev, NULL, e);
458 1.1.2.2 uebayasi } else {
459 1.1.2.2 uebayasi e->state = (la->la_hold == NULL) ?
460 1.1.2.2 uebayasi L2T_STATE_VALID : L2T_STATE_STALE;
461 1.1.2.2 uebayasi if (memcmp(e->dmac, RT_ENADDR(neigh), 6))
462 1.1.2.2 uebayasi setup_l2e_send_pending(dev, NULL, e);
463 1.1.2.2 uebayasi }
464 1.1.2.2 uebayasi }
465 1.1.2.2 uebayasi mtx_unlock(&e->lock);
466 1.1.2.2 uebayasi
467 1.1.2.2 uebayasi if (arpq)
468 1.1.2.2 uebayasi handle_failed_resolution(dev, arpq);
469 1.1.2.2 uebayasi }
470 1.1.2.2 uebayasi #else
471 1.1.2.2 uebayasi /*
472 1.1.2.2 uebayasi * Called from a kprobe, interrupts are off.
473 1.1.2.2 uebayasi */
474 1.1.2.2 uebayasi void
475 1.1.2.2 uebayasi t3_l2t_update(struct toedev *dev, struct rtentry *neigh)
476 1.1.2.2 uebayasi {
477 1.1.2.2 uebayasi struct l2t_entry *e;
478 1.1.2.2 uebayasi struct l2t_data *d = L2DATA(dev);
479 1.1.2.2 uebayasi u32 addr = *(u32 *) rt_key(neigh);
480 1.1.2.2 uebayasi int ifidx = neigh->dev->ifindex;
481 1.1.2.2 uebayasi int hash = arp_hash(addr, ifidx, d);
482 1.1.2.2 uebayasi
483 1.1.2.2 uebayasi rw_rlock(&d->lock);
484 1.1.2.2 uebayasi for (e = d->l2tab[hash].first; e; e = e->next)
485 1.1.2.2 uebayasi if (e->addr == addr && e->ifindex == ifidx) {
486 1.1.2.2 uebayasi mtx_lock(&e->lock);
487 1.1.2.2 uebayasi if (atomic_load_acq_int(&e->refcnt)) {
488 1.1.2.2 uebayasi if (neigh != e->neigh)
489 1.1.2.2 uebayasi neigh_replace(e, neigh);
490 1.1.2.2 uebayasi e->tdev = dev;
491 1.1.2.2 uebayasi mod_timer(&e->update_timer, jiffies + 1);
492 1.1.2.2 uebayasi }
493 1.1.2.2 uebayasi mtx_unlock(&e->lock);
494 1.1.2.2 uebayasi break;
495 1.1.2.2 uebayasi }
496 1.1.2.2 uebayasi rw_runlock(&d->lock);
497 1.1.2.2 uebayasi }
498 1.1.2.2 uebayasi
499 1.1.2.2 uebayasi static void
500 1.1.2.2 uebayasi update_timer_cb(unsigned long data)
501 1.1.2.2 uebayasi {
502 1.1.2.2 uebayasi struct mbuf *arpq = NULL;
503 1.1.2.2 uebayasi struct l2t_entry *e = (struct l2t_entry *)data;
504 1.1.2.2 uebayasi struct rtentry *neigh = e->neigh;
505 1.1.2.2 uebayasi struct toedev *dev = e->tdev;
506 1.1.2.2 uebayasi
507 1.1.2.2 uebayasi barrier();
508 1.1.2.2 uebayasi if (!atomic_load_acq_int(&e->refcnt))
509 1.1.2.2 uebayasi return;
510 1.1.2.2 uebayasi
511 1.1.2.2 uebayasi rw_rlock(&neigh->lock);
512 1.1.2.2 uebayasi mtx_lock(&e->lock);
513 1.1.2.2 uebayasi
514 1.1.2.2 uebayasi if (atomic_load_acq_int(&e->refcnt)) {
515 1.1.2.2 uebayasi if (e->state == L2T_STATE_RESOLVING) {
516 1.1.2.2 uebayasi if (neigh->nud_state & NUD_FAILED) {
517 1.1.2.2 uebayasi arpq = e->arpq_head;
518 1.1.2.2 uebayasi e->arpq_head = e->arpq_tail = NULL;
519 1.1.2.2 uebayasi } else if (neigh_is_connected(neigh) && e->arpq_head)
520 1.1.2.2 uebayasi setup_l2e_send_pending(dev, NULL, e);
521 1.1.2.2 uebayasi } else {
522 1.1.2.2 uebayasi e->state = neigh_is_connected(neigh) ?
523 1.1.2.2 uebayasi L2T_STATE_VALID : L2T_STATE_STALE;
524 1.1.2.2 uebayasi if (memcmp(e->dmac, RT_ENADDR(neigh), sizeof(e->dmac)))
525 1.1.2.2 uebayasi setup_l2e_send_pending(dev, NULL, e);
526 1.1.2.2 uebayasi }
527 1.1.2.2 uebayasi }
528 1.1.2.2 uebayasi mtx_unlock(&e->lock);
529 1.1.2.2 uebayasi rw_runlock(&neigh->lock);
530 1.1.2.2 uebayasi
531 1.1.2.2 uebayasi if (arpq)
532 1.1.2.2 uebayasi handle_failed_resolution(dev, arpq);
533 1.1.2.2 uebayasi }
534 1.1.2.2 uebayasi #endif
535 1.1.2.2 uebayasi
536 1.1.2.2 uebayasi struct l2t_data *
537 1.1.2.2 uebayasi t3_init_l2t(unsigned int l2t_capacity)
538 1.1.2.2 uebayasi {
539 1.1.2.2 uebayasi struct l2t_data *d;
540 1.1.2.2 uebayasi int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
541 1.1.2.2 uebayasi
542 1.1.2.2 uebayasi d = cxgb_alloc_mem(size);
543 1.1.2.2 uebayasi if (!d)
544 1.1.2.2 uebayasi return NULL;
545 1.1.2.2 uebayasi
546 1.1.2.2 uebayasi d->nentries = l2t_capacity;
547 1.1.2.2 uebayasi d->rover = &d->l2tab[1]; /* entry 0 is not used */
548 1.1.2.2 uebayasi atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
549 1.1.2.2 uebayasi rw_init(&d->lock, "L2T");
550 1.1.2.2 uebayasi
551 1.1.2.2 uebayasi for (i = 0; i < l2t_capacity; ++i) {
552 1.1.2.2 uebayasi d->l2tab[i].idx = i;
553 1.1.2.2 uebayasi d->l2tab[i].state = L2T_STATE_UNUSED;
554 1.1.2.2 uebayasi mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF);
555 1.1.2.2 uebayasi atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
556 1.1.2.2 uebayasi #ifndef NETEVENT
557 1.1.2.2 uebayasi #ifdef CONFIG_CHELSIO_T3_MODULE
558 1.1.2.2 uebayasi setup_timer(&d->l2tab[i].update_timer, update_timer_cb,
559 1.1.2.2 uebayasi (unsigned long)&d->l2tab[i]);
560 1.1.2.2 uebayasi #endif
561 1.1.2.2 uebayasi #endif
562 1.1.2.2 uebayasi }
563 1.1.2.2 uebayasi return d;
564 1.1.2.2 uebayasi }
565 1.1.2.2 uebayasi
/*
 * Release an L2 table.  In the timer-driven configuration (NETEVENT not
 * defined and CONFIG_CHELSIO_T3_MODULE defined) every entry's update timer
 * is stopped first; the table memory is then freed.
 */
void
t3_free_l2t(struct l2t_data *d)
{
#if !defined(NETEVENT) && defined(CONFIG_CHELSIO_T3_MODULE)
	int idx;

	/* Stop all L2T timers */
	for (idx = 0; idx < d->nentries; idx++)
		del_timer_sync(&d->l2tab[idx].update_timer);
#endif
	cxgb_free_mem(d);
}
580 1.1.2.2 uebayasi
581 1.1.2.2 uebayasi #ifdef CONFIG_PROC_FS
582 1.1.2.2 uebayasi #include <linux/module.h>
583 1.1.2.2 uebayasi #include <linux/proc_fs.h>
584 1.1.2.2 uebayasi #include <linux/seq_file.h>
585 1.1.2.2 uebayasi
586 1.1.2.2 uebayasi static inline void *
587 1.1.2.2 uebayasi l2t_get_idx(struct seq_file *seq, loff_t pos)
588 1.1.2.2 uebayasi {
589 1.1.2.2 uebayasi struct l2t_data *d = seq->private;
590 1.1.2.2 uebayasi
591 1.1.2.2 uebayasi return pos >= d->nentries ? NULL : &d->l2tab[pos];
592 1.1.2.2 uebayasi }
593 1.1.2.2 uebayasi
594 1.1.2.2 uebayasi static void *
595 1.1.2.2 uebayasi l2t_seq_start(struct seq_file *seq, loff_t *pos)
596 1.1.2.2 uebayasi {
597 1.1.2.2 uebayasi return *pos ? l2t_get_idx(seq, *pos) : SEQ_START_TOKEN;
598 1.1.2.2 uebayasi }
599 1.1.2.2 uebayasi
600 1.1.2.2 uebayasi static void *
601 1.1.2.2 uebayasi l2t_seq_next(struct seq_file *seq, void *v, loff_t *pos)
602 1.1.2.2 uebayasi {
603 1.1.2.2 uebayasi v = l2t_get_idx(seq, *pos + 1);
604 1.1.2.2 uebayasi if (v)
605 1.1.2.2 uebayasi ++*pos;
606 1.1.2.2 uebayasi return v;
607 1.1.2.2 uebayasi }
608 1.1.2.2 uebayasi
/*
 * seq_file stop hook.  l2t_seq_start() acquires nothing, so there is
 * nothing to undo here.
 */
static void
l2t_seq_stop(struct seq_file *seq, void *v)
{
	/* intentionally empty */
}
613 1.1.2.2 uebayasi
614 1.1.2.2 uebayasi static char
615 1.1.2.2 uebayasi l2e_state(const struct l2t_entry *e)
616 1.1.2.2 uebayasi {
617 1.1.2.2 uebayasi switch (e->state) {
618 1.1.2.2 uebayasi case L2T_STATE_VALID: return 'V'; /* valid, fast-path entry */
619 1.1.2.2 uebayasi case L2T_STATE_STALE: return 'S'; /* needs revalidation, but usable */
620 1.1.2.2 uebayasi case L2T_STATE_RESOLVING:
621 1.1.2.2 uebayasi return e->arpq_head ? 'A' : 'R';
622 1.1.2.2 uebayasi default:
623 1.1.2.2 uebayasi return 'U';
624 1.1.2.2 uebayasi }
625 1.1.2.2 uebayasi }
626 1.1.2.2 uebayasi
627 1.1.2.2 uebayasi static int
628 1.1.2.2 uebayasi l2t_seq_show(struct seq_file *seq, void *v)
629 1.1.2.2 uebayasi {
630 1.1.2.2 uebayasi if (v == SEQ_START_TOKEN)
631 1.1.2.2 uebayasi seq_puts(seq, "Index IP address Ethernet address VLAN "
632 1.1.2.2 uebayasi "Prio State Users SMTIDX Port\n");
633 1.1.2.2 uebayasi else {
634 1.1.2.2 uebayasi char ip[20];
635 1.1.2.2 uebayasi struct l2t_entry *e = v;
636 1.1.2.2 uebayasi
637 1.1.2.2 uebayasi mtx_lock(&e->lock);
638 1.1.2.2 uebayasi sprintf(ip, "%u.%u.%u.%u", NIPQUAD(e->addr));
639 1.1.2.2 uebayasi seq_printf(seq, "%-5u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
640 1.1.2.2 uebayasi " %3u %c %7u %4u %s\n",
641 1.1.2.2 uebayasi e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
642 1.1.2.2 uebayasi e->dmac[3], e->dmac[4], e->dmac[5],
643 1.1.2.2 uebayasi e->vlan & EVL_VLID_MASK, vlan_prio(e),
644 1.1.2.2 uebayasi l2e_state(e), atomic_load_acq_int(&e->refcnt), e->smt_idx,
645 1.1.2.2 uebayasi e->neigh ? e->neigh->dev->name : "");
646 1.1.2.2 uebayasi mtx_unlock(&e->lock);
647 1.1.2.2 uebayasi }
648 1.1.2.2 uebayasi return 0;
649 1.1.2.2 uebayasi }
650 1.1.2.2 uebayasi
651 1.1.2.2 uebayasi #endif
652