/*	$NetBSD: nouveau_dma.c,v 1.5 2021/12/18 23:45:32 riastradh Exp $	*/

/*
 * Copyright (C) 2007 Ben Skeggs.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nouveau_dma.c,v 1.5 2021/12/18 23:45:32 riastradh Exp $");

#include "nouveau_drv.h"
#include "nouveau_dma.h"
#include "nouveau_vmm.h"

#include <nvif/user.h>

#ifdef __NetBSD__
# define __iomem
# define __force
#endif

/* Copy nr_dwords 32-bit words of command data into the push buffer at
 * the current write cursor, and advance the cursor.
 */
void
OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords)
{
	bool is_iomem;
	u32 *mem = ttm_kmap_obj_virtual(&chan->push.buffer->kmap, &is_iomem);
	mem = &mem[chan->dma.cur];
	if (is_iomem)
		memcpy_toio((void __force __iomem *)mem, data, nr_dwords * 4);
	else
		memcpy(mem, data, nr_dwords * 4);
	chan->dma.cur += nr_dwords;
}

#ifdef __NetBSD__
# undef __force
# undef __iomem
#endif

/* Fetch and adjust GPU GET pointer
 *
 * Returns:
 *  value >= 0, the adjusted GET pointer
 *  -EINVAL if GET pointer currently outside main push buffer
 *  -EBUSY if timeout exceeded
 */
static inline int
READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout)
{
	uint64_t val;

	val = nvif_rd32(&chan->user, chan->user_get);
	if (chan->user_get_hi)
		val |= (uint64_t)nvif_rd32(&chan->user, chan->user_get_hi) << 32;

	/* reset counter as long as GET is still advancing, this is
	 * to avoid misdetecting a GPU lockup if the GPU happens to
	 * just be processing an operation that takes a long time
	 */
	if (val != *prev_get) {
		*prev_get = val;
		*timeout = 0;
	}

	if ((++*timeout & 0xff) == 0) {
		udelay(1);
		if (*timeout > 100000)
			return -EBUSY;
	}

	if (val < chan->push.addr ||
	    val > chan->push.addr + (chan->dma.max << 2))
		return -EINVAL;

	return (val - chan->push.addr) >> 2;
}

/* Queue one indirect-buffer entry describing the push buffer segment at
 * GPU address "offset", then notify the GPU that new commands are pending.
 */
void
nv50_dma_push(struct nouveau_channel *chan, u64 offset, int length)
{
	struct nvif_user *user = &chan->drm->client.device.user;
	struct nouveau_bo *pb = chan->push.buffer;
	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;

	BUG_ON(chan->dma.ib_free < 1);

	nouveau_bo_wr32(pb, ip++, lower_32_bits(offset));
	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8);

	chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max;

	mb();
	/* Flush writes. */
	nouveau_bo_rd32(pb, 0);

	nvif_wr32(&chan->user, 0x8c, chan->dma.ib_put);
	if (user->func && user->func->doorbell)
		user->func->doorbell(user, chan->token);
	chan->dma.ib_free--;
}

/* Spin until at least "count" indirect-buffer entries are free, or
 * return -EBUSY on timeout.
 */
static int
nv50_dma_push_wait(struct nouveau_channel *chan, int count)
{
	uint32_t cnt = 0, prev_get = 0;

	while (chan->dma.ib_free < count) {
		uint32_t get = nvif_rd32(&chan->user, 0x88);
		if (get != prev_get) {
			prev_get = get;
			cnt = 0;
		}

		if ((++cnt & 0xff) == 0) {
			udelay(1);
			if (cnt > 100000)
				return -EBUSY;
		}

		chan->dma.ib_free = get - chan->dma.ib_put;
		if (chan->dma.ib_free <= 0)
			chan->dma.ib_free += chan->dma.ib_max;
	}

	return 0;
}

/* Wait for enough free indirect-buffer entries for "slots" submissions
 * and "count" dwords of free space in the push buffer.
 */
static int
nv50_dma_wait(struct nouveau_channel *chan, int slots, int count)
{
	uint64_t prev_get = 0;
	int ret, cnt = 0;

	ret = nv50_dma_push_wait(chan, slots + 1);
	if (unlikely(ret))
		return ret;

	while (chan->dma.free < count) {
		int get = READ_GET(chan, &prev_get, &cnt);
		if (unlikely(get < 0)) {
			if (get == -EINVAL)
				continue;

			return get;
		}

		if (get <= chan->dma.cur) {
			chan->dma.free = chan->dma.max - chan->dma.cur;
			if (chan->dma.free >= count)
				break;

			FIRE_RING(chan);
			do {
				get = READ_GET(chan, &prev_get, &cnt);
				if (unlikely(get < 0)) {
					if (get == -EINVAL)
						continue;
					return get;
				}
			} while (get == 0);
			chan->dma.cur = 0;
			chan->dma.put = 0;
		}

		chan->dma.free = get - chan->dma.cur - 1;
	}

	return 0;
}

/* Ensure there is room in the channel for "size" dwords of commands
 * (and "slots" indirect-buffer entries on channels that use one).
 */
int
nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size)
{
	uint64_t prev_get = 0;
	int cnt = 0, get;

	if (chan->dma.ib_max)
		return nv50_dma_wait(chan, slots, size);

	while (chan->dma.free < size) {
		get = READ_GET(chan, &prev_get, &cnt);
		if (unlikely(get == -EBUSY))
			return -EBUSY;

		/* loop until we have a usable GET pointer. the value
		 * we read from the GPU may be outside the main ring if
		 * PFIFO is processing a buffer called from the main ring,
		 * discard these values until something sensible is seen.
		 *
		 * the other case we discard GET is while the GPU is fetching
		 * from the SKIPS area, so the code below doesn't have to deal
		 * with some fun corner cases.
		 */
		if (unlikely(get == -EINVAL) || get < NOUVEAU_DMA_SKIPS)
			continue;

		if (get <= chan->dma.cur) {
			/* engine is fetching behind us, or is completely
			 * idle (GET == PUT) so we have free space up until
			 * the end of the push buffer
			 *
			 * we can only hit that path once per call due to
			 * looping back to the beginning of the push buffer,
			 * we'll hit the fetching-ahead-of-us path from that
			 * point on.
			 *
			 * the *one* exception to that rule is if we read
			 * GET==PUT, in which case the below conditional will
			 * always succeed and break us out of the wait loop.
			 */
			chan->dma.free = chan->dma.max - chan->dma.cur;
			if (chan->dma.free >= size)
				break;

			/* not enough space left at the end of the push buffer,
			 * instruct the GPU to jump back to the start right
			 * after processing the currently pending commands.
			 */
			OUT_RING(chan, chan->push.addr | 0x20000000);

			/* wait for GET to depart from the skips area.
			 * prevents writing GET==PUT and causing a race
			 * condition that causes us to think the GPU is
			 * idle when it's not.
			 */
			do {
				get = READ_GET(chan, &prev_get, &cnt);
				if (unlikely(get == -EBUSY))
					return -EBUSY;
				if (unlikely(get == -EINVAL))
					continue;
			} while (get <= NOUVEAU_DMA_SKIPS);
			WRITE_PUT(NOUVEAU_DMA_SKIPS);

			/* we're now submitting commands at the start of
			 * the push buffer.
			 */
			chan->dma.cur =
				chan->dma.put = NOUVEAU_DMA_SKIPS;
		}

		/* engine fetching ahead of us, we have space up until the
		 * current GET pointer. the "- 1" is to ensure there's
		 * space left to emit a jump back to the beginning of the
		 * push buffer if we require it. we can never get GET == PUT
		 * here, so this is safe.
		 */
		chan->dma.free = get - chan->dma.cur - 1;
	}

	return 0;
}