1 /* $NetBSD: radeon_evergreen_cs.c,v 1.2.8.1 2020/02/29 20:20:16 ad Exp $ */
2
3 /*
4 * Copyright 2010 Advanced Micro Devices, Inc.
5 * Copyright 2008 Red Hat Inc.
6 * Copyright 2009 Jerome Glisse.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 *
26 * Authors: Dave Airlie
27 * Alex Deucher
28 * Jerome Glisse
29 */
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: radeon_evergreen_cs.c,v 1.2.8.1 2020/02/29 20:20:16 ad Exp $");
32
33 #include <drm/drmP.h>
34 #include "radeon.h"
35 #include "evergreend.h"
36 #include "evergreen_reg_safe.h"
37 #include "cayman_reg_safe.h"
38
39 #include <linux/nbsd-namespace.h>
40
41 #ifndef __NetBSD__
42 #define MAX(a,b) (((a)>(b))?(a):(b))
43 #define MIN(a,b) (((a)<(b))?(a):(b))
44 #endif
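/*
 * On NetBSD the MAX()/MIN() macros are expected to come from the system
 * headers (presumably <sys/param.h>, pulled in via the includes above),
 * so the fallback definitions here are only compiled on other platforms.
 */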
45
46 #define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm)
47
48 int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
49 struct radeon_bo_list **cs_reloc);
50 struct evergreen_cs_track {
51 u32 group_size;
52 u32 nbanks;
53 u32 npipes;
54 u32 row_size;
55 /* values we track */
56 u32 nsamples; /* unused */
57 struct radeon_bo *cb_color_bo[12];
58 u32 cb_color_bo_offset[12];
59 struct radeon_bo *cb_color_fmask_bo[8]; /* unused */
60 struct radeon_bo *cb_color_cmask_bo[8]; /* unused */
61 u32 cb_color_info[12];
62 u32 cb_color_view[12];
63 u32 cb_color_pitch[12];
64 u32 cb_color_slice[12];
65 u32 cb_color_slice_idx[12];
66 u32 cb_color_attrib[12];
67 u32 cb_color_cmask_slice[8];/* unused */
68 u32 cb_color_fmask_slice[8];/* unused */
69 u32 cb_target_mask;
70 u32 cb_shader_mask; /* unused */
71 u32 vgt_strmout_config;
72 u32 vgt_strmout_buffer_config;
73 struct radeon_bo *vgt_strmout_bo[4];
74 u32 vgt_strmout_bo_offset[4];
75 u32 vgt_strmout_size[4];
76 u32 db_depth_control;
77 u32 db_depth_view;
78 u32 db_depth_slice;
79 u32 db_depth_size;
80 u32 db_z_info;
81 u32 db_z_read_offset;
82 u32 db_z_write_offset;
83 struct radeon_bo *db_z_read_bo;
84 struct radeon_bo *db_z_write_bo;
85 u32 db_s_info;
86 u32 db_s_read_offset;
87 u32 db_s_write_offset;
88 struct radeon_bo *db_s_read_bo;
89 struct radeon_bo *db_s_write_bo;
90 bool sx_misc_kill_all_prims;
91 bool cb_dirty;
92 bool db_dirty;
93 bool streamout_dirty;
94 u32 htile_offset;
95 u32 htile_surface;
96 struct radeon_bo *htile_bo;
97 unsigned long indirect_draw_buffer_size;
98 const unsigned *reg_safe_bm;
99 };
100
101 static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
102 {
103 if (tiling_flags & RADEON_TILING_MACRO)
104 return ARRAY_2D_TILED_THIN1;
105 else if (tiling_flags & RADEON_TILING_MICRO)
106 return ARRAY_1D_TILED_THIN1;
107 else
108 return ARRAY_LINEAR_GENERAL;
109 }
110
111 static u32 evergreen_cs_get_num_banks(u32 nbanks)
112 {
113 switch (nbanks) {
114 case 2:
115 return ADDR_SURF_2_BANK;
116 case 4:
117 return ADDR_SURF_4_BANK;
118 case 8:
119 default:
120 return ADDR_SURF_8_BANK;
121 case 16:
122 return ADDR_SURF_16_BANK;
123 }
124 }
125
126 static void evergreen_cs_track_init(struct evergreen_cs_track *track)
127 {
128 int i;
129
130 for (i = 0; i < 8; i++) {
131 track->cb_color_fmask_bo[i] = NULL;
132 track->cb_color_cmask_bo[i] = NULL;
133 track->cb_color_cmask_slice[i] = 0;
134 track->cb_color_fmask_slice[i] = 0;
135 }
136
137 for (i = 0; i < 12; i++) {
138 track->cb_color_bo[i] = NULL;
139 track->cb_color_bo_offset[i] = 0xFFFFFFFF;
140 track->cb_color_info[i] = 0;
141 track->cb_color_view[i] = 0xFFFFFFFF;
142 track->cb_color_pitch[i] = 0;
143 track->cb_color_slice[i] = 0xfffffff;
144 track->cb_color_slice_idx[i] = 0;
145 }
146 track->cb_target_mask = 0xFFFFFFFF;
147 track->cb_shader_mask = 0xFFFFFFFF;
148 track->cb_dirty = true;
149
150 track->db_depth_slice = 0xffffffff;
151 track->db_depth_view = 0xFFFFC000;
152 track->db_depth_size = 0xFFFFFFFF;
153 track->db_depth_control = 0xFFFFFFFF;
154 track->db_z_info = 0xFFFFFFFF;
155 track->db_z_read_offset = 0xFFFFFFFF;
156 track->db_z_write_offset = 0xFFFFFFFF;
157 track->db_z_read_bo = NULL;
158 track->db_z_write_bo = NULL;
159 track->db_s_info = 0xFFFFFFFF;
160 track->db_s_read_offset = 0xFFFFFFFF;
161 track->db_s_write_offset = 0xFFFFFFFF;
162 track->db_s_read_bo = NULL;
163 track->db_s_write_bo = NULL;
164 track->db_dirty = true;
165 track->htile_bo = NULL;
166 track->htile_offset = 0xFFFFFFFF;
167 track->htile_surface = 0;
168
169 for (i = 0; i < 4; i++) {
170 track->vgt_strmout_size[i] = 0;
171 track->vgt_strmout_bo[i] = NULL;
172 track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
173 }
174 track->streamout_dirty = true;
175 track->sx_misc_kill_all_prims = false;
176 }
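/*
 * Most fields are initialized above to all-ones "poison" values so that
 * state which the command stream never programs is treated conservatively
 * by the checks below rather than silently accepted (an interpretation of
 * the code, not a documented contract).
 */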
177
178 struct eg_surface {
179 /* values gathered from cs */
180 unsigned nbx;
181 unsigned nby;
182 unsigned format;
183 unsigned mode;
184 unsigned nbanks;
185 unsigned bankw;
186 unsigned bankh;
187 unsigned tsplit;
188 unsigned mtilea;
189 unsigned nsamples;
190 /* output values */
191 unsigned bpe;
192 unsigned layer_size;
193 unsigned palign;
194 unsigned halign;
195 unsigned long base_align;
196 };
197
198 static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
199 struct eg_surface *surf,
200 const char *prefix)
201 {
202 surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
203 surf->base_align = surf->bpe;
204 surf->palign = 1;
205 surf->halign = 1;
206 return 0;
207 }
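/*
 * Worked example for the linear-general case above: a 64x64 surface with
 * bpe = 4 (e.g. an 8888 color format) and nsamples = 1 gives
 * layer_size = 64 * 64 * 4 = 16384 bytes, base_align = 4 bytes and no
 * pitch/height alignment requirement (palign = halign = 1).
 */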
208
209 static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
210 struct eg_surface *surf,
211 const char *prefix)
212 {
213 struct evergreen_cs_track *track = p->track;
214 unsigned palign;
215
216 palign = MAX(64, track->group_size / surf->bpe);
217 surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
218 surf->base_align = track->group_size;
219 surf->palign = palign;
220 surf->halign = 1;
221 if (surf->nbx & (palign - 1)) {
222 if (prefix) {
223 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
224 __func__, __LINE__, prefix, surf->nbx, palign);
225 }
226 return -EINVAL;
227 }
228 return 0;
229 }
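/*
 * Worked example for the linear-aligned case above: with group_size = 512
 * and bpe = 4, palign = MAX(64, 512 / 4) = 128, so the pitch (nbx) must be
 * a multiple of 128 and the base address must be 512-byte aligned.
 */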
230
231 static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
232 struct eg_surface *surf,
233 const char *prefix)
234 {
235 struct evergreen_cs_track *track = p->track;
236 unsigned palign;
237
238 palign = track->group_size / (8 * surf->bpe * surf->nsamples);
239 palign = MAX(8, palign);
240 surf->layer_size = surf->nbx * surf->nby * surf->bpe;
241 surf->base_align = track->group_size;
242 surf->palign = palign;
243 surf->halign = 8;
244 if ((surf->nbx & (palign - 1))) {
245 if (prefix) {
246 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
247 __func__, __LINE__, prefix, surf->nbx, palign,
248 track->group_size, surf->bpe, surf->nsamples);
249 }
250 return -EINVAL;
251 }
252 if ((surf->nby & (8 - 1))) {
253 if (prefix) {
254 dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
255 __func__, __LINE__, prefix, surf->nby);
256 }
257 return -EINVAL;
258 }
259 return 0;
260 }
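/*
 * Worked example for the 1D-tiled case above: with group_size = 512,
 * bpe = 4 and nsamples = 1, palign = MAX(8, 512 / (8 * 4)) = 16, so the
 * pitch must be a multiple of 16 and the height a multiple of 8 (one tile).
 */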
261
262 static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
263 struct eg_surface *surf,
264 const char *prefix)
265 {
266 struct evergreen_cs_track *track = p->track;
267 unsigned palign, halign, tileb, slice_pt;
268 unsigned mtile_pr, mtile_ps, mtileb;
269
270 tileb = 64 * surf->bpe * surf->nsamples;
271 slice_pt = 1;
272 if (tileb > surf->tsplit) {
273 slice_pt = tileb / surf->tsplit;
274 }
275 tileb = tileb / slice_pt;
276 /* macro tile width & height */
277 palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
278 halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
279 mtileb = (palign / 8) * (halign / 8) * tileb;
280 mtile_pr = surf->nbx / palign;
281 mtile_ps = (mtile_pr * surf->nby) / halign;
282 surf->layer_size = mtile_ps * mtileb * slice_pt;
283 surf->base_align = (palign / 8) * (halign / 8) * tileb;
284 surf->palign = palign;
285 surf->halign = halign;
286
287 if ((surf->nbx & (palign - 1))) {
288 if (prefix) {
289 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
290 __func__, __LINE__, prefix, surf->nbx, palign);
291 }
292 return -EINVAL;
293 }
294 if ((surf->nby & (halign - 1))) {
295 if (prefix) {
296 dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
297 __func__, __LINE__, prefix, surf->nby, halign);
298 }
299 return -EINVAL;
300 }
301
302 return 0;
303 }
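/*
 * Worked example for the 2D-tiled case above, assuming bpe = 4,
 * nsamples = 1, tsplit = 1024, bankw = bankh = 1, npipes = 8, nbanks = 8
 * and mtilea = 2: tileb = 64 * 4 * 1 = 256 <= tsplit so slice_pt = 1,
 * palign = (8 * 1 * 8) * 2 = 128, halign = (8 * 1 * 8) / 2 = 32 and
 * mtileb = base_align = (128 / 8) * (32 / 8) * 256 = 16384 bytes.
 */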
304
305 static int evergreen_surface_check(struct radeon_cs_parser *p,
306 struct eg_surface *surf,
307 const char *prefix)
308 {
309 /* some common values computed here */
310 surf->bpe = r600_fmt_get_blocksize(surf->format);
311
312 switch (surf->mode) {
313 case ARRAY_LINEAR_GENERAL:
314 return evergreen_surface_check_linear(p, surf, prefix);
315 case ARRAY_LINEAR_ALIGNED:
316 return evergreen_surface_check_linear_aligned(p, surf, prefix);
317 case ARRAY_1D_TILED_THIN1:
318 return evergreen_surface_check_1d(p, surf, prefix);
319 case ARRAY_2D_TILED_THIN1:
320 return evergreen_surface_check_2d(p, surf, prefix);
321 default:
322 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
323 __func__, __LINE__, prefix, surf->mode);
324 return -EINVAL;
325 }
326 return -EINVAL;
327 }
328
329 static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
330 struct eg_surface *surf,
331 const char *prefix)
332 {
333 switch (surf->mode) {
334 case ARRAY_2D_TILED_THIN1:
335 break;
336 case ARRAY_LINEAR_GENERAL:
337 case ARRAY_LINEAR_ALIGNED:
338 case ARRAY_1D_TILED_THIN1:
339 return 0;
340 default:
341 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
342 __func__, __LINE__, prefix, surf->mode);
343 return -EINVAL;
344 }
345
346 switch (surf->nbanks) {
347 case 0: surf->nbanks = 2; break;
348 case 1: surf->nbanks = 4; break;
349 case 2: surf->nbanks = 8; break;
350 case 3: surf->nbanks = 16; break;
351 default:
352 dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
353 __func__, __LINE__, prefix, surf->nbanks);
354 return -EINVAL;
355 }
356 switch (surf->bankw) {
357 case 0: surf->bankw = 1; break;
358 case 1: surf->bankw = 2; break;
359 case 2: surf->bankw = 4; break;
360 case 3: surf->bankw = 8; break;
361 default:
362 dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
363 __func__, __LINE__, prefix, surf->bankw);
364 return -EINVAL;
365 }
366 switch (surf->bankh) {
367 case 0: surf->bankh = 1; break;
368 case 1: surf->bankh = 2; break;
369 case 2: surf->bankh = 4; break;
370 case 3: surf->bankh = 8; break;
371 default:
372 dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
373 __func__, __LINE__, prefix, surf->bankh);
374 return -EINVAL;
375 }
376 switch (surf->mtilea) {
377 case 0: surf->mtilea = 1; break;
378 case 1: surf->mtilea = 2; break;
379 case 2: surf->mtilea = 4; break;
380 case 3: surf->mtilea = 8; break;
381 default:
382 dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
383 __func__, __LINE__, prefix, surf->mtilea);
384 return -EINVAL;
385 }
386 switch (surf->tsplit) {
387 case 0: surf->tsplit = 64; break;
388 case 1: surf->tsplit = 128; break;
389 case 2: surf->tsplit = 256; break;
390 case 3: surf->tsplit = 512; break;
391 case 4: surf->tsplit = 1024; break;
392 case 5: surf->tsplit = 2048; break;
393 case 6: surf->tsplit = 4096; break;
394 default:
395 dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
396 __func__, __LINE__, prefix, surf->tsplit);
397 return -EINVAL;
398 }
399 return 0;
400 }
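/*
 * Example of the decoding above: raw register field values of
 * NUM_BANKS = 2, BANK_WIDTH = 0, BANK_HEIGHT = 1, MACRO_TILE_ASPECT = 1
 * and TILE_SPLIT = 4 become nbanks = 8, bankw = 1, bankh = 2, mtilea = 2
 * and tsplit = 1024 bytes.
 */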
401
402 static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
403 {
404 struct evergreen_cs_track *track = p->track;
405 struct eg_surface surf;
406 unsigned pitch, slice, mslice;
407 unsigned long offset;
408 int r;
409
410 mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
411 pitch = track->cb_color_pitch[id];
412 slice = track->cb_color_slice[id];
413 surf.nbx = (pitch + 1) * 8;
414 surf.nby = ((slice + 1) * 64) / surf.nbx;
415 surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
416 surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
417 surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
418 surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
419 surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
420 surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
421 surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
422 surf.nsamples = 1;
423
424 if (!r600_fmt_is_valid_color(surf.format)) {
425 dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
426 __func__, __LINE__, surf.format,
427 id, track->cb_color_info[id]);
428 return -EINVAL;
429 }
430
431 r = evergreen_surface_value_conv_check(p, &surf, "cb");
432 if (r) {
433 return r;
434 }
435
436 r = evergreen_surface_check(p, &surf, "cb");
437 if (r) {
438 dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
439 __func__, __LINE__, id, track->cb_color_pitch[id],
440 track->cb_color_slice[id], track->cb_color_attrib[id],
441 track->cb_color_info[id]);
442 return r;
443 }
444
445 offset = track->cb_color_bo_offset[id] << 8;
446 if (offset & (surf.base_align - 1)) {
447 dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
448 __func__, __LINE__, id, offset, surf.base_align);
449 return -EINVAL;
450 }
451
452 offset += surf.layer_size * mslice;
453 if (offset > radeon_bo_size(track->cb_color_bo[id])) {
454 /* old ddx is broken: it allocates the bo with w*h*bpp but
455 * programs slice with ALIGN(h, 8); catch this and patch the
456 * command stream.
457 */
458 if (!surf.mode) {
459 uint32_t *ib = p->ib.ptr;
460 unsigned long tmp, nby, bsize, size, min = 0;
461
462 /* find the height the ddx wants */
463 if (surf.nby > 8) {
464 min = surf.nby - 8;
465 }
466 bsize = radeon_bo_size(track->cb_color_bo[id]);
467 tmp = track->cb_color_bo_offset[id] << 8;
468 for (nby = surf.nby; nby > min; nby--) {
469 size = nby * surf.nbx * surf.bpe * surf.nsamples;
470 if ((tmp + size * mslice) <= bsize) {
471 break;
472 }
473 }
474 if (nby > min) {
475 surf.nby = nby;
476 slice = ((nby * surf.nbx) / 64) - 1;
477 if (!evergreen_surface_check(p, &surf, "cb")) {
478 /* check if this one works */
479 tmp += surf.layer_size * mslice;
480 if (tmp <= bsize) {
481 ib[track->cb_color_slice_idx[id]] = slice;
482 goto old_ddx_ok;
483 }
484 }
485 }
486 }
487 dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
488 "offset %d, max layer %d, bo size %ld, slice %d)\n",
489 __func__, __LINE__, id, surf.layer_size,
490 track->cb_color_bo_offset[id] << 8, mslice,
491 radeon_bo_size(track->cb_color_bo[id]), slice);
492 dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
493 __func__, __LINE__, surf.nbx, surf.nby,
494 surf.mode, surf.bpe, surf.nsamples,
495 surf.bankw, surf.bankh,
496 surf.tsplit, surf.mtilea);
497 return -EINVAL;
498 }
499 old_ddx_ok:
500
501 return 0;
502 }
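/*
 * Example of the pitch/slice decoding used above: CB_COLOR*_PITCH stores
 * (pitch / 8) - 1 and CB_COLOR*_SLICE stores (pitch * height / 64) - 1, so
 * a pitch register value of 0x0f gives nbx = (0x0f + 1) * 8 = 128 and a
 * slice value of 0x7f then gives nby = ((0x7f + 1) * 64) / 128 = 64.
 */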
503
504 static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
505 unsigned nbx, unsigned nby)
506 {
507 struct evergreen_cs_track *track = p->track;
508 unsigned long size;
509
510 if (track->htile_bo == NULL) {
511 dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
512 __func__, __LINE__, track->db_z_info);
513 return -EINVAL;
514 }
515
516 if (G_028ABC_LINEAR(track->htile_surface)) {
517 /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
518 nbx = round_up(nbx, 16 * 8);
519 /* height is npipes htiles aligned == npipes * 8 pixel aligned */
520 nby = round_up(nby, track->npipes * 8);
521 } else {
522 /* always assume 8x8 htile */
523 /* alignment is the htile alignment * 8; the htile alignment varies
524 * according to the number of pipes, the tile width and nby
525 */
526 switch (track->npipes) {
527 case 8:
528 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
529 nbx = round_up(nbx, 64 * 8);
530 nby = round_up(nby, 64 * 8);
531 break;
532 case 4:
533 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
534 nbx = round_up(nbx, 64 * 8);
535 nby = round_up(nby, 32 * 8);
536 break;
537 case 2:
538 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
539 nbx = round_up(nbx, 32 * 8);
540 nby = round_up(nby, 32 * 8);
541 break;
542 case 1:
543 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
544 nbx = round_up(nbx, 32 * 8);
545 nby = round_up(nby, 16 * 8);
546 break;
547 default:
548 dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
549 __func__, __LINE__, track->npipes);
550 return -EINVAL;
551 }
552 }
553 /* compute number of htiles */
554 nbx = nbx >> 3;
555 nby = nby >> 3;
556 /* size must be aligned on npipes * 2K boundary */
557 size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
558 size += track->htile_offset;
559
560 if (!track->htile_bo) {
561 dev_warn(p->dev, "%s:%d htile_bo not set\n", __func__, __LINE__);
562 return -EINVAL;
563 }
564 if (size > radeon_bo_size(track->htile_bo)) {
565 dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
566 __func__, __LINE__, radeon_bo_size(track->htile_bo),
567 size, nbx, nby);
568 return -EINVAL;
569 }
570 return 0;
571 }
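/*
 * Worked example for the htile size computed above: a linear htile surface
 * with npipes = 2 and a 1024x768 depth buffer keeps nbx = 1024 (already a
 * multiple of 16 * 8) and nby = 768 (already a multiple of 2 * 8), giving
 * 128 x 96 htiles and size = roundup(128 * 96 * 4, 2 * 2048) = 49152 bytes.
 */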
572
573 static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
574 {
575 struct evergreen_cs_track *track = p->track;
576 struct eg_surface surf;
577 unsigned pitch, slice, mslice;
578 unsigned long offset;
579 int r;
580
581 mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
582 pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
583 slice = track->db_depth_slice;
584 surf.nbx = (pitch + 1) * 8;
585 surf.nby = ((slice + 1) * 64) / surf.nbx;
586 surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
587 surf.format = G_028044_FORMAT(track->db_s_info);
588 surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
589 surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
590 surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
591 surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
592 surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
593 surf.nsamples = 1;
594
595 if (surf.format != 1) {
596 dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
597 __func__, __LINE__, surf.format);
598 return -EINVAL;
599 }
600 /* replace with a color format so we can use the same code */
601 surf.format = V_028C70_COLOR_8;
602
603 r = evergreen_surface_value_conv_check(p, &surf, "stencil");
604 if (r) {
605 return r;
606 }
607
608 r = evergreen_surface_check(p, &surf, NULL);
609 if (r) {
610 /* old userspace doesn't compute proper depth/stencil alignment;
611 * check that alignment against a bigger bytes-per-element and
612 * only report an error if that alignment is wrong too.
613 */
614 surf.format = V_028C70_COLOR_8_8_8_8;
615 r = evergreen_surface_check(p, &surf, "stencil");
616 if (r) {
617 dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
618 __func__, __LINE__, track->db_depth_size,
619 track->db_depth_slice, track->db_s_info, track->db_z_info);
620 }
621 return r;
622 }
623
624 offset = track->db_s_read_offset << 8;
625 if (offset & (surf.base_align - 1)) {
626 dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
627 __func__, __LINE__, offset, surf.base_align);
628 return -EINVAL;
629 }
630 offset += surf.layer_size * mslice;
631 if (!track->db_s_read_bo) {
632 dev_warn(p->dev, "%s:%d db_s_read_bo not set\n", __func__, __LINE__);
633 return -EINVAL;
634 }
635 if (offset > radeon_bo_size(track->db_s_read_bo)) {
636 dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
637 "offset %ld, max layer %d, bo size %ld)\n",
638 __func__, __LINE__, surf.layer_size,
639 (unsigned long)track->db_s_read_offset << 8, mslice,
640 radeon_bo_size(track->db_s_read_bo));
641 dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
642 __func__, __LINE__, track->db_depth_size,
643 track->db_depth_slice, track->db_s_info, track->db_z_info);
644 return -EINVAL;
645 }
646
647 offset = track->db_s_write_offset << 8;
648 if (offset & (surf.base_align - 1)) {
649 dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
650 __func__, __LINE__, offset, surf.base_align);
651 return -EINVAL;
652 }
653 offset += surf.layer_size * mslice;
654 if (!track->db_s_write_bo) {
655 dev_warn(p->dev, "%s:%d db_s_write_bo not set\n", __func__, __LINE__);
656 return -EINVAL;
657 }
658 if (offset > radeon_bo_size(track->db_s_write_bo)) {
659 dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
660 "offset %ld, max layer %d, bo size %ld)\n",
661 __func__, __LINE__, surf.layer_size,
662 (unsigned long)track->db_s_write_offset << 8, mslice,
663 radeon_bo_size(track->db_s_write_bo));
664 return -EINVAL;
665 }
666
667 /* hyperz */
668 if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
669 r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
670 if (r) {
671 return r;
672 }
673 }
674
675 return 0;
676 }
677
678 static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
679 {
680 struct evergreen_cs_track *track = p->track;
681 struct eg_surface surf;
682 unsigned pitch, slice, mslice;
683 unsigned long offset;
684 int r;
685
686 mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
687 pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
688 slice = track->db_depth_slice;
689 surf.nbx = (pitch + 1) * 8;
690 surf.nby = ((slice + 1) * 64) / surf.nbx;
691 surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
692 surf.format = G_028040_FORMAT(track->db_z_info);
693 surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
694 surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
695 surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
696 surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
697 surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
698 surf.nsamples = 1;
699
700 switch (surf.format) {
701 case V_028040_Z_16:
702 surf.format = V_028C70_COLOR_16;
703 break;
704 case V_028040_Z_24:
705 case V_028040_Z_32_FLOAT:
706 surf.format = V_028C70_COLOR_8_8_8_8;
707 break;
708 default:
709 dev_warn(p->dev, "%s:%d depth invalid format %d\n",
710 __func__, __LINE__, surf.format);
711 return -EINVAL;
712 }
713
714 r = evergreen_surface_value_conv_check(p, &surf, "depth");
715 if (r) {
716 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
717 __func__, __LINE__, track->db_depth_size,
718 track->db_depth_slice, track->db_z_info);
719 return r;
720 }
721
722 r = evergreen_surface_check(p, &surf, "depth");
723 if (r) {
724 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
725 __func__, __LINE__, track->db_depth_size,
726 track->db_depth_slice, track->db_z_info);
727 return r;
728 }
729
730 offset = track->db_z_read_offset << 8;
731 if (offset & (surf.base_align - 1)) {
732 dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
733 __func__, __LINE__, offset, surf.base_align);
734 return -EINVAL;
735 }
736 offset += surf.layer_size * mslice;
737 if (!track->db_z_read_bo) {
738 dev_warn(p->dev, "%s:%d db_z_read_bo not set\n", __func__, __LINE__);
739 return -EINVAL;
740 }
741 if (offset > radeon_bo_size(track->db_z_read_bo)) {
742 dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
743 "offset %ld, max layer %d, bo size %ld)\n",
744 __func__, __LINE__, surf.layer_size,
745 (unsigned long)track->db_z_read_offset << 8, mslice,
746 radeon_bo_size(track->db_z_read_bo));
747 return -EINVAL;
748 }
749
750 offset = track->db_z_write_offset << 8;
751 if (offset & (surf.base_align - 1)) {
752 dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
753 __func__, __LINE__, offset, surf.base_align);
754 return -EINVAL;
755 }
756 offset += surf.layer_size * mslice;
757 if (!track->db_z_write_bo) {
758 dev_warn(p->dev, "%s:%d db_z_write_bo not set\n", __func__, __LINE__);
759 return -EINVAL;
760 }
761 if (offset > radeon_bo_size(track->db_z_write_bo)) {
762 dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
763 "offset %ld, max layer %d, bo size %ld)\n",
764 __func__, __LINE__, surf.layer_size,
765 (unsigned long)track->db_z_write_offset << 8, mslice,
766 radeon_bo_size(track->db_z_write_bo));
767 return -EINVAL;
768 }
769
770 /* hyperz */
771 if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
772 r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
773 if (r) {
774 return r;
775 }
776 }
777
778 return 0;
779 }
780
781 static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
782 struct radeon_bo *texture,
783 struct radeon_bo *mipmap,
784 unsigned idx)
785 {
786 struct eg_surface surf;
787 unsigned long toffset, moffset;
788 unsigned dim, llevel, mslice, width, height, depth, i;
789 u32 texdw[8];
790 int r;
791
792 texdw[0] = radeon_get_ib_value(p, idx + 0);
793 texdw[1] = radeon_get_ib_value(p, idx + 1);
794 texdw[2] = radeon_get_ib_value(p, idx + 2);
795 texdw[3] = radeon_get_ib_value(p, idx + 3);
796 texdw[4] = radeon_get_ib_value(p, idx + 4);
797 texdw[5] = radeon_get_ib_value(p, idx + 5);
798 texdw[6] = radeon_get_ib_value(p, idx + 6);
799 texdw[7] = radeon_get_ib_value(p, idx + 7);
800 dim = G_030000_DIM(texdw[0]);
801 llevel = G_030014_LAST_LEVEL(texdw[5]);
802 mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
803 width = G_030000_TEX_WIDTH(texdw[0]) + 1;
804 height = G_030004_TEX_HEIGHT(texdw[1]) + 1;
805 depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
806 surf.format = G_03001C_DATA_FORMAT(texdw[7]);
807 surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
808 surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
809 surf.nby = r600_fmt_get_nblocksy(surf.format, height);
810 surf.mode = G_030004_ARRAY_MODE(texdw[1]);
811 surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
812 surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
813 surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
814 surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
815 surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
816 surf.nsamples = 1;
817 toffset = texdw[2] << 8;
818 moffset = texdw[3] << 8;
819
820 if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
821 dev_warn(p->dev, "%s:%d texture invalid format %d\n",
822 __func__, __LINE__, surf.format);
823 return -EINVAL;
824 }
825 switch (dim) {
826 case V_030000_SQ_TEX_DIM_1D:
827 case V_030000_SQ_TEX_DIM_2D:
828 case V_030000_SQ_TEX_DIM_CUBEMAP:
829 case V_030000_SQ_TEX_DIM_1D_ARRAY:
830 case V_030000_SQ_TEX_DIM_2D_ARRAY:
831 depth = 1;
832 break;
833 case V_030000_SQ_TEX_DIM_2D_MSAA:
834 case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
835 surf.nsamples = 1 << llevel;
836 llevel = 0;
837 depth = 1;
838 break;
839 case V_030000_SQ_TEX_DIM_3D:
840 break;
841 default:
842 dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
843 __func__, __LINE__, dim);
844 return -EINVAL;
845 }
846
847 r = evergreen_surface_value_conv_check(p, &surf, "texture");
848 if (r) {
849 return r;
850 }
851
852 /* align height */
853 evergreen_surface_check(p, &surf, NULL);
854 surf.nby = ALIGN(surf.nby, surf.halign);
855
856 r = evergreen_surface_check(p, &surf, "texture");
857 if (r) {
858 dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
859 __func__, __LINE__, texdw[0], texdw[1], texdw[4],
860 texdw[5], texdw[6], texdw[7]);
861 return r;
862 }
863
864 /* check texture size */
865 if (toffset & (surf.base_align - 1)) {
866 dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
867 __func__, __LINE__, toffset, surf.base_align);
868 return -EINVAL;
869 }
870 if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
871 dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
872 __func__, __LINE__, moffset, surf.base_align);
873 return -EINVAL;
874 }
875 if (dim == SQ_TEX_DIM_3D) {
876 toffset += surf.layer_size * depth;
877 } else {
878 toffset += surf.layer_size * mslice;
879 }
880 if (toffset > radeon_bo_size(texture)) {
881 dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
882 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
883 __func__, __LINE__, surf.layer_size,
884 (unsigned long)texdw[2] << 8, mslice,
885 depth, radeon_bo_size(texture),
886 surf.nbx, surf.nby);
887 return -EINVAL;
888 }
889
890 if (!mipmap) {
891 if (llevel) {
892 dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
893 __func__, __LINE__);
894 return -EINVAL;
895 } else {
896 return 0; /* everything's ok */
897 }
898 }
899
900 /* check mipmap size */
901 for (i = 1; i <= llevel; i++) {
902 unsigned w, h, d;
903
904 w = r600_mip_minify(width, i);
905 h = r600_mip_minify(height, i);
906 d = r600_mip_minify(depth, i);
907 surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
908 surf.nby = r600_fmt_get_nblocksy(surf.format, h);
909
910 switch (surf.mode) {
911 case ARRAY_2D_TILED_THIN1:
912 if (surf.nbx < surf.palign || surf.nby < surf.halign) {
913 surf.mode = ARRAY_1D_TILED_THIN1;
914 }
915 /* recompute alignment */
916 evergreen_surface_check(p, &surf, NULL);
917 break;
918 case ARRAY_LINEAR_GENERAL:
919 case ARRAY_LINEAR_ALIGNED:
920 case ARRAY_1D_TILED_THIN1:
921 break;
922 default:
923 dev_warn(p->dev, "%s:%d invalid array mode %d\n",
924 __func__, __LINE__, surf.mode);
925 return -EINVAL;
926 }
927 surf.nbx = ALIGN(surf.nbx, surf.palign);
928 surf.nby = ALIGN(surf.nby, surf.halign);
929
930 r = evergreen_surface_check(p, &surf, "mipmap");
931 if (r) {
932 return r;
933 }
934
935 if (dim == SQ_TEX_DIM_3D) {
936 moffset += surf.layer_size * d;
937 } else {
938 moffset += surf.layer_size * mslice;
939 }
940 if (moffset > radeon_bo_size(mipmap)) {
941 dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
942 "offset %ld, coffset %ld, max layer %d, depth %d, "
943 "bo size %ld) level0 (%d %d %d)\n",
944 __func__, __LINE__, i, surf.layer_size,
945 (unsigned long)texdw[3] << 8, moffset, mslice,
946 d, radeon_bo_size(mipmap),
947 width, height, depth);
948 dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
949 __func__, __LINE__, surf.nbx, surf.nby,
950 surf.mode, surf.bpe, surf.nsamples,
951 surf.bankw, surf.bankh,
952 surf.tsplit, surf.mtilea);
953 return -EINVAL;
954 }
955 }
956
957 return 0;
958 }
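/*
 * For each mip level the loop above re-minifies the dimensions, lets a
 * 2D-tiled level fall back to 1D tiling once it is smaller than a macro
 * tile, re-aligns to the resulting pitch/height alignment and accumulates
 * layer_size * mslice (or * the minified depth for 3D textures) into
 * moffset, which must stay within the mipmap bo.
 */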
959
960 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
961 {
962 struct evergreen_cs_track *track = p->track;
963 unsigned tmp, i;
964 int r;
965 unsigned buffer_mask = 0;
966
967 /* check streamout */
968 if (track->streamout_dirty && track->vgt_strmout_config) {
969 for (i = 0; i < 4; i++) {
970 if (track->vgt_strmout_config & (1 << i)) {
971 buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
972 }
973 }
974
975 for (i = 0; i < 4; i++) {
976 if (buffer_mask & (1 << i)) {
977 if (track->vgt_strmout_bo[i]) {
978 u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
979 (u64)track->vgt_strmout_size[i];
980 if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
981 DRM_ERROR("streamout %d bo too small: 0x%"PRIx64", 0x%lx\n",
982 i, offset,
983 radeon_bo_size(track->vgt_strmout_bo[i]));
984 return -EINVAL;
985 }
986 } else {
987 dev_warn(p->dev, "No buffer for streamout %d\n", i);
988 return -EINVAL;
989 }
990 }
991 }
992 track->streamout_dirty = false;
993 }
994
995 if (track->sx_misc_kill_all_prims)
996 return 0;
997
998 /* check that we have a cb for each enabled target
999 */
1000 if (track->cb_dirty) {
1001 tmp = track->cb_target_mask;
1002 for (i = 0; i < 8; i++) {
1003 u32 format = G_028C70_FORMAT(track->cb_color_info[i]);
1004
1005 if (format != V_028C70_COLOR_INVALID &&
1006 (tmp >> (i * 4)) & 0xF) {
1007 /* at least one component is enabled */
1008 if (track->cb_color_bo[i] == NULL) {
1009 dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
1010 __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
1011 return -EINVAL;
1012 }
1013 /* check cb */
1014 r = evergreen_cs_track_validate_cb(p, i);
1015 if (r) {
1016 return r;
1017 }
1018 }
1019 }
1020 track->cb_dirty = false;
1021 }
1022
1023 if (track->db_dirty) {
1024 /* Check stencil buffer */
1025 if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
1026 G_028800_STENCIL_ENABLE(track->db_depth_control)) {
1027 r = evergreen_cs_track_validate_stencil(p);
1028 if (r)
1029 return r;
1030 }
1031 /* Check depth buffer */
1032 if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
1033 G_028800_Z_ENABLE(track->db_depth_control)) {
1034 r = evergreen_cs_track_validate_depth(p);
1035 if (r)
1036 return r;
1037 }
1038 track->db_dirty = false;
1039 }
1040
1041 return 0;
1042 }
1043
1044 /**
1045 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
1046 * @p: parser structure holding parsing context.
1047 *
1048 * This is an Evergreen(+)-specific function for parsing VLINE packets.
1049 * The real work is done by the r600_cs_common_vline_parse() function.
1050 * Here we just set up the ASIC-specific register table and call
1051 * the common implementation function.
1052 */
1053 static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
1054 {
1055
1056 static uint32_t vline_start_end[6] = {
1057 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
1058 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
1059 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
1060 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
1061 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
1062 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
1063 };
1064 static uint32_t vline_status[6] = {
1065 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
1066 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
1067 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
1068 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
1069 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
1070 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
1071 };
1072
1073 return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
1074 }
1075
1076 static int evergreen_packet0_check(struct radeon_cs_parser *p,
1077 struct radeon_cs_packet *pkt,
1078 unsigned idx, unsigned reg)
1079 {
1080 int r;
1081
1082 switch (reg) {
1083 case EVERGREEN_VLINE_START_END:
1084 r = evergreen_cs_packet_parse_vline(p);
1085 if (r) {
1086 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1087 idx, reg);
1088 return r;
1089 }
1090 break;
1091 default:
1092 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1093 reg, idx);
1094 return -EINVAL;
1095 }
1096 return 0;
1097 }
1098
1099 static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
1100 struct radeon_cs_packet *pkt)
1101 {
1102 unsigned reg, i;
1103 unsigned idx;
1104 int r;
1105
1106 idx = pkt->idx + 1;
1107 reg = pkt->reg;
1108 for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
1109 r = evergreen_packet0_check(p, pkt, idx, reg);
1110 if (r) {
1111 return r;
1112 }
1113 }
1114 return 0;
1115 }
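/*
 * A type-0 packet writes pkt->count + 1 consecutive dwords starting at
 * register pkt->reg, which is why the loop above advances reg by 4 (one
 * dword) per IB value; on evergreen only the VLINE wait sequence is
 * accepted this way.
 */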
1116
1117 /**
1118 * evergreen_cs_handle_reg() - process registers that need special handling.
1119 * @p: parser structure holding parsing context
1120 * @reg: register we are testing
1121 * @idx: index into the cs buffer
1122 */
1123 static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1124 {
1125 struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
1126 struct radeon_bo_list *reloc;
1127 u32 tmp, *ib;
1128 int r;
1129
1130 ib = p->ib.ptr;
1131 switch (reg) {
1132 /* force the following regs to 0 in an attempt to disable the out buffer;
1133 * performing a proper security check on it would require us to better
1134 * understand how it works (Jerome)
1135 */
1136 case SQ_ESGS_RING_SIZE:
1137 case SQ_GSVS_RING_SIZE:
1138 case SQ_ESTMP_RING_SIZE:
1139 case SQ_GSTMP_RING_SIZE:
1140 case SQ_HSTMP_RING_SIZE:
1141 case SQ_LSTMP_RING_SIZE:
1142 case SQ_PSTMP_RING_SIZE:
1143 case SQ_VSTMP_RING_SIZE:
1144 case SQ_ESGS_RING_ITEMSIZE:
1145 case SQ_ESTMP_RING_ITEMSIZE:
1146 case SQ_GSTMP_RING_ITEMSIZE:
1147 case SQ_GSVS_RING_ITEMSIZE:
1148 case SQ_GS_VERT_ITEMSIZE:
1149 case SQ_GS_VERT_ITEMSIZE_1:
1150 case SQ_GS_VERT_ITEMSIZE_2:
1151 case SQ_GS_VERT_ITEMSIZE_3:
1152 case SQ_GSVS_RING_OFFSET_1:
1153 case SQ_GSVS_RING_OFFSET_2:
1154 case SQ_GSVS_RING_OFFSET_3:
1155 case SQ_HSTMP_RING_ITEMSIZE:
1156 case SQ_LSTMP_RING_ITEMSIZE:
1157 case SQ_PSTMP_RING_ITEMSIZE:
1158 case SQ_VSTMP_RING_ITEMSIZE:
1159 case VGT_TF_RING_SIZE:
1160 /* get value to populate the IB, don't remove */
1161 /*tmp =radeon_get_ib_value(p, idx);
1162 ib[idx] = 0;*/
1163 break;
1164 case SQ_ESGS_RING_BASE:
1165 case SQ_GSVS_RING_BASE:
1166 case SQ_ESTMP_RING_BASE:
1167 case SQ_GSTMP_RING_BASE:
1168 case SQ_HSTMP_RING_BASE:
1169 case SQ_LSTMP_RING_BASE:
1170 case SQ_PSTMP_RING_BASE:
1171 case SQ_VSTMP_RING_BASE:
1172 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1173 if (r) {
1174 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1175 "0x%04X\n", reg);
1176 return -EINVAL;
1177 }
1178 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1179 break;
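/*
 * Base-address registers like the ones above hold a GPU address in units
 * of 256 bytes; the IB supplies an offset within the relocated bo, so
 * adding (gpu_offset >> 8) rebases it onto the bo placement chosen by the
 * kernel. The same pattern recurs for the DB/CB/streamout bases below.
 */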
1180 case DB_DEPTH_CONTROL:
1181 track->db_depth_control = radeon_get_ib_value(p, idx);
1182 track->db_dirty = true;
1183 break;
1184 case CAYMAN_DB_EQAA:
1185 if (p->rdev->family < CHIP_CAYMAN) {
1186 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1187 "0x%04X\n", reg);
1188 return -EINVAL;
1189 }
1190 break;
1191 case CAYMAN_DB_DEPTH_INFO:
1192 if (p->rdev->family < CHIP_CAYMAN) {
1193 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1194 "0x%04X\n", reg);
1195 return -EINVAL;
1196 }
1197 break;
1198 case DB_Z_INFO:
1199 track->db_z_info = radeon_get_ib_value(p, idx);
1200 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1201 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1202 if (r) {
1203 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1204 "0x%04X\n", reg);
1205 return -EINVAL;
1206 }
1207 ib[idx] &= ~Z_ARRAY_MODE(0xf);
1208 track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1209 ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1210 track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1211 if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1212 unsigned bankw, bankh, mtaspect, tile_split;
1213
1214 evergreen_tiling_fields(reloc->tiling_flags,
1215 &bankw, &bankh, &mtaspect,
1216 &tile_split);
1217 ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1218 ib[idx] |= DB_TILE_SPLIT(tile_split) |
1219 DB_BANK_WIDTH(bankw) |
1220 DB_BANK_HEIGHT(bankh) |
1221 DB_MACRO_TILE_ASPECT(mtaspect);
1222 }
1223 }
1224 track->db_dirty = true;
1225 break;
1226 case DB_STENCIL_INFO:
1227 track->db_s_info = radeon_get_ib_value(p, idx);
1228 track->db_dirty = true;
1229 break;
1230 case DB_DEPTH_VIEW:
1231 track->db_depth_view = radeon_get_ib_value(p, idx);
1232 track->db_dirty = true;
1233 break;
1234 case DB_DEPTH_SIZE:
1235 track->db_depth_size = radeon_get_ib_value(p, idx);
1236 track->db_dirty = true;
1237 break;
1238 case R_02805C_DB_DEPTH_SLICE:
1239 track->db_depth_slice = radeon_get_ib_value(p, idx);
1240 track->db_dirty = true;
1241 break;
1242 case DB_Z_READ_BASE:
1243 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1244 if (r) {
1245 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1246 "0x%04X\n", reg);
1247 return -EINVAL;
1248 }
1249 track->db_z_read_offset = radeon_get_ib_value(p, idx);
1250 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1251 track->db_z_read_bo = reloc->robj;
1252 track->db_dirty = true;
1253 break;
1254 case DB_Z_WRITE_BASE:
1255 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1256 if (r) {
1257 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1258 "0x%04X\n", reg);
1259 return -EINVAL;
1260 }
1261 track->db_z_write_offset = radeon_get_ib_value(p, idx);
1262 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1263 track->db_z_write_bo = reloc->robj;
1264 track->db_dirty = true;
1265 break;
1266 case DB_STENCIL_READ_BASE:
1267 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1268 if (r) {
1269 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1270 "0x%04X\n", reg);
1271 return -EINVAL;
1272 }
1273 track->db_s_read_offset = radeon_get_ib_value(p, idx);
1274 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1275 track->db_s_read_bo = reloc->robj;
1276 track->db_dirty = true;
1277 break;
1278 case DB_STENCIL_WRITE_BASE:
1279 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1280 if (r) {
1281 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1282 "0x%04X\n", reg);
1283 return -EINVAL;
1284 }
1285 track->db_s_write_offset = radeon_get_ib_value(p, idx);
1286 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1287 track->db_s_write_bo = reloc->robj;
1288 track->db_dirty = true;
1289 break;
1290 case VGT_STRMOUT_CONFIG:
1291 track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1292 track->streamout_dirty = true;
1293 break;
1294 case VGT_STRMOUT_BUFFER_CONFIG:
1295 track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1296 track->streamout_dirty = true;
1297 break;
1298 case VGT_STRMOUT_BUFFER_BASE_0:
1299 case VGT_STRMOUT_BUFFER_BASE_1:
1300 case VGT_STRMOUT_BUFFER_BASE_2:
1301 case VGT_STRMOUT_BUFFER_BASE_3:
1302 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1303 if (r) {
1304 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1305 "0x%04X\n", reg);
1306 return -EINVAL;
1307 }
1308 tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1309 track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1310 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1311 track->vgt_strmout_bo[tmp] = reloc->robj;
1312 track->streamout_dirty = true;
1313 break;
1314 case VGT_STRMOUT_BUFFER_SIZE_0:
1315 case VGT_STRMOUT_BUFFER_SIZE_1:
1316 case VGT_STRMOUT_BUFFER_SIZE_2:
1317 case VGT_STRMOUT_BUFFER_SIZE_3:
1318 tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1319 /* size in register is DWs, convert to bytes */
1320 track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1321 track->streamout_dirty = true;
1322 break;
1323 case CP_COHER_BASE:
1324 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1325 if (r) {
1326 dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1327 "0x%04X\n", reg);
1328 return -EINVAL;
1329 }
1330 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1331 break;
1332 case CB_TARGET_MASK:
1333 track->cb_target_mask = radeon_get_ib_value(p, idx);
1334 track->cb_dirty = true;
1335 break;
1336 case CB_SHADER_MASK:
1337 track->cb_shader_mask = radeon_get_ib_value(p, idx);
1338 track->cb_dirty = true;
1339 break;
1340 case PA_SC_AA_CONFIG:
1341 if (p->rdev->family >= CHIP_CAYMAN) {
1342 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1343 "0x%04X\n", reg);
1344 return -EINVAL;
1345 }
1346 tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1347 track->nsamples = 1 << tmp;
1348 break;
1349 case CAYMAN_PA_SC_AA_CONFIG:
1350 if (p->rdev->family < CHIP_CAYMAN) {
1351 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1352 "0x%04X\n", reg);
1353 return -EINVAL;
1354 }
1355 tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1356 track->nsamples = 1 << tmp;
1357 break;
1358 case CB_COLOR0_VIEW:
1359 case CB_COLOR1_VIEW:
1360 case CB_COLOR2_VIEW:
1361 case CB_COLOR3_VIEW:
1362 case CB_COLOR4_VIEW:
1363 case CB_COLOR5_VIEW:
1364 case CB_COLOR6_VIEW:
1365 case CB_COLOR7_VIEW:
1366 tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1367 track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1368 track->cb_dirty = true;
1369 break;
1370 case CB_COLOR8_VIEW:
1371 case CB_COLOR9_VIEW:
1372 case CB_COLOR10_VIEW:
1373 case CB_COLOR11_VIEW:
1374 tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1375 track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1376 track->cb_dirty = true;
1377 break;
1378 case CB_COLOR0_INFO:
1379 case CB_COLOR1_INFO:
1380 case CB_COLOR2_INFO:
1381 case CB_COLOR3_INFO:
1382 case CB_COLOR4_INFO:
1383 case CB_COLOR5_INFO:
1384 case CB_COLOR6_INFO:
1385 case CB_COLOR7_INFO:
1386 tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1387 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1388 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1389 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1390 if (r) {
1391 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1392 "0x%04X\n", reg);
1393 return -EINVAL;
1394 }
1395 ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1396 track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1397 }
1398 track->cb_dirty = true;
1399 break;
1400 case CB_COLOR8_INFO:
1401 case CB_COLOR9_INFO:
1402 case CB_COLOR10_INFO:
1403 case CB_COLOR11_INFO:
1404 tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1405 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1406 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1407 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1408 if (r) {
1409 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1410 "0x%04X\n", reg);
1411 return -EINVAL;
1412 }
1413 ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1414 track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1415 }
1416 track->cb_dirty = true;
1417 break;
1418 case CB_COLOR0_PITCH:
1419 case CB_COLOR1_PITCH:
1420 case CB_COLOR2_PITCH:
1421 case CB_COLOR3_PITCH:
1422 case CB_COLOR4_PITCH:
1423 case CB_COLOR5_PITCH:
1424 case CB_COLOR6_PITCH:
1425 case CB_COLOR7_PITCH:
1426 tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1427 track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1428 track->cb_dirty = true;
1429 break;
1430 case CB_COLOR8_PITCH:
1431 case CB_COLOR9_PITCH:
1432 case CB_COLOR10_PITCH:
1433 case CB_COLOR11_PITCH:
1434 tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1435 track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1436 track->cb_dirty = true;
1437 break;
1438 case CB_COLOR0_SLICE:
1439 case CB_COLOR1_SLICE:
1440 case CB_COLOR2_SLICE:
1441 case CB_COLOR3_SLICE:
1442 case CB_COLOR4_SLICE:
1443 case CB_COLOR5_SLICE:
1444 case CB_COLOR6_SLICE:
1445 case CB_COLOR7_SLICE:
1446 tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1447 track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1448 track->cb_color_slice_idx[tmp] = idx;
1449 track->cb_dirty = true;
1450 break;
1451 case CB_COLOR8_SLICE:
1452 case CB_COLOR9_SLICE:
1453 case CB_COLOR10_SLICE:
1454 case CB_COLOR11_SLICE:
1455 tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1456 track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1457 track->cb_color_slice_idx[tmp] = idx;
1458 track->cb_dirty = true;
1459 break;
1460 case CB_COLOR0_ATTRIB:
1461 case CB_COLOR1_ATTRIB:
1462 case CB_COLOR2_ATTRIB:
1463 case CB_COLOR3_ATTRIB:
1464 case CB_COLOR4_ATTRIB:
1465 case CB_COLOR5_ATTRIB:
1466 case CB_COLOR6_ATTRIB:
1467 case CB_COLOR7_ATTRIB:
1468 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1469 if (r) {
1470 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1471 "0x%04X\n", reg);
1472 return -EINVAL;
1473 }
1474 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1475 if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1476 unsigned bankw, bankh, mtaspect, tile_split;
1477
1478 evergreen_tiling_fields(reloc->tiling_flags,
1479 &bankw, &bankh, &mtaspect,
1480 &tile_split);
1481 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1482 ib[idx] |= CB_TILE_SPLIT(tile_split) |
1483 CB_BANK_WIDTH(bankw) |
1484 CB_BANK_HEIGHT(bankh) |
1485 CB_MACRO_TILE_ASPECT(mtaspect);
1486 }
1487 }
1488 tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1489 track->cb_color_attrib[tmp] = ib[idx];
1490 track->cb_dirty = true;
1491 break;
1492 case CB_COLOR8_ATTRIB:
1493 case CB_COLOR9_ATTRIB:
1494 case CB_COLOR10_ATTRIB:
1495 case CB_COLOR11_ATTRIB:
1496 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1497 if (r) {
1498 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1499 "0x%04X\n", reg);
1500 return -EINVAL;
1501 }
1502 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1503 if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1504 unsigned bankw, bankh, mtaspect, tile_split;
1505
1506 evergreen_tiling_fields(reloc->tiling_flags,
1507 &bankw, &bankh, &mtaspect,
1508 &tile_split);
1509 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1510 ib[idx] |= CB_TILE_SPLIT(tile_split) |
1511 CB_BANK_WIDTH(bankw) |
1512 CB_BANK_HEIGHT(bankh) |
1513 CB_MACRO_TILE_ASPECT(mtaspect);
1514 }
1515 }
1516 tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1517 track->cb_color_attrib[tmp] = ib[idx];
1518 track->cb_dirty = true;
1519 break;
1520 case CB_COLOR0_FMASK:
1521 case CB_COLOR1_FMASK:
1522 case CB_COLOR2_FMASK:
1523 case CB_COLOR3_FMASK:
1524 case CB_COLOR4_FMASK:
1525 case CB_COLOR5_FMASK:
1526 case CB_COLOR6_FMASK:
1527 case CB_COLOR7_FMASK:
1528 tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1529 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1530 if (r) {
1531 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1532 return -EINVAL;
1533 }
1534 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1535 track->cb_color_fmask_bo[tmp] = reloc->robj;
1536 break;
1537 case CB_COLOR0_CMASK:
1538 case CB_COLOR1_CMASK:
1539 case CB_COLOR2_CMASK:
1540 case CB_COLOR3_CMASK:
1541 case CB_COLOR4_CMASK:
1542 case CB_COLOR5_CMASK:
1543 case CB_COLOR6_CMASK:
1544 case CB_COLOR7_CMASK:
1545 tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1546 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1547 if (r) {
1548 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1549 return -EINVAL;
1550 }
1551 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1552 track->cb_color_cmask_bo[tmp] = reloc->robj;
1553 break;
1554 case CB_COLOR0_FMASK_SLICE:
1555 case CB_COLOR1_FMASK_SLICE:
1556 case CB_COLOR2_FMASK_SLICE:
1557 case CB_COLOR3_FMASK_SLICE:
1558 case CB_COLOR4_FMASK_SLICE:
1559 case CB_COLOR5_FMASK_SLICE:
1560 case CB_COLOR6_FMASK_SLICE:
1561 case CB_COLOR7_FMASK_SLICE:
1562 tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1563 track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1564 break;
1565 case CB_COLOR0_CMASK_SLICE:
1566 case CB_COLOR1_CMASK_SLICE:
1567 case CB_COLOR2_CMASK_SLICE:
1568 case CB_COLOR3_CMASK_SLICE:
1569 case CB_COLOR4_CMASK_SLICE:
1570 case CB_COLOR5_CMASK_SLICE:
1571 case CB_COLOR6_CMASK_SLICE:
1572 case CB_COLOR7_CMASK_SLICE:
1573 tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1574 track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1575 break;
1576 case CB_COLOR0_BASE:
1577 case CB_COLOR1_BASE:
1578 case CB_COLOR2_BASE:
1579 case CB_COLOR3_BASE:
1580 case CB_COLOR4_BASE:
1581 case CB_COLOR5_BASE:
1582 case CB_COLOR6_BASE:
1583 case CB_COLOR7_BASE:
1584 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1585 if (r) {
1586 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1587 "0x%04X\n", reg);
1588 return -EINVAL;
1589 }
1590 tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1591 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1592 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1593 track->cb_color_bo[tmp] = reloc->robj;
1594 track->cb_dirty = true;
1595 break;
1596 case CB_COLOR8_BASE:
1597 case CB_COLOR9_BASE:
1598 case CB_COLOR10_BASE:
1599 case CB_COLOR11_BASE:
1600 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1601 if (r) {
1602 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1603 "0x%04X\n", reg);
1604 return -EINVAL;
1605 }
1606 tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1607 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1608 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1609 track->cb_color_bo[tmp] = reloc->robj;
1610 track->cb_dirty = true;
1611 break;
1612 case DB_HTILE_DATA_BASE:
1613 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1614 if (r) {
1615 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1616 "0x%04X\n", reg);
1617 return -EINVAL;
1618 }
1619 track->htile_offset = radeon_get_ib_value(p, idx);
1620 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1621 track->htile_bo = reloc->robj;
1622 track->db_dirty = true;
1623 break;
1624 case DB_HTILE_SURFACE:
1625 /* 8x8 only */
1626 track->htile_surface = radeon_get_ib_value(p, idx);
1627 /* force 8x8 htile width and height */
1628 ib[idx] |= 3;
1629 track->db_dirty = true;
1630 break;
1631 case CB_IMMED0_BASE:
1632 case CB_IMMED1_BASE:
1633 case CB_IMMED2_BASE:
1634 case CB_IMMED3_BASE:
1635 case CB_IMMED4_BASE:
1636 case CB_IMMED5_BASE:
1637 case CB_IMMED6_BASE:
1638 case CB_IMMED7_BASE:
1639 case CB_IMMED8_BASE:
1640 case CB_IMMED9_BASE:
1641 case CB_IMMED10_BASE:
1642 case CB_IMMED11_BASE:
1643 case SQ_PGM_START_FS:
1644 case SQ_PGM_START_ES:
1645 case SQ_PGM_START_VS:
1646 case SQ_PGM_START_GS:
1647 case SQ_PGM_START_PS:
1648 case SQ_PGM_START_HS:
1649 case SQ_PGM_START_LS:
1650 case SQ_CONST_MEM_BASE:
1651 case SQ_ALU_CONST_CACHE_GS_0:
1652 case SQ_ALU_CONST_CACHE_GS_1:
1653 case SQ_ALU_CONST_CACHE_GS_2:
1654 case SQ_ALU_CONST_CACHE_GS_3:
1655 case SQ_ALU_CONST_CACHE_GS_4:
1656 case SQ_ALU_CONST_CACHE_GS_5:
1657 case SQ_ALU_CONST_CACHE_GS_6:
1658 case SQ_ALU_CONST_CACHE_GS_7:
1659 case SQ_ALU_CONST_CACHE_GS_8:
1660 case SQ_ALU_CONST_CACHE_GS_9:
1661 case SQ_ALU_CONST_CACHE_GS_10:
1662 case SQ_ALU_CONST_CACHE_GS_11:
1663 case SQ_ALU_CONST_CACHE_GS_12:
1664 case SQ_ALU_CONST_CACHE_GS_13:
1665 case SQ_ALU_CONST_CACHE_GS_14:
1666 case SQ_ALU_CONST_CACHE_GS_15:
1667 case SQ_ALU_CONST_CACHE_PS_0:
1668 case SQ_ALU_CONST_CACHE_PS_1:
1669 case SQ_ALU_CONST_CACHE_PS_2:
1670 case SQ_ALU_CONST_CACHE_PS_3:
1671 case SQ_ALU_CONST_CACHE_PS_4:
1672 case SQ_ALU_CONST_CACHE_PS_5:
1673 case SQ_ALU_CONST_CACHE_PS_6:
1674 case SQ_ALU_CONST_CACHE_PS_7:
1675 case SQ_ALU_CONST_CACHE_PS_8:
1676 case SQ_ALU_CONST_CACHE_PS_9:
1677 case SQ_ALU_CONST_CACHE_PS_10:
1678 case SQ_ALU_CONST_CACHE_PS_11:
1679 case SQ_ALU_CONST_CACHE_PS_12:
1680 case SQ_ALU_CONST_CACHE_PS_13:
1681 case SQ_ALU_CONST_CACHE_PS_14:
1682 case SQ_ALU_CONST_CACHE_PS_15:
1683 case SQ_ALU_CONST_CACHE_VS_0:
1684 case SQ_ALU_CONST_CACHE_VS_1:
1685 case SQ_ALU_CONST_CACHE_VS_2:
1686 case SQ_ALU_CONST_CACHE_VS_3:
1687 case SQ_ALU_CONST_CACHE_VS_4:
1688 case SQ_ALU_CONST_CACHE_VS_5:
1689 case SQ_ALU_CONST_CACHE_VS_6:
1690 case SQ_ALU_CONST_CACHE_VS_7:
1691 case SQ_ALU_CONST_CACHE_VS_8:
1692 case SQ_ALU_CONST_CACHE_VS_9:
1693 case SQ_ALU_CONST_CACHE_VS_10:
1694 case SQ_ALU_CONST_CACHE_VS_11:
1695 case SQ_ALU_CONST_CACHE_VS_12:
1696 case SQ_ALU_CONST_CACHE_VS_13:
1697 case SQ_ALU_CONST_CACHE_VS_14:
1698 case SQ_ALU_CONST_CACHE_VS_15:
1699 case SQ_ALU_CONST_CACHE_HS_0:
1700 case SQ_ALU_CONST_CACHE_HS_1:
1701 case SQ_ALU_CONST_CACHE_HS_2:
1702 case SQ_ALU_CONST_CACHE_HS_3:
1703 case SQ_ALU_CONST_CACHE_HS_4:
1704 case SQ_ALU_CONST_CACHE_HS_5:
1705 case SQ_ALU_CONST_CACHE_HS_6:
1706 case SQ_ALU_CONST_CACHE_HS_7:
1707 case SQ_ALU_CONST_CACHE_HS_8:
1708 case SQ_ALU_CONST_CACHE_HS_9:
1709 case SQ_ALU_CONST_CACHE_HS_10:
1710 case SQ_ALU_CONST_CACHE_HS_11:
1711 case SQ_ALU_CONST_CACHE_HS_12:
1712 case SQ_ALU_CONST_CACHE_HS_13:
1713 case SQ_ALU_CONST_CACHE_HS_14:
1714 case SQ_ALU_CONST_CACHE_HS_15:
1715 case SQ_ALU_CONST_CACHE_LS_0:
1716 case SQ_ALU_CONST_CACHE_LS_1:
1717 case SQ_ALU_CONST_CACHE_LS_2:
1718 case SQ_ALU_CONST_CACHE_LS_3:
1719 case SQ_ALU_CONST_CACHE_LS_4:
1720 case SQ_ALU_CONST_CACHE_LS_5:
1721 case SQ_ALU_CONST_CACHE_LS_6:
1722 case SQ_ALU_CONST_CACHE_LS_7:
1723 case SQ_ALU_CONST_CACHE_LS_8:
1724 case SQ_ALU_CONST_CACHE_LS_9:
1725 case SQ_ALU_CONST_CACHE_LS_10:
1726 case SQ_ALU_CONST_CACHE_LS_11:
1727 case SQ_ALU_CONST_CACHE_LS_12:
1728 case SQ_ALU_CONST_CACHE_LS_13:
1729 case SQ_ALU_CONST_CACHE_LS_14:
1730 case SQ_ALU_CONST_CACHE_LS_15:
1731 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1732 if (r) {
1733 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1734 "0x%04X\n", reg);
1735 return -EINVAL;
1736 }
1737 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1738 break;
1739 case SX_MEMORY_EXPORT_BASE:
1740 if (p->rdev->family >= CHIP_CAYMAN) {
1741 dev_warn(p->dev, "bad SET_CONFIG_REG "
1742 "0x%04X\n", reg);
1743 return -EINVAL;
1744 }
1745 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1746 if (r) {
1747 dev_warn(p->dev, "bad SET_CONFIG_REG "
1748 "0x%04X\n", reg);
1749 return -EINVAL;
1750 }
1751 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1752 break;
1753 case CAYMAN_SX_SCATTER_EXPORT_BASE:
1754 if (p->rdev->family < CHIP_CAYMAN) {
1755 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1756 "0x%04X\n", reg);
1757 return -EINVAL;
1758 }
1759 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1760 if (r) {
1761 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1762 "0x%04X\n", reg);
1763 return -EINVAL;
1764 }
1765 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1766 break;
1767 case SX_MISC:
1768 track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1769 break;
1770 default:
1771 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1772 return -EINVAL;
1773 }
1774 return 0;
1775 }
1776
1777 /**
1778 * evergreen_is_safe_reg() - check if register is authorized or not
1779  * @p: parser structure holding parsing context
1780 * @reg: register we are testing
1781 *
1782 * This function will test against reg_safe_bm and return true
1783 * if register is safe or false otherwise.
1784 */
1785 static inline bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg)
1786 {
1787 struct evergreen_cs_track *track = p->track;
1788 u32 m, i;
1789
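	/*
	 * reg_safe_bm keeps one bit per dword register: reg >> 2 is the
	 * register index, so reg >> 7 selects the 32-bit bitmap word and
	 * (reg >> 2) & 31 the bit within it.  A clear bit means the
	 * register needs no further checking.
	 */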
1790 i = (reg >> 7);
1791 if (unlikely(i >= REG_SAFE_BM_SIZE)) {
1792 return false;
1793 }
1794 m = 1 << ((reg >> 2) & 31);
1795 if (!(track->reg_safe_bm[i] & m))
1796 return true;
1797
1798 return false;
1799 }
1800
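/**
 * evergreen_packet3_check() - check and fix up a single PACKET3 command
 * @p: parser structure holding parsing context
 * @pkt: packet to check
 *
 * Validates the dword count of the packet, patches buffer addresses with
 * the relocation information and, for draw and dispatch packets, re-runs
 * the render state checks through evergreen_cs_track_check().
 * Returns 0 for success and -EINVAL for a malformed packet.
 */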
1801 static int evergreen_packet3_check(struct radeon_cs_parser *p,
1802 struct radeon_cs_packet *pkt)
1803 {
1804 struct radeon_bo_list *reloc;
1805 struct evergreen_cs_track *track;
1806 uint32_t *ib;
1807 unsigned idx;
1808 unsigned i;
1809 unsigned start_reg, end_reg, reg;
1810 int r;
1811 u32 idx_value;
1812
1813 track = (struct evergreen_cs_track *)p->track;
1814 ib = p->ib.ptr;
1815 idx = pkt->idx + 1;
1816 idx_value = radeon_get_ib_value(p, idx);
1817
1818 switch (pkt->opcode) {
1819 case PACKET3_SET_PREDICATION:
1820 {
1821 int pred_op;
1822 int tmp;
1823 uint64_t offset;
1824
1825 if (pkt->count != 1) {
1826 DRM_ERROR("bad SET PREDICATION\n");
1827 return -EINVAL;
1828 }
1829
1830 tmp = radeon_get_ib_value(p, idx + 1);
1831 pred_op = (tmp >> 16) & 0x7;
1832
1833 /* for the clear predicate operation */
1834 if (pred_op == 0)
1835 return 0;
1836
1837 if (pred_op > 2) {
1838 DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1839 return -EINVAL;
1840 }
1841
1842 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1843 if (r) {
1844 DRM_ERROR("bad SET PREDICATION\n");
1845 return -EINVAL;
1846 }
1847
1848 offset = reloc->gpu_offset +
1849 (idx_value & 0xfffffff0) +
1850 ((u64)(tmp & 0xff) << 32);
1851
1852 ib[idx + 0] = offset;
1853 ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1854 }
1855 break;
1856 case PACKET3_CONTEXT_CONTROL:
1857 if (pkt->count != 1) {
1858 DRM_ERROR("bad CONTEXT_CONTROL\n");
1859 return -EINVAL;
1860 }
1861 break;
1862 case PACKET3_INDEX_TYPE:
1863 case PACKET3_NUM_INSTANCES:
1864 case PACKET3_CLEAR_STATE:
1865 if (pkt->count) {
1866 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1867 return -EINVAL;
1868 }
1869 break;
1870 case CAYMAN_PACKET3_DEALLOC_STATE:
1871 if (p->rdev->family < CHIP_CAYMAN) {
1872 DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1873 return -EINVAL;
1874 }
1875 if (pkt->count) {
1876 			DRM_ERROR("bad PACKET3_DEALLOC_STATE (invalid count)\n");
1877 return -EINVAL;
1878 }
1879 break;
1880 case PACKET3_INDEX_BASE:
1881 {
1882 uint64_t offset;
1883
1884 if (pkt->count != 1) {
1885 DRM_ERROR("bad INDEX_BASE\n");
1886 return -EINVAL;
1887 }
1888 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1889 if (r) {
1890 DRM_ERROR("bad INDEX_BASE\n");
1891 return -EINVAL;
1892 }
1893
1894 offset = reloc->gpu_offset +
1895 idx_value +
1896 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1897
1898 ib[idx+0] = offset;
1899 ib[idx+1] = upper_32_bits(offset) & 0xff;
1900
1901 r = evergreen_cs_track_check(p);
1902 if (r) {
1903 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1904 return r;
1905 }
1906 break;
1907 }
1908 case PACKET3_INDEX_BUFFER_SIZE:
1909 {
1910 if (pkt->count != 0) {
1911 DRM_ERROR("bad INDEX_BUFFER_SIZE\n");
1912 return -EINVAL;
1913 }
1914 break;
1915 }
1916 case PACKET3_DRAW_INDEX:
1917 {
1918 uint64_t offset;
1919 if (pkt->count != 3) {
1920 DRM_ERROR("bad DRAW_INDEX\n");
1921 return -EINVAL;
1922 }
1923 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1924 if (r) {
1925 DRM_ERROR("bad DRAW_INDEX\n");
1926 return -EINVAL;
1927 }
1928
1929 offset = reloc->gpu_offset +
1930 idx_value +
1931 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1932
1933 ib[idx+0] = offset;
1934 ib[idx+1] = upper_32_bits(offset) & 0xff;
1935
1936 r = evergreen_cs_track_check(p);
1937 if (r) {
1938 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1939 return r;
1940 }
1941 break;
1942 }
1943 case PACKET3_DRAW_INDEX_2:
1944 {
1945 uint64_t offset;
1946
1947 if (pkt->count != 4) {
1948 DRM_ERROR("bad DRAW_INDEX_2\n");
1949 return -EINVAL;
1950 }
1951 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1952 if (r) {
1953 DRM_ERROR("bad DRAW_INDEX_2\n");
1954 return -EINVAL;
1955 }
1956
1957 offset = reloc->gpu_offset +
1958 radeon_get_ib_value(p, idx+1) +
1959 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1960
1961 ib[idx+1] = offset;
1962 ib[idx+2] = upper_32_bits(offset) & 0xff;
1963
1964 r = evergreen_cs_track_check(p);
1965 if (r) {
1966 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1967 return r;
1968 }
1969 break;
1970 }
1971 case PACKET3_DRAW_INDEX_AUTO:
1972 if (pkt->count != 1) {
1973 DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1974 return -EINVAL;
1975 }
1976 r = evergreen_cs_track_check(p);
1977 if (r) {
1978 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1979 return r;
1980 }
1981 break;
1982 case PACKET3_DRAW_INDEX_MULTI_AUTO:
1983 if (pkt->count != 2) {
1984 DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1985 return -EINVAL;
1986 }
1987 r = evergreen_cs_track_check(p);
1988 if (r) {
1989 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1990 return r;
1991 }
1992 break;
1993 case PACKET3_DRAW_INDEX_IMMD:
1994 if (pkt->count < 2) {
1995 DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1996 return -EINVAL;
1997 }
1998 r = evergreen_cs_track_check(p);
1999 if (r) {
2000 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2001 return r;
2002 }
2003 break;
2004 case PACKET3_DRAW_INDEX_OFFSET:
2005 if (pkt->count != 2) {
2006 DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
2007 return -EINVAL;
2008 }
2009 r = evergreen_cs_track_check(p);
2010 if (r) {
2011 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2012 return r;
2013 }
2014 break;
2015 case PACKET3_DRAW_INDEX_OFFSET_2:
2016 if (pkt->count != 3) {
2017 DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2018 return -EINVAL;
2019 }
2020 r = evergreen_cs_track_check(p);
2021 if (r) {
2022 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2023 return r;
2024 }
2025 break;
2026 case PACKET3_SET_BASE:
2027 {
2028 /*
2029 DW 1 HEADER Header of the packet. Shader_Type in bit 1 of the Header will correspond to the shader type of the Load, see Type-3 Packet.
2030 2 BASE_INDEX Bits [3:0] BASE_INDEX - Base Index specifies which base address is specified in the last two DWs.
2031 0001: DX11 Draw_Index_Indirect Patch Table Base: Base address for Draw_Index_Indirect data.
2032 3 ADDRESS_LO Bits [31:3] - Lower bits of QWORD-Aligned Address. Bits [2:0] - Reserved
2033 4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32]
2034 */
2035 if (pkt->count != 2) {
2036 DRM_ERROR("bad SET_BASE\n");
2037 return -EINVAL;
2038 }
2039
2040 /* currently only supporting setting indirect draw buffer base address */
2041 if (idx_value != 1) {
2042 DRM_ERROR("bad SET_BASE\n");
2043 return -EINVAL;
2044 }
2045
2046 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2047 if (r) {
2048 DRM_ERROR("bad SET_BASE\n");
2049 return -EINVAL;
2050 }
2051
2052 track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj);
2053
2054 ib[idx+1] = reloc->gpu_offset;
2055 ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff;
2056
2057 break;
2058 }
2059 case PACKET3_DRAW_INDIRECT:
2060 case PACKET3_DRAW_INDEX_INDIRECT:
2061 {
2062 u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20;
2063
2064 /*
2065 DW 1 HEADER
2066 2 DATA_OFFSET Bits [31:0] + byte aligned offset where the required data structure starts. Bits 1:0 are zero
2067 3 DRAW_INITIATOR Draw Initiator Register. Written to the VGT_DRAW_INITIATOR register for the assigned context
2068 */
2069 if (pkt->count != 1) {
2070 DRM_ERROR("bad DRAW_INDIRECT\n");
2071 return -EINVAL;
2072 }
2073
2074 if (idx_value + size > track->indirect_draw_buffer_size) {
2075 			dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %"PRIu64" > %lu\n",
2076 idx_value, size, track->indirect_draw_buffer_size);
2077 return -EINVAL;
2078 }
2079
2080 r = evergreen_cs_track_check(p);
2081 if (r) {
2082 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2083 return r;
2084 }
2085 break;
2086 }
2087 case PACKET3_DISPATCH_DIRECT:
2088 if (pkt->count != 3) {
2089 DRM_ERROR("bad DISPATCH_DIRECT\n");
2090 return -EINVAL;
2091 }
2092 r = evergreen_cs_track_check(p);
2093 if (r) {
2094 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2095 return r;
2096 }
2097 break;
2098 case PACKET3_DISPATCH_INDIRECT:
2099 if (pkt->count != 1) {
2100 DRM_ERROR("bad DISPATCH_INDIRECT\n");
2101 return -EINVAL;
2102 }
2103 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2104 if (r) {
2105 DRM_ERROR("bad DISPATCH_INDIRECT\n");
2106 return -EINVAL;
2107 }
2108 ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2109 r = evergreen_cs_track_check(p);
2110 if (r) {
2111 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2112 return r;
2113 }
2114 break;
2115 case PACKET3_WAIT_REG_MEM:
2116 if (pkt->count != 5) {
2117 DRM_ERROR("bad WAIT_REG_MEM\n");
2118 return -EINVAL;
2119 }
2120 /* bit 4 is reg (0) or mem (1) */
2121 if (idx_value & 0x10) {
2122 uint64_t offset;
2123
2124 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2125 if (r) {
2126 DRM_ERROR("bad WAIT_REG_MEM\n");
2127 return -EINVAL;
2128 }
2129
2130 offset = reloc->gpu_offset +
2131 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2132 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2133
2134 ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2135 ib[idx+2] = upper_32_bits(offset) & 0xff;
2136 } else if (idx_value & 0x100) {
2137 DRM_ERROR("cannot use PFP on REG wait\n");
2138 return -EINVAL;
2139 }
2140 break;
2141 case PACKET3_CP_DMA:
2142 {
2143 u32 command, size, info;
2144 u64 offset, tmp;
2145 if (pkt->count != 4) {
2146 DRM_ERROR("bad CP DMA\n");
2147 return -EINVAL;
2148 }
2149 command = radeon_get_ib_value(p, idx+4);
2150 size = command & 0x1fffff;
2151 info = radeon_get_ib_value(p, idx+1);
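		/*
		 * Command bits [20:0] hold the transfer size in bytes; info
		 * bits [30:29] select the source (0 = memory, 1 = GDS,
		 * 2 = embedded data) and bits [21:20] the destination
		 * (0 = memory, 1 = GDS).
		 */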
2152 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2153 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2154 ((((info & 0x00300000) >> 20) == 0) &&
2155 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2156 ((((info & 0x60000000) >> 29) == 0) &&
2157 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2158 			/* non mem-to-mem copies require a dword-aligned count */
2159 if (size % 4) {
2160 DRM_ERROR("CP DMA command requires dw count alignment\n");
2161 return -EINVAL;
2162 }
2163 }
2164 if (command & PACKET3_CP_DMA_CMD_SAS) {
2165 /* src address space is register */
2166 /* GDS is ok */
2167 if (((info & 0x60000000) >> 29) != 1) {
2168 DRM_ERROR("CP DMA SAS not supported\n");
2169 return -EINVAL;
2170 }
2171 } else {
2172 if (command & PACKET3_CP_DMA_CMD_SAIC) {
2173 DRM_ERROR("CP DMA SAIC only supported for registers\n");
2174 return -EINVAL;
2175 }
2176 /* src address space is memory */
2177 if (((info & 0x60000000) >> 29) == 0) {
2178 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2179 if (r) {
2180 DRM_ERROR("bad CP DMA SRC\n");
2181 return -EINVAL;
2182 }
2183
2184 tmp = radeon_get_ib_value(p, idx) +
2185 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2186
2187 offset = reloc->gpu_offset + tmp;
2188
2189 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2190 dev_warn(p->dev, "CP DMA src buffer too small (%"PRIu64" %lu)\n",
2191 tmp + size, radeon_bo_size(reloc->robj));
2192 return -EINVAL;
2193 }
2194
2195 ib[idx] = offset;
2196 ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2197 } else if (((info & 0x60000000) >> 29) != 2) {
2198 DRM_ERROR("bad CP DMA SRC_SEL\n");
2199 return -EINVAL;
2200 }
2201 }
2202 if (command & PACKET3_CP_DMA_CMD_DAS) {
2203 /* dst address space is register */
2204 /* GDS is ok */
2205 if (((info & 0x00300000) >> 20) != 1) {
2206 DRM_ERROR("CP DMA DAS not supported\n");
2207 return -EINVAL;
2208 }
2209 } else {
2210 /* dst address space is memory */
2211 if (command & PACKET3_CP_DMA_CMD_DAIC) {
2212 DRM_ERROR("CP DMA DAIC only supported for registers\n");
2213 return -EINVAL;
2214 }
2215 if (((info & 0x00300000) >> 20) == 0) {
2216 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2217 if (r) {
2218 DRM_ERROR("bad CP DMA DST\n");
2219 return -EINVAL;
2220 }
2221
2222 tmp = radeon_get_ib_value(p, idx+2) +
2223 ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2224
2225 offset = reloc->gpu_offset + tmp;
2226
2227 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2228 dev_warn(p->dev, "CP DMA dst buffer too small (%"PRIu64" %lu)\n",
2229 tmp + size, radeon_bo_size(reloc->robj));
2230 return -EINVAL;
2231 }
2232
2233 ib[idx+2] = offset;
2234 ib[idx+3] = upper_32_bits(offset) & 0xff;
2235 } else {
2236 DRM_ERROR("bad CP DMA DST_SEL\n");
2237 return -EINVAL;
2238 }
2239 }
2240 break;
2241 }
2242 case PACKET3_SURFACE_SYNC:
2243 if (pkt->count != 3) {
2244 DRM_ERROR("bad SURFACE_SYNC\n");
2245 return -EINVAL;
2246 }
2247 /* 0xffffffff/0x0 is flush all cache flag */
2248 if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2249 radeon_get_ib_value(p, idx + 2) != 0) {
2250 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2251 if (r) {
2252 DRM_ERROR("bad SURFACE_SYNC\n");
2253 return -EINVAL;
2254 }
2255 ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2256 }
2257 break;
2258 case PACKET3_EVENT_WRITE:
2259 if (pkt->count != 2 && pkt->count != 0) {
2260 DRM_ERROR("bad EVENT_WRITE\n");
2261 return -EINVAL;
2262 }
2263 if (pkt->count) {
2264 uint64_t offset;
2265
2266 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2267 if (r) {
2268 DRM_ERROR("bad EVENT_WRITE\n");
2269 return -EINVAL;
2270 }
2271 offset = reloc->gpu_offset +
2272 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2273 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2274
2275 ib[idx+1] = offset & 0xfffffff8;
2276 ib[idx+2] = upper_32_bits(offset) & 0xff;
2277 }
2278 break;
2279 case PACKET3_EVENT_WRITE_EOP:
2280 {
2281 uint64_t offset;
2282
2283 if (pkt->count != 4) {
2284 DRM_ERROR("bad EVENT_WRITE_EOP\n");
2285 return -EINVAL;
2286 }
2287 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2288 if (r) {
2289 DRM_ERROR("bad EVENT_WRITE_EOP\n");
2290 return -EINVAL;
2291 }
2292
2293 offset = reloc->gpu_offset +
2294 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2295 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2296
2297 ib[idx+1] = offset & 0xfffffffc;
2298 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2299 break;
2300 }
2301 case PACKET3_EVENT_WRITE_EOS:
2302 {
2303 uint64_t offset;
2304
2305 if (pkt->count != 3) {
2306 DRM_ERROR("bad EVENT_WRITE_EOS\n");
2307 return -EINVAL;
2308 }
2309 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2310 if (r) {
2311 DRM_ERROR("bad EVENT_WRITE_EOS\n");
2312 return -EINVAL;
2313 }
2314
2315 offset = reloc->gpu_offset +
2316 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2317 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2318
2319 ib[idx+1] = offset & 0xfffffffc;
2320 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2321 break;
2322 }
2323 case PACKET3_SET_CONFIG_REG:
2324 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2325 end_reg = 4 * pkt->count + start_reg - 4;
2326 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2327 (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2328 (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2329 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2330 return -EINVAL;
2331 }
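		/*
		 * Registers not flagged in reg_safe_bm are passed through
		 * unchanged; flagged ones go to evergreen_cs_handle_reg() for
		 * reloc patching and state tracking.
		 */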
2332 for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2333 if (evergreen_is_safe_reg(p, reg))
2334 continue;
2335 r = evergreen_cs_handle_reg(p, reg, idx);
2336 if (r)
2337 return r;
2338 }
2339 break;
2340 case PACKET3_SET_CONTEXT_REG:
2341 start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2342 end_reg = 4 * pkt->count + start_reg - 4;
2343 if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2344 (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2345 (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2346 DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2347 return -EINVAL;
2348 }
2349 for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2350 if (evergreen_is_safe_reg(p, reg))
2351 continue;
2352 r = evergreen_cs_handle_reg(p, reg, idx);
2353 if (r)
2354 return r;
2355 }
2356 break;
2357 case PACKET3_SET_RESOURCE:
2358 if (pkt->count % 8) {
2359 DRM_ERROR("bad SET_RESOURCE\n");
2360 return -EINVAL;
2361 }
2362 start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2363 end_reg = 4 * pkt->count + start_reg - 4;
2364 if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2365 (start_reg >= PACKET3_SET_RESOURCE_END) ||
2366 (end_reg >= PACKET3_SET_RESOURCE_END)) {
2367 DRM_ERROR("bad SET_RESOURCE\n");
2368 return -EINVAL;
2369 }
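		/*
		 * Each resource descriptor is 8 dwords; patch the texture or
		 * vertex buffer addresses of every descriptor below.
		 */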
2370 for (i = 0; i < (pkt->count / 8); i++) {
2371 struct radeon_bo *texture, *mipmap;
2372 u32 toffset, moffset;
2373 u32 size, offset, mip_address, tex_dim;
2374
2375 switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2376 case SQ_TEX_VTX_VALID_TEXTURE:
2377 /* tex base */
2378 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2379 if (r) {
2380 DRM_ERROR("bad SET_RESOURCE (tex)\n");
2381 return -EINVAL;
2382 }
2383 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2384 ib[idx+1+(i*8)+1] |=
2385 TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2386 if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2387 unsigned bankw, bankh, mtaspect, tile_split;
2388
2389 evergreen_tiling_fields(reloc->tiling_flags,
2390 &bankw, &bankh, &mtaspect,
2391 &tile_split);
2392 ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2393 ib[idx+1+(i*8)+7] |=
2394 TEX_BANK_WIDTH(bankw) |
2395 TEX_BANK_HEIGHT(bankh) |
2396 MACRO_TILE_ASPECT(mtaspect) |
2397 TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2398 }
2399 }
2400 texture = reloc->robj;
2401 toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2402
2403 /* tex mip base */
2404 tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2405 mip_address = ib[idx+1+(i*8)+3];
2406
2407 if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2408 !mip_address &&
2409 !radeon_cs_packet_next_is_pkt3_nop(p)) {
2410 /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2411 * It should be 0 if FMASK is disabled. */
2412 moffset = 0;
2413 mipmap = NULL;
2414 } else {
2415 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2416 if (r) {
2417 DRM_ERROR("bad SET_RESOURCE (tex)\n");
2418 return -EINVAL;
2419 }
2420 moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2421 mipmap = reloc->robj;
2422 }
2423
2424 r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2425 if (r)
2426 return r;
2427 ib[idx+1+(i*8)+2] += toffset;
2428 ib[idx+1+(i*8)+3] += moffset;
2429 break;
2430 case SQ_TEX_VTX_VALID_BUFFER:
2431 {
2432 uint64_t offset64;
2433 /* vtx base */
2434 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2435 if (r) {
2436 DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2437 return -EINVAL;
2438 }
2439 offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2440 size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2441 if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2442 /* force size to size of the buffer */
2443 dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2444 ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2445 }
2446
2447 offset64 = reloc->gpu_offset + offset;
2448 ib[idx+1+(i*8)+0] = offset64;
2449 ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2450 (upper_32_bits(offset64) & 0xff);
2451 break;
2452 }
2453 case SQ_TEX_VTX_INVALID_TEXTURE:
2454 case SQ_TEX_VTX_INVALID_BUFFER:
2455 default:
2456 DRM_ERROR("bad SET_RESOURCE\n");
2457 return -EINVAL;
2458 }
2459 }
2460 break;
2461 case PACKET3_SET_ALU_CONST:
2462 /* XXX fix me ALU const buffers only */
2463 break;
2464 case PACKET3_SET_BOOL_CONST:
2465 start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2466 end_reg = 4 * pkt->count + start_reg - 4;
2467 if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2468 (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2469 (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2470 DRM_ERROR("bad SET_BOOL_CONST\n");
2471 return -EINVAL;
2472 }
2473 break;
2474 case PACKET3_SET_LOOP_CONST:
2475 start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2476 end_reg = 4 * pkt->count + start_reg - 4;
2477 if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2478 (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2479 (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2480 DRM_ERROR("bad SET_LOOP_CONST\n");
2481 return -EINVAL;
2482 }
2483 break;
2484 case PACKET3_SET_CTL_CONST:
2485 start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2486 end_reg = 4 * pkt->count + start_reg - 4;
2487 if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2488 (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2489 (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2490 DRM_ERROR("bad SET_CTL_CONST\n");
2491 return -EINVAL;
2492 }
2493 break;
2494 case PACKET3_SET_SAMPLER:
2495 if (pkt->count % 3) {
2496 DRM_ERROR("bad SET_SAMPLER\n");
2497 return -EINVAL;
2498 }
2499 start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2500 end_reg = 4 * pkt->count + start_reg - 4;
2501 if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2502 (start_reg >= PACKET3_SET_SAMPLER_END) ||
2503 (end_reg >= PACKET3_SET_SAMPLER_END)) {
2504 DRM_ERROR("bad SET_SAMPLER\n");
2505 return -EINVAL;
2506 }
2507 break;
2508 case PACKET3_STRMOUT_BUFFER_UPDATE:
2509 if (pkt->count != 4) {
2510 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2511 return -EINVAL;
2512 }
2513 /* Updating memory at DST_ADDRESS. */
2514 if (idx_value & 0x1) {
2515 u64 offset;
2516 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2517 if (r) {
2518 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2519 return -EINVAL;
2520 }
2521 offset = radeon_get_ib_value(p, idx+1);
2522 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2523 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2524 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%"PRIx64", 0x%lx\n",
2525 offset + 4, radeon_bo_size(reloc->robj));
2526 return -EINVAL;
2527 }
2528 offset += reloc->gpu_offset;
2529 ib[idx+1] = offset;
2530 ib[idx+2] = upper_32_bits(offset) & 0xff;
2531 }
2532 /* Reading data from SRC_ADDRESS. */
2533 if (((idx_value >> 1) & 0x3) == 2) {
2534 u64 offset;
2535 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2536 if (r) {
2537 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2538 return -EINVAL;
2539 }
2540 offset = radeon_get_ib_value(p, idx+3);
2541 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2542 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2543 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%"PRIx64", 0x%lx\n",
2544 offset + 4, radeon_bo_size(reloc->robj));
2545 return -EINVAL;
2546 }
2547 offset += reloc->gpu_offset;
2548 ib[idx+3] = offset;
2549 ib[idx+4] = upper_32_bits(offset) & 0xff;
2550 }
2551 break;
2552 case PACKET3_MEM_WRITE:
2553 {
2554 u64 offset;
2555
2556 if (pkt->count != 3) {
2557 DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2558 return -EINVAL;
2559 }
2560 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2561 if (r) {
2562 DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2563 return -EINVAL;
2564 }
2565 offset = radeon_get_ib_value(p, idx+0);
2566 offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2567 if (offset & 0x7) {
2568 DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2569 return -EINVAL;
2570 }
2571 if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2572 DRM_ERROR("bad MEM_WRITE bo too small: 0x%"PRIx64", 0x%lx\n",
2573 offset + 8, radeon_bo_size(reloc->robj));
2574 return -EINVAL;
2575 }
2576 offset += reloc->gpu_offset;
2577 ib[idx+0] = offset;
2578 ib[idx+1] = upper_32_bits(offset) & 0xff;
2579 break;
2580 }
2581 case PACKET3_COPY_DW:
2582 if (pkt->count != 4) {
2583 DRM_ERROR("bad COPY_DW (invalid count)\n");
2584 return -EINVAL;
2585 }
2586 if (idx_value & 0x1) {
2587 u64 offset;
2588 /* SRC is memory. */
2589 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2590 if (r) {
2591 DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2592 return -EINVAL;
2593 }
2594 offset = radeon_get_ib_value(p, idx+1);
2595 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2596 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2597 DRM_ERROR("bad COPY_DW src bo too small: 0x%"PRIx64", 0x%lx\n",
2598 offset + 4, radeon_bo_size(reloc->robj));
2599 return -EINVAL;
2600 }
2601 offset += reloc->gpu_offset;
2602 ib[idx+1] = offset;
2603 ib[idx+2] = upper_32_bits(offset) & 0xff;
2604 } else {
2605 /* SRC is a reg. */
2606 reg = radeon_get_ib_value(p, idx+1) << 2;
2607 if (!evergreen_is_safe_reg(p, reg)) {
2608 dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2609 reg, idx + 1);
2610 return -EINVAL;
2611 }
2612 }
2613 if (idx_value & 0x2) {
2614 u64 offset;
2615 /* DST is memory. */
2616 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2617 if (r) {
2618 DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2619 return -EINVAL;
2620 }
2621 offset = radeon_get_ib_value(p, idx+3);
2622 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2623 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2624 DRM_ERROR("bad COPY_DW dst bo too small: 0x%"PRIx64", 0x%lx\n",
2625 offset + 4, radeon_bo_size(reloc->robj));
2626 return -EINVAL;
2627 }
2628 offset += reloc->gpu_offset;
2629 ib[idx+3] = offset;
2630 ib[idx+4] = upper_32_bits(offset) & 0xff;
2631 } else {
2632 /* DST is a reg. */
2633 reg = radeon_get_ib_value(p, idx+3) << 2;
2634 if (!evergreen_is_safe_reg(p, reg)) {
2635 dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2636 reg, idx + 3);
2637 return -EINVAL;
2638 }
2639 }
2640 break;
2641 case PACKET3_NOP:
2642 break;
2643 default:
2644 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2645 return -EINVAL;
2646 }
2647 return 0;
2648 }
2649
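/**
 * evergreen_cs_parse() - parse the GFX/compute IB from the CS ioctl
 * @p: parser structure holding parsing context
 *
 * Allocates and initializes the state tracker from the chip's tile
 * configuration on first use, then walks the IB packet by packet,
 * dispatching type-0 packets to evergreen_cs_parse_packet0() and type-3
 * packets to evergreen_packet3_check().  The tracker is freed before
 * returning.  Returns 0 for success and an error on failure.
 */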
2650 int evergreen_cs_parse(struct radeon_cs_parser *p)
2651 {
2652 struct radeon_cs_packet pkt;
2653 struct evergreen_cs_track *track;
2654 u32 tmp;
2655 int r;
2656
2657 if (p->track == NULL) {
2658 /* initialize tracker, we are in kms */
2659 track = kzalloc(sizeof(*track), GFP_KERNEL);
2660 if (track == NULL)
2661 return -ENOMEM;
2662 evergreen_cs_track_init(track);
2663 if (p->rdev->family >= CHIP_CAYMAN) {
2664 tmp = p->rdev->config.cayman.tile_config;
2665 track->reg_safe_bm = cayman_reg_safe_bm;
2666 } else {
2667 tmp = p->rdev->config.evergreen.tile_config;
2668 track->reg_safe_bm = evergreen_reg_safe_bm;
2669 }
2670 BUILD_BUG_ON(ARRAY_SIZE(cayman_reg_safe_bm) != REG_SAFE_BM_SIZE);
2671 BUILD_BUG_ON(ARRAY_SIZE(evergreen_reg_safe_bm) != REG_SAFE_BM_SIZE);
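		/*
		 * tile_config packs the surface layout: bits [3:0] encode the
		 * pipe count, [7:4] the bank count, [11:8] the group size and
		 * [15:12] the row size, decoded below.
		 */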
2672 switch (tmp & 0xf) {
2673 case 0:
2674 track->npipes = 1;
2675 break;
2676 case 1:
2677 default:
2678 track->npipes = 2;
2679 break;
2680 case 2:
2681 track->npipes = 4;
2682 break;
2683 case 3:
2684 track->npipes = 8;
2685 break;
2686 }
2687
2688 switch ((tmp & 0xf0) >> 4) {
2689 case 0:
2690 track->nbanks = 4;
2691 break;
2692 case 1:
2693 default:
2694 track->nbanks = 8;
2695 break;
2696 case 2:
2697 track->nbanks = 16;
2698 break;
2699 }
2700
2701 switch ((tmp & 0xf00) >> 8) {
2702 case 0:
2703 track->group_size = 256;
2704 break;
2705 case 1:
2706 default:
2707 track->group_size = 512;
2708 break;
2709 }
2710
2711 switch ((tmp & 0xf000) >> 12) {
2712 case 0:
2713 track->row_size = 1;
2714 break;
2715 case 1:
2716 default:
2717 track->row_size = 2;
2718 break;
2719 case 2:
2720 track->row_size = 4;
2721 break;
2722 }
2723
2724 p->track = track;
2725 }
2726 do {
2727 r = radeon_cs_packet_parse(p, &pkt, p->idx);
2728 if (r) {
2729 kfree(p->track);
2730 p->track = NULL;
2731 return r;
2732 }
2733 p->idx += pkt.count + 2;
2734 switch (pkt.type) {
2735 case RADEON_PACKET_TYPE0:
2736 r = evergreen_cs_parse_packet0(p, &pkt);
2737 break;
2738 case RADEON_PACKET_TYPE2:
2739 break;
2740 case RADEON_PACKET_TYPE3:
2741 r = evergreen_packet3_check(p, &pkt);
2742 break;
2743 default:
2744 DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2745 kfree(p->track);
2746 p->track = NULL;
2747 return -EINVAL;
2748 }
2749 if (r) {
2750 kfree(p->track);
2751 p->track = NULL;
2752 return r;
2753 }
2754 } while (p->idx < p->chunk_ib->length_dw);
2755 #if 0
2756 for (r = 0; r < p->ib.length_dw; r++) {
2757 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
2758 mdelay(1);
2759 }
2760 #endif
2761 kfree(p->track);
2762 p->track = NULL;
2763 return 0;
2764 }
2765
2766 /**
2767 * evergreen_dma_cs_parse() - parse the DMA IB
2768 * @p: parser structure holding parsing context.
2769 *
2770 * Parses the DMA IB from the CS ioctl and updates
2771 * the GPU addresses based on the reloc information and
2772 * checks for errors. (Evergreen-Cayman)
2773 * Returns 0 for success and an error on failure.
2774 **/
2775 int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2776 {
2777 struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
2778 struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
2779 u32 header, cmd, count, sub_cmd;
2780 uint32_t *ib = p->ib.ptr;
2781 u32 idx;
2782 u64 src_offset, dst_offset, dst2_offset;
2783 int r;
2784
2785 do {
2786 if (p->idx >= ib_chunk->length_dw) {
2787 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2788 p->idx, ib_chunk->length_dw);
2789 return -EINVAL;
2790 }
2791 idx = p->idx;
2792 header = radeon_get_ib_value(p, idx);
2793 cmd = GET_DMA_CMD(header);
2794 count = GET_DMA_COUNT(header);
2795 sub_cmd = GET_DMA_SUB_CMD(header);
2796
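		/*
		 * Each DMA packet starts with a header dword encoding the
		 * opcode, transfer count and sub-opcode; every case below has
		 * to advance p->idx past the complete packet.
		 */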
2797 switch (cmd) {
2798 case DMA_PACKET_WRITE:
2799 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2800 if (r) {
2801 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2802 return -EINVAL;
2803 }
2804 switch (sub_cmd) {
2805 /* tiled */
2806 case 8:
2807 dst_offset = radeon_get_ib_value(p, idx+1);
2808 dst_offset <<= 8;
2809
2810 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2811 p->idx += count + 7;
2812 break;
2813 /* linear */
2814 case 0:
2815 dst_offset = radeon_get_ib_value(p, idx+1);
2816 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2817
2818 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2819 ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2820 p->idx += count + 3;
2821 break;
2822 default:
2823 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2824 return -EINVAL;
2825 }
2826 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2827 dev_warn(p->dev, "DMA write buffer too small (%"PRIu64" %lu)\n",
2828 dst_offset, radeon_bo_size(dst_reloc->robj));
2829 return -EINVAL;
2830 }
2831 break;
2832 case DMA_PACKET_COPY:
2833 r = r600_dma_cs_next_reloc(p, &src_reloc);
2834 if (r) {
2835 DRM_ERROR("bad DMA_PACKET_COPY\n");
2836 return -EINVAL;
2837 }
2838 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2839 if (r) {
2840 DRM_ERROR("bad DMA_PACKET_COPY\n");
2841 return -EINVAL;
2842 }
2843 switch (sub_cmd) {
2844 /* Copy L2L, DW aligned */
2845 case 0x00:
2846 /* L2L, dw */
2847 src_offset = radeon_get_ib_value(p, idx+2);
2848 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2849 dst_offset = radeon_get_ib_value(p, idx+1);
2850 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2851 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2852 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%"PRIu64" %lu)\n",
2853 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2854 return -EINVAL;
2855 }
2856 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2857 dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%"PRIu64" %lu)\n",
2858 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2859 return -EINVAL;
2860 }
2861 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2862 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2863 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2864 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2865 p->idx += 5;
2866 break;
2867 /* Copy L2T/T2L */
2868 case 0x08:
2869 /* detile bit */
2870 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2871 /* tiled src, linear dst */
2872 src_offset = radeon_get_ib_value(p, idx+1);
2873 src_offset <<= 8;
2874 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2875
2876 dst_offset = radeon_get_ib_value(p, idx + 7);
2877 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2878 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2879 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2880 } else {
2881 /* linear src, tiled dst */
2882 src_offset = radeon_get_ib_value(p, idx+7);
2883 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2884 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2885 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2886
2887 dst_offset = radeon_get_ib_value(p, idx+1);
2888 dst_offset <<= 8;
2889 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2890 }
2891 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2892 dev_warn(p->dev, "DMA L2T, src buffer too small (%"PRIu64" %lu)\n",
2893 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2894 return -EINVAL;
2895 }
2896 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2897 dev_warn(p->dev, "DMA L2T, dst buffer too small (%"PRIu64" %lu)\n",
2898 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2899 return -EINVAL;
2900 }
2901 p->idx += 9;
2902 break;
2903 /* Copy L2L, byte aligned */
2904 case 0x40:
2905 /* L2L, byte */
2906 src_offset = radeon_get_ib_value(p, idx+2);
2907 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2908 dst_offset = radeon_get_ib_value(p, idx+1);
2909 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2910 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2911 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%"PRIu64" %lu)\n",
2912 src_offset + count, radeon_bo_size(src_reloc->robj));
2913 return -EINVAL;
2914 }
2915 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2916 dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%"PRIu64" %lu)\n",
2917 dst_offset + count, radeon_bo_size(dst_reloc->robj));
2918 return -EINVAL;
2919 }
2920 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2921 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2922 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2923 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2924 p->idx += 5;
2925 break;
2926 /* Copy L2L, partial */
2927 case 0x41:
2928 /* L2L, partial */
2929 if (p->family < CHIP_CAYMAN) {
2930 DRM_ERROR("L2L Partial is cayman only !\n");
2931 return -EINVAL;
2932 }
2933 ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2934 ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2935 ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2936 ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2937
2938 p->idx += 9;
2939 break;
2940 /* Copy L2L, DW aligned, broadcast */
2941 case 0x44:
2942 /* L2L, dw, broadcast */
2943 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2944 if (r) {
2945 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2946 return -EINVAL;
2947 }
2948 dst_offset = radeon_get_ib_value(p, idx+1);
2949 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2950 dst2_offset = radeon_get_ib_value(p, idx+2);
2951 dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2952 src_offset = radeon_get_ib_value(p, idx+3);
2953 src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2954 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2955 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%"PRIu64" %lu)\n",
2956 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2957 return -EINVAL;
2958 }
2959 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2960 dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%"PRIu64" %lu)\n",
2961 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2962 return -EINVAL;
2963 }
2964 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2965 dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%"PRIu64" %lu)\n",
2966 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2967 return -EINVAL;
2968 }
2969 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2970 ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2971 ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2972 ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2973 ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2974 ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2975 p->idx += 7;
2976 break;
2977 /* Copy L2T Frame to Field */
2978 case 0x48:
2979 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2980 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2981 return -EINVAL;
2982 }
2983 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2984 if (r) {
2985 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2986 return -EINVAL;
2987 }
2988 dst_offset = radeon_get_ib_value(p, idx+1);
2989 dst_offset <<= 8;
2990 dst2_offset = radeon_get_ib_value(p, idx+2);
2991 dst2_offset <<= 8;
2992 src_offset = radeon_get_ib_value(p, idx+8);
2993 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2994 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2995 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%"PRIu64" %lu)\n",
2996 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2997 return -EINVAL;
2998 }
2999 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3000 				dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%"PRIu64" %lu)\n",
3001 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3002 return -EINVAL;
3003 }
3004 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3005 				dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%"PRIu64" %lu)\n",
3006 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3007 return -EINVAL;
3008 }
3009 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3010 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3011 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3012 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3013 p->idx += 10;
3014 break;
3015 /* Copy L2T/T2L, partial */
3016 case 0x49:
3017 /* L2T, T2L partial */
3018 if (p->family < CHIP_CAYMAN) {
3019 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3020 return -EINVAL;
3021 }
3022 /* detile bit */
3023 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3024 /* tiled src, linear dst */
3025 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3026
3027 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3028 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3029 } else {
3030 /* linear src, tiled dst */
3031 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3032 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3033
3034 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3035 }
3036 p->idx += 12;
3037 break;
3038 /* Copy L2T broadcast */
3039 case 0x4b:
3040 /* L2T, broadcast */
3041 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3042 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3043 return -EINVAL;
3044 }
3045 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3046 if (r) {
3047 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3048 return -EINVAL;
3049 }
3050 dst_offset = radeon_get_ib_value(p, idx+1);
3051 dst_offset <<= 8;
3052 dst2_offset = radeon_get_ib_value(p, idx+2);
3053 dst2_offset <<= 8;
3054 src_offset = radeon_get_ib_value(p, idx+8);
3055 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3056 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3057 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%"PRIu64" %lu)\n",
3058 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3059 return -EINVAL;
3060 }
3061 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3062 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%"PRIu64" %lu)\n",
3063 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3064 return -EINVAL;
3065 }
3066 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3067 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%"PRIu64" %lu)\n",
3068 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3069 return -EINVAL;
3070 }
3071 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3072 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3073 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3074 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3075 p->idx += 10;
3076 break;
3077 /* Copy L2T/T2L (tile units) */
3078 case 0x4c:
3079 /* L2T, T2L */
3080 /* detile bit */
3081 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3082 /* tiled src, linear dst */
3083 src_offset = radeon_get_ib_value(p, idx+1);
3084 src_offset <<= 8;
3085 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3086
3087 dst_offset = radeon_get_ib_value(p, idx+7);
3088 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3089 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3090 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3091 } else {
3092 /* linear src, tiled dst */
3093 src_offset = radeon_get_ib_value(p, idx+7);
3094 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3095 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3096 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3097
3098 dst_offset = radeon_get_ib_value(p, idx+1);
3099 dst_offset <<= 8;
3100 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3101 }
3102 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3103 dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%"PRIu64" %lu)\n",
3104 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3105 return -EINVAL;
3106 }
3107 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3108 dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%"PRIu64" %lu)\n",
3109 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3110 return -EINVAL;
3111 }
3112 p->idx += 9;
3113 break;
3114 /* Copy T2T, partial (tile units) */
3115 case 0x4d:
3116 /* T2T partial */
3117 if (p->family < CHIP_CAYMAN) {
3118 				DRM_ERROR("T2T Partial is cayman only !\n");
3119 return -EINVAL;
3120 }
3121 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3122 ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3123 p->idx += 13;
3124 break;
3125 /* Copy L2T broadcast (tile units) */
3126 case 0x4f:
3127 /* L2T, broadcast */
3128 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3129 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3130 return -EINVAL;
3131 }
3132 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3133 if (r) {
3134 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3135 return -EINVAL;
3136 }
3137 dst_offset = radeon_get_ib_value(p, idx+1);
3138 dst_offset <<= 8;
3139 dst2_offset = radeon_get_ib_value(p, idx+2);
3140 dst2_offset <<= 8;
3141 src_offset = radeon_get_ib_value(p, idx+8);
3142 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3143 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3144 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%"PRIu64" %lu)\n",
3145 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3146 return -EINVAL;
3147 }
3148 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3149 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%"PRIu64" %lu)\n",
3150 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3151 return -EINVAL;
3152 }
3153 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3154 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%"PRIu64" %lu)\n",
3155 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3156 return -EINVAL;
3157 }
3158 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3159 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3160 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3161 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3162 p->idx += 10;
3163 break;
3164 default:
3165 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3166 return -EINVAL;
3167 }
3168 break;
3169 case DMA_PACKET_CONSTANT_FILL:
3170 r = r600_dma_cs_next_reloc(p, &dst_reloc);
3171 if (r) {
3172 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3173 return -EINVAL;
3174 }
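			/* dword 3 bits [23:16] carry address bits [39:32] of the fill destination */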
3175 dst_offset = radeon_get_ib_value(p, idx+1);
3176 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3177 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3178 dev_warn(p->dev, "DMA constant fill buffer too small (%"PRIu64" %lu)\n",
3179 dst_offset, radeon_bo_size(dst_reloc->robj));
3180 return -EINVAL;
3181 }
3182 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3183 ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3184 p->idx += 4;
3185 break;
3186 case DMA_PACKET_NOP:
3187 p->idx += 1;
3188 break;
3189 default:
3190 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3191 return -EINVAL;
3192 }
3193 } while (p->idx < p->chunk_ib->length_dw);
3194 #if 0
3195 	for (r = 0; r < p->ib.length_dw; r++) {
3196 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
3197 mdelay(1);
3198 }
3199 #endif
3200 return 0;
3201 }
3202
3203 /* vm parser */
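/**
 * evergreen_vm_reg_valid() - check if a register may be written from a VM IB
 * @reg: register offset in bytes
 *
 * With virtual memory the IB is not rewritten, so only register writes are
 * policed: everything at or above 0x28000 (context registers) is allowed,
 * plus the explicit list of config registers below.
 */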
3204 static bool evergreen_vm_reg_valid(u32 reg)
3205 {
3206 /* context regs are fine */
3207 if (reg >= 0x28000)
3208 return true;
3209
3210 /* check config regs */
3211 switch (reg) {
3212 case WAIT_UNTIL:
3213 case GRBM_GFX_INDEX:
3214 case CP_STRMOUT_CNTL:
3215 case CP_COHER_CNTL:
3216 case CP_COHER_SIZE:
3217 case VGT_VTX_VECT_EJECT_REG:
3218 case VGT_CACHE_INVALIDATION:
3219 case VGT_GS_VERTEX_REUSE:
3220 case VGT_PRIMITIVE_TYPE:
3221 case VGT_INDEX_TYPE:
3222 case VGT_NUM_INDICES:
3223 case VGT_NUM_INSTANCES:
3224 case VGT_COMPUTE_DIM_X:
3225 case VGT_COMPUTE_DIM_Y:
3226 case VGT_COMPUTE_DIM_Z:
3227 case VGT_COMPUTE_START_X:
3228 case VGT_COMPUTE_START_Y:
3229 case VGT_COMPUTE_START_Z:
3230 case VGT_COMPUTE_INDEX:
3231 case VGT_COMPUTE_THREAD_GROUP_SIZE:
3232 case VGT_HS_OFFCHIP_PARAM:
3233 case PA_CL_ENHANCE:
3234 case PA_SU_LINE_STIPPLE_VALUE:
3235 case PA_SC_LINE_STIPPLE_STATE:
3236 case PA_SC_ENHANCE:
3237 case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3238 case SQ_DYN_GPR_SIMD_LOCK_EN:
3239 case SQ_CONFIG:
3240 case SQ_GPR_RESOURCE_MGMT_1:
3241 case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3242 case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3243 case SQ_CONST_MEM_BASE:
3244 case SQ_STATIC_THREAD_MGMT_1:
3245 case SQ_STATIC_THREAD_MGMT_2:
3246 case SQ_STATIC_THREAD_MGMT_3:
3247 case SPI_CONFIG_CNTL:
3248 case SPI_CONFIG_CNTL_1:
3249 case TA_CNTL_AUX:
3250 case DB_DEBUG:
3251 case DB_DEBUG2:
3252 case DB_DEBUG3:
3253 case DB_DEBUG4:
3254 case DB_WATERMARKS:
3255 case TD_PS_BORDER_COLOR_INDEX:
3256 case TD_PS_BORDER_COLOR_RED:
3257 case TD_PS_BORDER_COLOR_GREEN:
3258 case TD_PS_BORDER_COLOR_BLUE:
3259 case TD_PS_BORDER_COLOR_ALPHA:
3260 case TD_VS_BORDER_COLOR_INDEX:
3261 case TD_VS_BORDER_COLOR_RED:
3262 case TD_VS_BORDER_COLOR_GREEN:
3263 case TD_VS_BORDER_COLOR_BLUE:
3264 case TD_VS_BORDER_COLOR_ALPHA:
3265 case TD_GS_BORDER_COLOR_INDEX:
3266 case TD_GS_BORDER_COLOR_RED:
3267 case TD_GS_BORDER_COLOR_GREEN:
3268 case TD_GS_BORDER_COLOR_BLUE:
3269 case TD_GS_BORDER_COLOR_ALPHA:
3270 case TD_HS_BORDER_COLOR_INDEX:
3271 case TD_HS_BORDER_COLOR_RED:
3272 case TD_HS_BORDER_COLOR_GREEN:
3273 case TD_HS_BORDER_COLOR_BLUE:
3274 case TD_HS_BORDER_COLOR_ALPHA:
3275 case TD_LS_BORDER_COLOR_INDEX:
3276 case TD_LS_BORDER_COLOR_RED:
3277 case TD_LS_BORDER_COLOR_GREEN:
3278 case TD_LS_BORDER_COLOR_BLUE:
3279 case TD_LS_BORDER_COLOR_ALPHA:
3280 case TD_CS_BORDER_COLOR_INDEX:
3281 case TD_CS_BORDER_COLOR_RED:
3282 case TD_CS_BORDER_COLOR_GREEN:
3283 case TD_CS_BORDER_COLOR_BLUE:
3284 case TD_CS_BORDER_COLOR_ALPHA:
3285 case SQ_ESGS_RING_SIZE:
3286 case SQ_GSVS_RING_SIZE:
3287 case SQ_ESTMP_RING_SIZE:
3288 case SQ_GSTMP_RING_SIZE:
3289 case SQ_HSTMP_RING_SIZE:
3290 case SQ_LSTMP_RING_SIZE:
3291 case SQ_PSTMP_RING_SIZE:
3292 case SQ_VSTMP_RING_SIZE:
3293 case SQ_ESGS_RING_ITEMSIZE:
3294 case SQ_ESTMP_RING_ITEMSIZE:
3295 case SQ_GSTMP_RING_ITEMSIZE:
3296 case SQ_GSVS_RING_ITEMSIZE:
3297 case SQ_GS_VERT_ITEMSIZE:
3298 case SQ_GS_VERT_ITEMSIZE_1:
3299 case SQ_GS_VERT_ITEMSIZE_2:
3300 case SQ_GS_VERT_ITEMSIZE_3:
3301 case SQ_GSVS_RING_OFFSET_1:
3302 case SQ_GSVS_RING_OFFSET_2:
3303 case SQ_GSVS_RING_OFFSET_3:
3304 case SQ_HSTMP_RING_ITEMSIZE:
3305 case SQ_LSTMP_RING_ITEMSIZE:
3306 case SQ_PSTMP_RING_ITEMSIZE:
3307 case SQ_VSTMP_RING_ITEMSIZE:
3308 case VGT_TF_RING_SIZE:
3309 case SQ_ESGS_RING_BASE:
3310 case SQ_GSVS_RING_BASE:
3311 case SQ_ESTMP_RING_BASE:
3312 case SQ_GSTMP_RING_BASE:
3313 case SQ_HSTMP_RING_BASE:
3314 case SQ_LSTMP_RING_BASE:
3315 case SQ_PSTMP_RING_BASE:
3316 case SQ_VSTMP_RING_BASE:
3317 case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3318 case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3319 return true;
3320 default:
3321 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3322 return false;
3323 }
3324 }
3325
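/**
 * evergreen_vm_packet3_check() - validate a PACKET3 command from a VM IB
 * @rdev: radeon_device pointer
 * @ib: IB buffer
 * @pkt: packet to check
 *
 * Most packets are passed through untouched since the addresses they carry
 * are already virtual; only packets that can target registers (COND_WRITE,
 * COPY_DW, SET_CONFIG_REG and CP_DMA) are checked against
 * evergreen_vm_reg_valid().
 */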
3326 static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3327 u32 *ib, struct radeon_cs_packet *pkt)
3328 {
3329 u32 idx = pkt->idx + 1;
3330 u32 idx_value = ib[idx];
3331 u32 start_reg, end_reg, reg, i;
3332 u32 command, info;
3333
3334 switch (pkt->opcode) {
3335 case PACKET3_NOP:
3336 break;
3337 case PACKET3_SET_BASE:
3338 if (idx_value != 1) {
3339 			DRM_ERROR("bad SET_BASE\n");
3340 return -EINVAL;
3341 }
3342 break;
3343 case PACKET3_CLEAR_STATE:
3344 case PACKET3_INDEX_BUFFER_SIZE:
3345 case PACKET3_DISPATCH_DIRECT:
3346 case PACKET3_DISPATCH_INDIRECT:
3347 case PACKET3_MODE_CONTROL:
3348 case PACKET3_SET_PREDICATION:
3349 case PACKET3_COND_EXEC:
3350 case PACKET3_PRED_EXEC:
3351 case PACKET3_DRAW_INDIRECT:
3352 case PACKET3_DRAW_INDEX_INDIRECT:
3353 case PACKET3_INDEX_BASE:
3354 case PACKET3_DRAW_INDEX_2:
3355 case PACKET3_CONTEXT_CONTROL:
3356 case PACKET3_DRAW_INDEX_OFFSET:
3357 case PACKET3_INDEX_TYPE:
3358 case PACKET3_DRAW_INDEX:
3359 case PACKET3_DRAW_INDEX_AUTO:
3360 case PACKET3_DRAW_INDEX_IMMD:
3361 case PACKET3_NUM_INSTANCES:
3362 case PACKET3_DRAW_INDEX_MULTI_AUTO:
3363 case PACKET3_STRMOUT_BUFFER_UPDATE:
3364 case PACKET3_DRAW_INDEX_OFFSET_2:
3365 case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3366 case PACKET3_MPEG_INDEX:
3367 case PACKET3_WAIT_REG_MEM:
3368 case PACKET3_MEM_WRITE:
3369 case PACKET3_SURFACE_SYNC:
3370 case PACKET3_EVENT_WRITE:
3371 case PACKET3_EVENT_WRITE_EOP:
3372 case PACKET3_EVENT_WRITE_EOS:
3373 case PACKET3_SET_CONTEXT_REG:
3374 case PACKET3_SET_BOOL_CONST:
3375 case PACKET3_SET_LOOP_CONST:
3376 case PACKET3_SET_RESOURCE:
3377 case PACKET3_SET_SAMPLER:
3378 case PACKET3_SET_CTL_CONST:
3379 case PACKET3_SET_RESOURCE_OFFSET:
3380 case PACKET3_SET_CONTEXT_REG_INDIRECT:
3381 case PACKET3_SET_RESOURCE_INDIRECT:
3382 case CAYMAN_PACKET3_DEALLOC_STATE:
3383 break;
3384 case PACKET3_COND_WRITE:
3385 if (idx_value & 0x100) {
3386 reg = ib[idx + 5] * 4;
3387 if (!evergreen_vm_reg_valid(reg))
3388 return -EINVAL;
3389 }
3390 break;
3391 case PACKET3_COPY_DW:
3392 if (idx_value & 0x2) {
3393 reg = ib[idx + 3] * 4;
3394 if (!evergreen_vm_reg_valid(reg))
3395 return -EINVAL;
3396 }
3397 break;
3398 case PACKET3_SET_CONFIG_REG:
3399 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3400 end_reg = 4 * pkt->count + start_reg - 4;
3401 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3402 (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3403 (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3404 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3405 return -EINVAL;
3406 }
3407 for (i = 0; i < pkt->count; i++) {
3408 reg = start_reg + (4 * i);
3409 if (!evergreen_vm_reg_valid(reg))
3410 return -EINVAL;
3411 }
3412 break;
3413 case PACKET3_CP_DMA:
3414 command = ib[idx + 4];
3415 info = ib[idx + 1];
3416 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3417 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3418 ((((info & 0x00300000) >> 20) == 0) &&
3419 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3420 ((((info & 0x60000000) >> 29) == 0) &&
3421 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3422 			/* non mem-to-mem copies require a dword-aligned count */
3423 if ((command & 0x1fffff) % 4) {
3424 DRM_ERROR("CP DMA command requires dw count alignment\n");
3425 return -EINVAL;
3426 }
3427 }
3428 if (command & PACKET3_CP_DMA_CMD_SAS) {
3429 /* src address space is register */
3430 if (((info & 0x60000000) >> 29) == 0) {
3431 start_reg = idx_value << 2;
3432 if (command & PACKET3_CP_DMA_CMD_SAIC) {
3433 reg = start_reg;
3434 if (!evergreen_vm_reg_valid(reg)) {
3435 DRM_ERROR("CP DMA Bad SRC register\n");
3436 return -EINVAL;
3437 }
3438 } else {
3439 for (i = 0; i < (command & 0x1fffff); i++) {
3440 reg = start_reg + (4 * i);
3441 if (!evergreen_vm_reg_valid(reg)) {
3442 DRM_ERROR("CP DMA Bad SRC register\n");
3443 return -EINVAL;
3444 }
3445 }
3446 }
3447 }
3448 }
3449 if (command & PACKET3_CP_DMA_CMD_DAS) {
3450 /* dst address space is register */
3451 if (((info & 0x00300000) >> 20) == 0) {
3452 start_reg = ib[idx + 2];
3453 if (command & PACKET3_CP_DMA_CMD_DAIC) {
3454 reg = start_reg;
3455 if (!evergreen_vm_reg_valid(reg)) {
3456 DRM_ERROR("CP DMA Bad DST register\n");
3457 return -EINVAL;
3458 }
3459 } else {
3460 for (i = 0; i < (command & 0x1fffff); i++) {
3461 reg = start_reg + (4 * i);
3462 if (!evergreen_vm_reg_valid(reg)) {
3463 DRM_ERROR("CP DMA Bad DST register\n");
3464 return -EINVAL;
3465 }
3466 }
3467 }
3468 }
3469 }
3470 break;
3471 default:
3472 return -EINVAL;
3473 }
3474 return 0;
3475 }
3476
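/**
 * evergreen_ib_parse() - parse the GFX IB for a VM submission
 * @rdev: radeon_device pointer
 * @ib: radeon_ib pointer
 *
 * Walks the IB packet by packet: type-0 packets are rejected, type-2
 * packets are skipped and type-3 packets are validated with
 * evergreen_vm_packet3_check().  Returns 0 for success and an error on
 * failure.
 */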
3477 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3478 {
3479 int ret = 0;
3480 u32 idx = 0;
3481 struct radeon_cs_packet pkt;
3482
3483 do {
3484 pkt.idx = idx;
3485 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3486 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3487 pkt.one_reg_wr = 0;
3488 switch (pkt.type) {
3489 case RADEON_PACKET_TYPE0:
3490 dev_err(rdev->dev, "Packet0 not allowed!\n");
3491 ret = -EINVAL;
3492 break;
3493 case RADEON_PACKET_TYPE2:
3494 idx += 1;
3495 break;
3496 case RADEON_PACKET_TYPE3:
3497 pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3498 ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3499 idx += pkt.count + 2;
3500 break;
3501 default:
3502 dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3503 ret = -EINVAL;
3504 break;
3505 }
3506 if (ret)
3507 break;
3508 } while (idx < ib->length_dw);
3509
3510 return ret;
3511 }
3512
3513 /**
3514 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3515 * @rdev: radeon_device pointer
3516 * @ib: radeon_ib pointer
3517 *
3518  * Parses the DMA IB from the VM CS ioctl and
3519 * checks for errors. (Cayman-SI)
3520 * Returns 0 for success and an error on failure.
3521 **/
3522 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3523 {
3524 u32 idx = 0;
3525 u32 header, cmd, count, sub_cmd;
3526
3527 do {
3528 header = ib->ptr[idx];
3529 cmd = GET_DMA_CMD(header);
3530 count = GET_DMA_COUNT(header);
3531 sub_cmd = GET_DMA_SUB_CMD(header);
3532
3533 switch (cmd) {
3534 case DMA_PACKET_WRITE:
3535 switch (sub_cmd) {
3536 /* tiled */
3537 case 8:
3538 idx += count + 7;
3539 break;
3540 /* linear */
3541 case 0:
3542 idx += count + 3;
3543 break;
3544 default:
3545 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3546 return -EINVAL;
3547 }
3548 break;
3549 case DMA_PACKET_COPY:
3550 switch (sub_cmd) {
3551 /* Copy L2L, DW aligned */
3552 case 0x00:
3553 idx += 5;
3554 break;
3555 /* Copy L2T/T2L */
3556 case 0x08:
3557 idx += 9;
3558 break;
3559 /* Copy L2L, byte aligned */
3560 case 0x40:
3561 idx += 5;
3562 break;
3563 /* Copy L2L, partial */
3564 case 0x41:
3565 idx += 9;
3566 break;
3567 /* Copy L2L, DW aligned, broadcast */
3568 case 0x44:
3569 idx += 7;
3570 break;
3571 /* Copy L2T Frame to Field */
3572 case 0x48:
3573 idx += 10;
3574 break;
3575 /* Copy L2T/T2L, partial */
3576 case 0x49:
3577 idx += 12;
3578 break;
3579 /* Copy L2T broadcast */
3580 case 0x4b:
3581 idx += 10;
3582 break;
3583 /* Copy L2T/T2L (tile units) */
3584 case 0x4c:
3585 idx += 9;
3586 break;
3587 /* Copy T2T, partial (tile units) */
3588 case 0x4d:
3589 idx += 13;
3590 break;
3591 /* Copy L2T broadcast (tile units) */
3592 case 0x4f:
3593 idx += 10;
3594 break;
3595 default:
3596 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
3597 return -EINVAL;
3598 }
3599 break;
3600 case DMA_PACKET_CONSTANT_FILL:
3601 idx += 4;
3602 break;
3603 case DMA_PACKET_NOP:
3604 idx += 1;
3605 break;
3606 default:
3607 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3608 return -EINVAL;
3609 }
3610 } while (idx < ib->length_dw);
3611
3612 return 0;
3613 }
3614