/*	$NetBSD: radeon_evergreen_cs.c,v 1.5 2021/12/18 23:45:43 riastradh Exp $	*/

/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_evergreen_cs.c,v 1.5 2021/12/18 23:45:43 riastradh Exp $");

#include "radeon.h"
#include "radeon_asic.h"
#include "evergreend.h"
#include "evergreen_reg_safe.h"
#include "cayman_reg_safe.h"

#include <linux/nbsd-namespace.h>

#ifndef __NetBSD__
#define MAX(a,b)	(((a)>(b))?(a):(b))
#define MIN(a,b)	(((a)<(b))?(a):(b))
#endif

#define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm)

int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
			   struct radeon_bo_list **cs_reloc);
struct evergreen_cs_track {
	u32			group_size;
	u32			nbanks;
	u32			npipes;
	u32			row_size;
	/* value we track */
	u32			nsamples;		/* unused */
	struct radeon_bo	*cb_color_bo[12];
	u32			cb_color_bo_offset[12];
	struct radeon_bo	*cb_color_fmask_bo[8];	/* unused */
	struct radeon_bo	*cb_color_cmask_bo[8];	/* unused */
	u32			cb_color_info[12];
	u32			cb_color_view[12];
	u32			cb_color_pitch[12];
	u32			cb_color_slice[12];
	u32			cb_color_slice_idx[12];
	u32			cb_color_attrib[12];
	u32			cb_color_cmask_slice[8];/* unused */
	u32			cb_color_fmask_slice[8];/* unused */
	u32			cb_target_mask;
	u32			cb_shader_mask;	/* unused */
	u32			vgt_strmout_config;
	u32			vgt_strmout_buffer_config;
	struct radeon_bo	*vgt_strmout_bo[4];
	u32			vgt_strmout_bo_offset[4];
	u32			vgt_strmout_size[4];
	u32			db_depth_control;
	u32			db_depth_view;
	u32			db_depth_slice;
	u32			db_depth_size;
	u32			db_z_info;
	u32			db_z_read_offset;
	u32			db_z_write_offset;
	struct radeon_bo	*db_z_read_bo;
	struct radeon_bo	*db_z_write_bo;
	u32			db_s_info;
	u32			db_s_read_offset;
	u32			db_s_write_offset;
	struct radeon_bo	*db_s_read_bo;
	struct radeon_bo	*db_s_write_bo;
	bool			sx_misc_kill_all_prims;
	bool			cb_dirty;
	bool			db_dirty;
	bool			streamout_dirty;
	u32			htile_offset;
	u32			htile_surface;
	struct radeon_bo	*htile_bo;
	unsigned long		indirect_draw_buffer_size;
	const unsigned		*reg_safe_bm;
};

static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
{
	if (tiling_flags & RADEON_TILING_MACRO)
		return
ARRAY_2D_TILED_THIN1; 106 else if (tiling_flags & RADEON_TILING_MICRO) 107 return ARRAY_1D_TILED_THIN1; 108 else 109 return ARRAY_LINEAR_GENERAL; 110 } 111 112 static u32 evergreen_cs_get_num_banks(u32 nbanks) 113 { 114 switch (nbanks) { 115 case 2: 116 return ADDR_SURF_2_BANK; 117 case 4: 118 return ADDR_SURF_4_BANK; 119 case 8: 120 default: 121 return ADDR_SURF_8_BANK; 122 case 16: 123 return ADDR_SURF_16_BANK; 124 } 125 } 126 127 static void evergreen_cs_track_init(struct evergreen_cs_track *track) 128 { 129 int i; 130 131 for (i = 0; i < 8; i++) { 132 track->cb_color_fmask_bo[i] = NULL; 133 track->cb_color_cmask_bo[i] = NULL; 134 track->cb_color_cmask_slice[i] = 0; 135 track->cb_color_fmask_slice[i] = 0; 136 } 137 138 for (i = 0; i < 12; i++) { 139 track->cb_color_bo[i] = NULL; 140 track->cb_color_bo_offset[i] = 0xFFFFFFFF; 141 track->cb_color_info[i] = 0; 142 track->cb_color_view[i] = 0xFFFFFFFF; 143 track->cb_color_pitch[i] = 0; 144 track->cb_color_slice[i] = 0xfffffff; 145 track->cb_color_slice_idx[i] = 0; 146 } 147 track->cb_target_mask = 0xFFFFFFFF; 148 track->cb_shader_mask = 0xFFFFFFFF; 149 track->cb_dirty = true; 150 151 track->db_depth_slice = 0xffffffff; 152 track->db_depth_view = 0xFFFFC000; 153 track->db_depth_size = 0xFFFFFFFF; 154 track->db_depth_control = 0xFFFFFFFF; 155 track->db_z_info = 0xFFFFFFFF; 156 track->db_z_read_offset = 0xFFFFFFFF; 157 track->db_z_write_offset = 0xFFFFFFFF; 158 track->db_z_read_bo = NULL; 159 track->db_z_write_bo = NULL; 160 track->db_s_info = 0xFFFFFFFF; 161 track->db_s_read_offset = 0xFFFFFFFF; 162 track->db_s_write_offset = 0xFFFFFFFF; 163 track->db_s_read_bo = NULL; 164 track->db_s_write_bo = NULL; 165 track->db_dirty = true; 166 track->htile_bo = NULL; 167 track->htile_offset = 0xFFFFFFFF; 168 track->htile_surface = 0; 169 170 for (i = 0; i < 4; i++) { 171 track->vgt_strmout_size[i] = 0; 172 track->vgt_strmout_bo[i] = NULL; 173 track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF; 174 } 175 track->streamout_dirty = true; 176 track->sx_misc_kill_all_prims = false; 177 } 178 179 struct eg_surface { 180 /* value gathered from cs */ 181 unsigned nbx; 182 unsigned nby; 183 unsigned format; 184 unsigned mode; 185 unsigned nbanks; 186 unsigned bankw; 187 unsigned bankh; 188 unsigned tsplit; 189 unsigned mtilea; 190 unsigned nsamples; 191 /* output value */ 192 unsigned bpe; 193 unsigned layer_size; 194 unsigned palign; 195 unsigned halign; 196 unsigned long base_align; 197 }; 198 199 static int evergreen_surface_check_linear(struct radeon_cs_parser *p, 200 struct eg_surface *surf, 201 const char *prefix) 202 { 203 surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples; 204 surf->base_align = surf->bpe; 205 surf->palign = 1; 206 surf->halign = 1; 207 return 0; 208 } 209 210 static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p, 211 struct eg_surface *surf, 212 const char *prefix) 213 { 214 struct evergreen_cs_track *track = p->track; 215 unsigned palign; 216 217 palign = MAX(64, track->group_size / surf->bpe); 218 surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples; 219 surf->base_align = track->group_size; 220 surf->palign = palign; 221 surf->halign = 1; 222 if (surf->nbx & (palign - 1)) { 223 if (prefix) { 224 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n", 225 __func__, __LINE__, prefix, surf->nbx, palign); 226 } 227 return -EINVAL; 228 } 229 return 0; 230 } 231 232 static int evergreen_surface_check_1d(struct radeon_cs_parser *p, 233 struct eg_surface *surf, 234 const 
char *prefix) 235 { 236 struct evergreen_cs_track *track = p->track; 237 unsigned palign; 238 239 palign = track->group_size / (8 * surf->bpe * surf->nsamples); 240 palign = MAX(8, palign); 241 surf->layer_size = surf->nbx * surf->nby * surf->bpe; 242 surf->base_align = track->group_size; 243 surf->palign = palign; 244 surf->halign = 8; 245 if ((surf->nbx & (palign - 1))) { 246 if (prefix) { 247 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n", 248 __func__, __LINE__, prefix, surf->nbx, palign, 249 track->group_size, surf->bpe, surf->nsamples); 250 } 251 return -EINVAL; 252 } 253 if ((surf->nby & (8 - 1))) { 254 if (prefix) { 255 dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n", 256 __func__, __LINE__, prefix, surf->nby); 257 } 258 return -EINVAL; 259 } 260 return 0; 261 } 262 263 static int evergreen_surface_check_2d(struct radeon_cs_parser *p, 264 struct eg_surface *surf, 265 const char *prefix) 266 { 267 struct evergreen_cs_track *track = p->track; 268 unsigned palign, halign, tileb, slice_pt; 269 unsigned mtile_pr, mtile_ps, mtileb; 270 271 tileb = 64 * surf->bpe * surf->nsamples; 272 slice_pt = 1; 273 if (tileb > surf->tsplit) { 274 slice_pt = tileb / surf->tsplit; 275 } 276 tileb = tileb / slice_pt; 277 /* macro tile width & height */ 278 palign = (8 * surf->bankw * track->npipes) * surf->mtilea; 279 halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea; 280 mtileb = (palign / 8) * (halign / 8) * tileb; 281 mtile_pr = surf->nbx / palign; 282 mtile_ps = (mtile_pr * surf->nby) / halign; 283 surf->layer_size = mtile_ps * mtileb * slice_pt; 284 surf->base_align = (palign / 8) * (halign / 8) * tileb; 285 surf->palign = palign; 286 surf->halign = halign; 287 288 if ((surf->nbx & (palign - 1))) { 289 if (prefix) { 290 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n", 291 __func__, __LINE__, prefix, surf->nbx, palign); 292 } 293 return -EINVAL; 294 } 295 if ((surf->nby & (halign - 1))) { 296 if (prefix) { 297 dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n", 298 __func__, __LINE__, prefix, surf->nby, halign); 299 } 300 return -EINVAL; 301 } 302 303 return 0; 304 } 305 306 static int evergreen_surface_check(struct radeon_cs_parser *p, 307 struct eg_surface *surf, 308 const char *prefix) 309 { 310 /* some common value computed here */ 311 surf->bpe = r600_fmt_get_blocksize(surf->format); 312 313 switch (surf->mode) { 314 case ARRAY_LINEAR_GENERAL: 315 return evergreen_surface_check_linear(p, surf, prefix); 316 case ARRAY_LINEAR_ALIGNED: 317 return evergreen_surface_check_linear_aligned(p, surf, prefix); 318 case ARRAY_1D_TILED_THIN1: 319 return evergreen_surface_check_1d(p, surf, prefix); 320 case ARRAY_2D_TILED_THIN1: 321 return evergreen_surface_check_2d(p, surf, prefix); 322 default: 323 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", 324 __func__, __LINE__, prefix, surf->mode); 325 return -EINVAL; 326 } 327 return -EINVAL; 328 } 329 330 static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p, 331 struct eg_surface *surf, 332 const char *prefix) 333 { 334 switch (surf->mode) { 335 case ARRAY_2D_TILED_THIN1: 336 break; 337 case ARRAY_LINEAR_GENERAL: 338 case ARRAY_LINEAR_ALIGNED: 339 case ARRAY_1D_TILED_THIN1: 340 return 0; 341 default: 342 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", 343 __func__, __LINE__, prefix, surf->mode); 344 return -EINVAL; 345 } 346 347 switch (surf->nbanks) { 348 case 0: surf->nbanks = 2; break; 349 case 1: surf->nbanks = 4; break; 
350 case 2: surf->nbanks = 8; break; 351 case 3: surf->nbanks = 16; break; 352 default: 353 dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n", 354 __func__, __LINE__, prefix, surf->nbanks); 355 return -EINVAL; 356 } 357 switch (surf->bankw) { 358 case 0: surf->bankw = 1; break; 359 case 1: surf->bankw = 2; break; 360 case 2: surf->bankw = 4; break; 361 case 3: surf->bankw = 8; break; 362 default: 363 dev_warn(p->dev, "%s:%d %s invalid bankw %d\n", 364 __func__, __LINE__, prefix, surf->bankw); 365 return -EINVAL; 366 } 367 switch (surf->bankh) { 368 case 0: surf->bankh = 1; break; 369 case 1: surf->bankh = 2; break; 370 case 2: surf->bankh = 4; break; 371 case 3: surf->bankh = 8; break; 372 default: 373 dev_warn(p->dev, "%s:%d %s invalid bankh %d\n", 374 __func__, __LINE__, prefix, surf->bankh); 375 return -EINVAL; 376 } 377 switch (surf->mtilea) { 378 case 0: surf->mtilea = 1; break; 379 case 1: surf->mtilea = 2; break; 380 case 2: surf->mtilea = 4; break; 381 case 3: surf->mtilea = 8; break; 382 default: 383 dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n", 384 __func__, __LINE__, prefix, surf->mtilea); 385 return -EINVAL; 386 } 387 switch (surf->tsplit) { 388 case 0: surf->tsplit = 64; break; 389 case 1: surf->tsplit = 128; break; 390 case 2: surf->tsplit = 256; break; 391 case 3: surf->tsplit = 512; break; 392 case 4: surf->tsplit = 1024; break; 393 case 5: surf->tsplit = 2048; break; 394 case 6: surf->tsplit = 4096; break; 395 default: 396 dev_warn(p->dev, "%s:%d %s invalid tile split %d\n", 397 __func__, __LINE__, prefix, surf->tsplit); 398 return -EINVAL; 399 } 400 return 0; 401 } 402 403 static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id) 404 { 405 struct evergreen_cs_track *track = p->track; 406 struct eg_surface surf; 407 unsigned pitch, slice, mslice; 408 unsigned long offset; 409 int r; 410 411 mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1; 412 pitch = track->cb_color_pitch[id]; 413 slice = track->cb_color_slice[id]; 414 surf.nbx = (pitch + 1) * 8; 415 surf.nby = ((slice + 1) * 64) / surf.nbx; 416 surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]); 417 surf.format = G_028C70_FORMAT(track->cb_color_info[id]); 418 surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]); 419 surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]); 420 surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]); 421 surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]); 422 surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]); 423 surf.nsamples = 1; 424 425 if (!r600_fmt_is_valid_color(surf.format)) { 426 dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n", 427 __func__, __LINE__, surf.format, 428 id, track->cb_color_info[id]); 429 return -EINVAL; 430 } 431 432 r = evergreen_surface_value_conv_check(p, &surf, "cb"); 433 if (r) { 434 return r; 435 } 436 437 r = evergreen_surface_check(p, &surf, "cb"); 438 if (r) { 439 dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", 440 __func__, __LINE__, id, track->cb_color_pitch[id], 441 track->cb_color_slice[id], track->cb_color_attrib[id], 442 track->cb_color_info[id]); 443 return r; 444 } 445 446 offset = track->cb_color_bo_offset[id] << 8; 447 if (offset & (surf.base_align - 1)) { 448 dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n", 449 __func__, __LINE__, id, offset, surf.base_align); 450 return -EINVAL; 451 } 452 453 offset += surf.layer_size * mslice; 454 if (offset > 
radeon_bo_size(track->cb_color_bo[id])) { 455 /* old ddx are broken they allocate bo with w*h*bpp but 456 * program slice with ALIGN(h, 8), catch this and patch 457 * command stream. 458 */ 459 if (!surf.mode) { 460 uint32_t *ib = p->ib.ptr; 461 unsigned long tmp, nby, bsize, size, min = 0; 462 463 /* find the height the ddx wants */ 464 if (surf.nby > 8) { 465 min = surf.nby - 8; 466 } 467 bsize = radeon_bo_size(track->cb_color_bo[id]); 468 tmp = track->cb_color_bo_offset[id] << 8; 469 for (nby = surf.nby; nby > min; nby--) { 470 size = nby * surf.nbx * surf.bpe * surf.nsamples; 471 if ((tmp + size * mslice) <= bsize) { 472 break; 473 } 474 } 475 if (nby > min) { 476 surf.nby = nby; 477 slice = ((nby * surf.nbx) / 64) - 1; 478 if (!evergreen_surface_check(p, &surf, "cb")) { 479 /* check if this one works */ 480 tmp += surf.layer_size * mslice; 481 if (tmp <= bsize) { 482 ib[track->cb_color_slice_idx[id]] = slice; 483 goto old_ddx_ok; 484 } 485 } 486 } 487 } 488 dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " 489 "offset %d, max layer %d, bo size %ld, slice %d)\n", 490 __func__, __LINE__, id, surf.layer_size, 491 track->cb_color_bo_offset[id] << 8, mslice, 492 radeon_bo_size(track->cb_color_bo[id]), slice); 493 dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", 494 __func__, __LINE__, surf.nbx, surf.nby, 495 surf.mode, surf.bpe, surf.nsamples, 496 surf.bankw, surf.bankh, 497 surf.tsplit, surf.mtilea); 498 return -EINVAL; 499 } 500 old_ddx_ok: 501 502 return 0; 503 } 504 505 static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p, 506 unsigned nbx, unsigned nby) 507 { 508 struct evergreen_cs_track *track = p->track; 509 unsigned long size; 510 511 if (track->htile_bo == NULL) { 512 dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", 513 __func__, __LINE__, track->db_z_info); 514 return -EINVAL; 515 } 516 517 if (G_028ABC_LINEAR(track->htile_surface)) { 518 /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */ 519 nbx = round_up(nbx, 16 * 8); 520 /* height is npipes htiles aligned == npipes * 8 pixel aligned */ 521 nby = round_up(nby, track->npipes * 8); 522 } else { 523 /* always assume 8x8 htile */ 524 /* align is htile align * 8, htile align vary according to 525 * number of pipe and tile width and nby 526 */ 527 switch (track->npipes) { 528 case 8: 529 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 530 nbx = round_up(nbx, 64 * 8); 531 nby = round_up(nby, 64 * 8); 532 break; 533 case 4: 534 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 535 nbx = round_up(nbx, 64 * 8); 536 nby = round_up(nby, 32 * 8); 537 break; 538 case 2: 539 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 540 nbx = round_up(nbx, 32 * 8); 541 nby = round_up(nby, 32 * 8); 542 break; 543 case 1: 544 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 545 nbx = round_up(nbx, 32 * 8); 546 nby = round_up(nby, 16 * 8); 547 break; 548 default: 549 dev_warn(p->dev, "%s:%d invalid num pipes %d\n", 550 __func__, __LINE__, track->npipes); 551 return -EINVAL; 552 } 553 } 554 /* compute number of htile */ 555 nbx = nbx >> 3; 556 nby = nby >> 3; 557 /* size must be aligned on npipes * 2K boundary */ 558 size = roundup(nbx * nby * 4, track->npipes * (2 << 10)); 559 size += track->htile_offset; 560 561 if (!track->htile_bo) { 562 dev_warn(p->dev, "%s:%d htile_bo not set", __func__, __LINE__); 563 return -EINVAL; 564 } 565 if (size > radeon_bo_size(track->htile_bo)) { 566 dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", 567 __func__, __LINE__, 
radeon_bo_size(track->htile_bo), 568 size, nbx, nby); 569 return -EINVAL; 570 } 571 return 0; 572 } 573 574 static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) 575 { 576 struct evergreen_cs_track *track = p->track; 577 struct eg_surface surf; 578 unsigned pitch, slice, mslice; 579 unsigned long offset; 580 int r; 581 582 mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; 583 pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size); 584 slice = track->db_depth_slice; 585 surf.nbx = (pitch + 1) * 8; 586 surf.nby = ((slice + 1) * 64) / surf.nbx; 587 surf.mode = G_028040_ARRAY_MODE(track->db_z_info); 588 surf.format = G_028044_FORMAT(track->db_s_info); 589 surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info); 590 surf.nbanks = G_028040_NUM_BANKS(track->db_z_info); 591 surf.bankw = G_028040_BANK_WIDTH(track->db_z_info); 592 surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info); 593 surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info); 594 surf.nsamples = 1; 595 596 if (surf.format != 1) { 597 dev_warn(p->dev, "%s:%d stencil invalid format %d\n", 598 __func__, __LINE__, surf.format); 599 return -EINVAL; 600 } 601 /* replace by color format so we can use same code */ 602 surf.format = V_028C70_COLOR_8; 603 604 r = evergreen_surface_value_conv_check(p, &surf, "stencil"); 605 if (r) { 606 return r; 607 } 608 609 r = evergreen_surface_check(p, &surf, NULL); 610 if (r) { 611 /* old userspace doesn't compute proper depth/stencil alignment 612 * check that alignment against a bigger byte per elements and 613 * only report if that alignment is wrong too. 614 */ 615 surf.format = V_028C70_COLOR_8_8_8_8; 616 r = evergreen_surface_check(p, &surf, "stencil"); 617 if (r) { 618 dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", 619 __func__, __LINE__, track->db_depth_size, 620 track->db_depth_slice, track->db_s_info, track->db_z_info); 621 } 622 return r; 623 } 624 625 offset = track->db_s_read_offset << 8; 626 if (offset & (surf.base_align - 1)) { 627 dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", 628 __func__, __LINE__, offset, surf.base_align); 629 return -EINVAL; 630 } 631 offset += surf.layer_size * mslice; 632 if (!track->db_s_read_bo) { 633 dev_warn(p->dev, "%s:%d db_s_read_bo not set", __func__, __LINE__); 634 return -EINVAL; 635 } 636 if (offset > radeon_bo_size(track->db_s_read_bo)) { 637 dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, " 638 "offset %ld, max layer %d, bo size %ld)\n", 639 __func__, __LINE__, surf.layer_size, 640 (unsigned long)track->db_s_read_offset << 8, mslice, 641 radeon_bo_size(track->db_s_read_bo)); 642 dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", 643 __func__, __LINE__, track->db_depth_size, 644 track->db_depth_slice, track->db_s_info, track->db_z_info); 645 return -EINVAL; 646 } 647 648 offset = track->db_s_write_offset << 8; 649 if (offset & (surf.base_align - 1)) { 650 dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", 651 __func__, __LINE__, offset, surf.base_align); 652 return -EINVAL; 653 } 654 offset += surf.layer_size * mslice; 655 if (!track->db_s_write_bo) { 656 dev_warn(p->dev, "%s:%d db_s_write_bo not set", __func__, __LINE__); 657 return -EINVAL; 658 } 659 if (offset > radeon_bo_size(track->db_s_write_bo)) { 660 dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, " 661 "offset %ld, max layer %d, bo size %ld)\n", 662 __func__, __LINE__, surf.layer_size, 663 (unsigned long)track->db_s_write_offset << 8, 
mslice, 664 radeon_bo_size(track->db_s_write_bo)); 665 return -EINVAL; 666 } 667 668 /* hyperz */ 669 if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) { 670 r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby); 671 if (r) { 672 return r; 673 } 674 } 675 676 return 0; 677 } 678 679 static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) 680 { 681 struct evergreen_cs_track *track = p->track; 682 struct eg_surface surf; 683 unsigned pitch, slice, mslice; 684 unsigned long offset; 685 int r; 686 687 mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; 688 pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size); 689 slice = track->db_depth_slice; 690 surf.nbx = (pitch + 1) * 8; 691 surf.nby = ((slice + 1) * 64) / surf.nbx; 692 surf.mode = G_028040_ARRAY_MODE(track->db_z_info); 693 surf.format = G_028040_FORMAT(track->db_z_info); 694 surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info); 695 surf.nbanks = G_028040_NUM_BANKS(track->db_z_info); 696 surf.bankw = G_028040_BANK_WIDTH(track->db_z_info); 697 surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info); 698 surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info); 699 surf.nsamples = 1; 700 701 switch (surf.format) { 702 case V_028040_Z_16: 703 surf.format = V_028C70_COLOR_16; 704 break; 705 case V_028040_Z_24: 706 case V_028040_Z_32_FLOAT: 707 surf.format = V_028C70_COLOR_8_8_8_8; 708 break; 709 default: 710 dev_warn(p->dev, "%s:%d depth invalid format %d\n", 711 __func__, __LINE__, surf.format); 712 return -EINVAL; 713 } 714 715 r = evergreen_surface_value_conv_check(p, &surf, "depth"); 716 if (r) { 717 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n", 718 __func__, __LINE__, track->db_depth_size, 719 track->db_depth_slice, track->db_z_info); 720 return r; 721 } 722 723 r = evergreen_surface_check(p, &surf, "depth"); 724 if (r) { 725 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n", 726 __func__, __LINE__, track->db_depth_size, 727 track->db_depth_slice, track->db_z_info); 728 return r; 729 } 730 731 offset = track->db_z_read_offset << 8; 732 if (offset & (surf.base_align - 1)) { 733 dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", 734 __func__, __LINE__, offset, surf.base_align); 735 return -EINVAL; 736 } 737 offset += surf.layer_size * mslice; 738 if (!track->db_z_read_bo) { 739 dev_warn(p->dev, "%s:%d db_z_read_bo not set", __func__, __LINE__); 740 return -EINVAL; 741 } 742 if (offset > radeon_bo_size(track->db_z_read_bo)) { 743 dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, " 744 "offset %ld, max layer %d, bo size %ld)\n", 745 __func__, __LINE__, surf.layer_size, 746 (unsigned long)track->db_z_read_offset << 8, mslice, 747 radeon_bo_size(track->db_z_read_bo)); 748 return -EINVAL; 749 } 750 751 offset = track->db_z_write_offset << 8; 752 if (offset & (surf.base_align - 1)) { 753 dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", 754 __func__, __LINE__, offset, surf.base_align); 755 return -EINVAL; 756 } 757 offset += surf.layer_size * mslice; 758 if (!track->db_z_write_bo) { 759 dev_warn(p->dev, "%s:%d db_z_write_bo not set", __func__, __LINE__); 760 return -EINVAL; 761 } 762 if (offset > radeon_bo_size(track->db_z_write_bo)) { 763 dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, " 764 "offset %ld, max layer %d, bo size %ld)\n", 765 __func__, __LINE__, surf.layer_size, 766 (unsigned long)track->db_z_write_offset << 8, mslice, 767 radeon_bo_size(track->db_z_write_bo)); 768 return -EINVAL; 769 } 770 771 
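	/*
	 * With hierarchical Z enabled, the HTILE metadata buffer set up via
	 * DB_HTILE_DATA_BASE/DB_HTILE_SURFACE must also be validated;
	 * evergreen_cs_track_validate_htile() rounds the surface dimensions
	 * up to the htile granularity before performing the size check.
	 */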
/* hyperz */ 772 if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) { 773 r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby); 774 if (r) { 775 return r; 776 } 777 } 778 779 return 0; 780 } 781 782 static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p, 783 struct radeon_bo *texture, 784 struct radeon_bo *mipmap, 785 unsigned idx) 786 { 787 struct eg_surface surf; 788 unsigned long toffset, moffset; 789 unsigned dim, llevel, mslice, width, height, depth, i; 790 u32 texdw[8]; 791 int r; 792 793 texdw[0] = radeon_get_ib_value(p, idx + 0); 794 texdw[1] = radeon_get_ib_value(p, idx + 1); 795 texdw[2] = radeon_get_ib_value(p, idx + 2); 796 texdw[3] = radeon_get_ib_value(p, idx + 3); 797 texdw[4] = radeon_get_ib_value(p, idx + 4); 798 texdw[5] = radeon_get_ib_value(p, idx + 5); 799 texdw[6] = radeon_get_ib_value(p, idx + 6); 800 texdw[7] = radeon_get_ib_value(p, idx + 7); 801 dim = G_030000_DIM(texdw[0]); 802 llevel = G_030014_LAST_LEVEL(texdw[5]); 803 mslice = G_030014_LAST_ARRAY(texdw[5]) + 1; 804 width = G_030000_TEX_WIDTH(texdw[0]) + 1; 805 height = G_030004_TEX_HEIGHT(texdw[1]) + 1; 806 depth = G_030004_TEX_DEPTH(texdw[1]) + 1; 807 surf.format = G_03001C_DATA_FORMAT(texdw[7]); 808 surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8; 809 surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx); 810 surf.nby = r600_fmt_get_nblocksy(surf.format, height); 811 surf.mode = G_030004_ARRAY_MODE(texdw[1]); 812 surf.tsplit = G_030018_TILE_SPLIT(texdw[6]); 813 surf.nbanks = G_03001C_NUM_BANKS(texdw[7]); 814 surf.bankw = G_03001C_BANK_WIDTH(texdw[7]); 815 surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]); 816 surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]); 817 surf.nsamples = 1; 818 toffset = texdw[2] << 8; 819 moffset = texdw[3] << 8; 820 821 if (!r600_fmt_is_valid_texture(surf.format, p->family)) { 822 dev_warn(p->dev, "%s:%d texture invalid format %d\n", 823 __func__, __LINE__, surf.format); 824 return -EINVAL; 825 } 826 switch (dim) { 827 case V_030000_SQ_TEX_DIM_1D: 828 case V_030000_SQ_TEX_DIM_2D: 829 case V_030000_SQ_TEX_DIM_CUBEMAP: 830 case V_030000_SQ_TEX_DIM_1D_ARRAY: 831 case V_030000_SQ_TEX_DIM_2D_ARRAY: 832 depth = 1; 833 break; 834 case V_030000_SQ_TEX_DIM_2D_MSAA: 835 case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA: 836 surf.nsamples = 1 << llevel; 837 llevel = 0; 838 depth = 1; 839 break; 840 case V_030000_SQ_TEX_DIM_3D: 841 break; 842 default: 843 dev_warn(p->dev, "%s:%d texture invalid dimension %d\n", 844 __func__, __LINE__, dim); 845 return -EINVAL; 846 } 847 848 r = evergreen_surface_value_conv_check(p, &surf, "texture"); 849 if (r) { 850 return r; 851 } 852 853 /* align height */ 854 evergreen_surface_check(p, &surf, NULL); 855 surf.nby = ALIGN(surf.nby, surf.halign); 856 857 r = evergreen_surface_check(p, &surf, "texture"); 858 if (r) { 859 dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", 860 __func__, __LINE__, texdw[0], texdw[1], texdw[4], 861 texdw[5], texdw[6], texdw[7]); 862 return r; 863 } 864 865 /* check texture size */ 866 if (toffset & (surf.base_align - 1)) { 867 dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n", 868 __func__, __LINE__, toffset, surf.base_align); 869 return -EINVAL; 870 } 871 if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) { 872 dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n", 873 __func__, __LINE__, moffset, surf.base_align); 874 return -EINVAL; 875 } 876 if (dim == SQ_TEX_DIM_3D) { 877 toffset += surf.layer_size * depth; 878 } else { 879 toffset += 
surf.layer_size * mslice; 880 } 881 if (toffset > radeon_bo_size(texture)) { 882 dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, " 883 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n", 884 __func__, __LINE__, surf.layer_size, 885 (unsigned long)texdw[2] << 8, mslice, 886 depth, radeon_bo_size(texture), 887 surf.nbx, surf.nby); 888 return -EINVAL; 889 } 890 891 if (!mipmap) { 892 if (llevel) { 893 dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n", 894 __func__, __LINE__); 895 return -EINVAL; 896 } else { 897 return 0; /* everything's ok */ 898 } 899 } 900 901 /* check mipmap size */ 902 for (i = 1; i <= llevel; i++) { 903 unsigned w, h, d; 904 905 w = r600_mip_minify(width, i); 906 h = r600_mip_minify(height, i); 907 d = r600_mip_minify(depth, i); 908 surf.nbx = r600_fmt_get_nblocksx(surf.format, w); 909 surf.nby = r600_fmt_get_nblocksy(surf.format, h); 910 911 switch (surf.mode) { 912 case ARRAY_2D_TILED_THIN1: 913 if (surf.nbx < surf.palign || surf.nby < surf.halign) { 914 surf.mode = ARRAY_1D_TILED_THIN1; 915 } 916 /* recompute alignment */ 917 evergreen_surface_check(p, &surf, NULL); 918 break; 919 case ARRAY_LINEAR_GENERAL: 920 case ARRAY_LINEAR_ALIGNED: 921 case ARRAY_1D_TILED_THIN1: 922 break; 923 default: 924 dev_warn(p->dev, "%s:%d invalid array mode %d\n", 925 __func__, __LINE__, surf.mode); 926 return -EINVAL; 927 } 928 surf.nbx = ALIGN(surf.nbx, surf.palign); 929 surf.nby = ALIGN(surf.nby, surf.halign); 930 931 r = evergreen_surface_check(p, &surf, "mipmap"); 932 if (r) { 933 return r; 934 } 935 936 if (dim == SQ_TEX_DIM_3D) { 937 moffset += surf.layer_size * d; 938 } else { 939 moffset += surf.layer_size * mslice; 940 } 941 if (moffset > radeon_bo_size(mipmap)) { 942 dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, " 943 "offset %ld, coffset %ld, max layer %d, depth %d, " 944 "bo size %ld) level0 (%d %d %d)\n", 945 __func__, __LINE__, i, surf.layer_size, 946 (unsigned long)texdw[3] << 8, moffset, mslice, 947 d, radeon_bo_size(mipmap), 948 width, height, depth); 949 dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", 950 __func__, __LINE__, surf.nbx, surf.nby, 951 surf.mode, surf.bpe, surf.nsamples, 952 surf.bankw, surf.bankh, 953 surf.tsplit, surf.mtilea); 954 return -EINVAL; 955 } 956 } 957 958 return 0; 959 } 960 961 static int evergreen_cs_track_check(struct radeon_cs_parser *p) 962 { 963 struct evergreen_cs_track *track = p->track; 964 unsigned tmp, i; 965 int r; 966 unsigned buffer_mask = 0; 967 968 /* check streamout */ 969 if (track->streamout_dirty && track->vgt_strmout_config) { 970 for (i = 0; i < 4; i++) { 971 if (track->vgt_strmout_config & (1 << i)) { 972 buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf; 973 } 974 } 975 976 for (i = 0; i < 4; i++) { 977 if (buffer_mask & (1 << i)) { 978 if (track->vgt_strmout_bo[i]) { 979 u64 offset = (u64)track->vgt_strmout_bo_offset[i] + 980 (u64)track->vgt_strmout_size[i]; 981 if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { 982 DRM_ERROR("streamout %d bo too small: 0x%"PRIx64", 0x%lx\n", 983 i, offset, 984 radeon_bo_size(track->vgt_strmout_bo[i])); 985 return -EINVAL; 986 } 987 } else { 988 dev_warn(p->dev, "No buffer for streamout %d\n", i); 989 return -EINVAL; 990 } 991 } 992 } 993 track->streamout_dirty = false; 994 } 995 996 if (track->sx_misc_kill_all_prims) 997 return 0; 998 999 /* check that we have a cb for each enabled target 1000 */ 1001 if (track->cb_dirty) { 1002 tmp = track->cb_target_mask; 1003 for (i = 0; i < 8; 
i++) { 1004 u32 format = G_028C70_FORMAT(track->cb_color_info[i]); 1005 1006 if (format != V_028C70_COLOR_INVALID && 1007 (tmp >> (i * 4)) & 0xF) { 1008 /* at least one component is enabled */ 1009 if (track->cb_color_bo[i] == NULL) { 1010 dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", 1011 __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); 1012 return -EINVAL; 1013 } 1014 /* check cb */ 1015 r = evergreen_cs_track_validate_cb(p, i); 1016 if (r) { 1017 return r; 1018 } 1019 } 1020 } 1021 track->cb_dirty = false; 1022 } 1023 1024 if (track->db_dirty) { 1025 /* Check stencil buffer */ 1026 if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID && 1027 G_028800_STENCIL_ENABLE(track->db_depth_control)) { 1028 r = evergreen_cs_track_validate_stencil(p); 1029 if (r) 1030 return r; 1031 } 1032 /* Check depth buffer */ 1033 if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID && 1034 G_028800_Z_ENABLE(track->db_depth_control)) { 1035 r = evergreen_cs_track_validate_depth(p); 1036 if (r) 1037 return r; 1038 } 1039 track->db_dirty = false; 1040 } 1041 1042 return 0; 1043 } 1044 1045 /** 1046 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet 1047 * @parser: parser structure holding parsing context. 1048 * 1049 * This is an Evergreen(+)-specific function for parsing VLINE packets. 1050 * Real work is done by r600_cs_common_vline_parse function. 1051 * Here we just set up ASIC-specific register table and call 1052 * the common implementation function. 1053 */ 1054 static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p) 1055 { 1056 1057 static uint32_t vline_start_end[6] = { 1058 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET, 1059 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET, 1060 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET, 1061 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET, 1062 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET, 1063 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET 1064 }; 1065 static uint32_t vline_status[6] = { 1066 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, 1067 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, 1068 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, 1069 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, 1070 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, 1071 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET 1072 }; 1073 1074 return r600_cs_common_vline_parse(p, vline_start_end, vline_status); 1075 } 1076 1077 static int evergreen_packet0_check(struct radeon_cs_parser *p, 1078 struct radeon_cs_packet *pkt, 1079 unsigned idx, unsigned reg) 1080 { 1081 int r; 1082 1083 switch (reg) { 1084 case EVERGREEN_VLINE_START_END: 1085 r = evergreen_cs_packet_parse_vline(p); 1086 if (r) { 1087 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1088 idx, reg); 1089 return r; 1090 } 1091 break; 1092 default: 1093 pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx); 1094 return -EINVAL; 1095 } 1096 return 0; 1097 } 1098 1099 static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p, 1100 struct radeon_cs_packet *pkt) 1101 { 1102 unsigned reg, i; 1103 unsigned idx; 1104 int r; 1105 1106 idx = pkt->idx + 1; 1107 reg = pkt->reg; 1108 for (i = 0; i <= pkt->count; i++, idx++, reg += 4) { 1109 r = evergreen_packet0_check(p, pkt, idx, reg); 1110 if (r) { 1111 return r; 1112 } 1113 } 1114 return 0; 1115 } 1116 1117 /** 1118 * evergreen_cs_handle_reg() - process 
registers that need special handling. 1119 * @parser: parser structure holding parsing context 1120 * @reg: register we are testing 1121 * @idx: index into the cs buffer 1122 */ 1123 static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) 1124 { 1125 struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track; 1126 struct radeon_bo_list *reloc; 1127 u32 tmp, *ib; 1128 int r; 1129 1130 ib = p->ib.ptr; 1131 switch (reg) { 1132 /* force following reg to 0 in an attempt to disable out buffer 1133 * which will need us to better understand how it works to perform 1134 * security check on it (Jerome) 1135 */ 1136 case SQ_ESGS_RING_SIZE: 1137 case SQ_GSVS_RING_SIZE: 1138 case SQ_ESTMP_RING_SIZE: 1139 case SQ_GSTMP_RING_SIZE: 1140 case SQ_HSTMP_RING_SIZE: 1141 case SQ_LSTMP_RING_SIZE: 1142 case SQ_PSTMP_RING_SIZE: 1143 case SQ_VSTMP_RING_SIZE: 1144 case SQ_ESGS_RING_ITEMSIZE: 1145 case SQ_ESTMP_RING_ITEMSIZE: 1146 case SQ_GSTMP_RING_ITEMSIZE: 1147 case SQ_GSVS_RING_ITEMSIZE: 1148 case SQ_GS_VERT_ITEMSIZE: 1149 case SQ_GS_VERT_ITEMSIZE_1: 1150 case SQ_GS_VERT_ITEMSIZE_2: 1151 case SQ_GS_VERT_ITEMSIZE_3: 1152 case SQ_GSVS_RING_OFFSET_1: 1153 case SQ_GSVS_RING_OFFSET_2: 1154 case SQ_GSVS_RING_OFFSET_3: 1155 case SQ_HSTMP_RING_ITEMSIZE: 1156 case SQ_LSTMP_RING_ITEMSIZE: 1157 case SQ_PSTMP_RING_ITEMSIZE: 1158 case SQ_VSTMP_RING_ITEMSIZE: 1159 case VGT_TF_RING_SIZE: 1160 /* get value to populate the IB don't remove */ 1161 /*tmp =radeon_get_ib_value(p, idx); 1162 ib[idx] = 0;*/ 1163 break; 1164 case SQ_ESGS_RING_BASE: 1165 case SQ_GSVS_RING_BASE: 1166 case SQ_ESTMP_RING_BASE: 1167 case SQ_GSTMP_RING_BASE: 1168 case SQ_HSTMP_RING_BASE: 1169 case SQ_LSTMP_RING_BASE: 1170 case SQ_PSTMP_RING_BASE: 1171 case SQ_VSTMP_RING_BASE: 1172 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1173 if (r) { 1174 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1175 "0x%04X\n", reg); 1176 return -EINVAL; 1177 } 1178 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1179 break; 1180 case DB_DEPTH_CONTROL: 1181 track->db_depth_control = radeon_get_ib_value(p, idx); 1182 track->db_dirty = true; 1183 break; 1184 case CAYMAN_DB_EQAA: 1185 if (p->rdev->family < CHIP_CAYMAN) { 1186 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1187 "0x%04X\n", reg); 1188 return -EINVAL; 1189 } 1190 break; 1191 case CAYMAN_DB_DEPTH_INFO: 1192 if (p->rdev->family < CHIP_CAYMAN) { 1193 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1194 "0x%04X\n", reg); 1195 return -EINVAL; 1196 } 1197 break; 1198 case DB_Z_INFO: 1199 track->db_z_info = radeon_get_ib_value(p, idx); 1200 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1201 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1202 if (r) { 1203 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1204 "0x%04X\n", reg); 1205 return -EINVAL; 1206 } 1207 ib[idx] &= ~Z_ARRAY_MODE(0xf); 1208 track->db_z_info &= ~Z_ARRAY_MODE(0xf); 1209 ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1210 track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1211 if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1212 unsigned bankw, bankh, mtaspect, tile_split; 1213 1214 evergreen_tiling_fields(reloc->tiling_flags, 1215 &bankw, &bankh, &mtaspect, 1216 &tile_split); 1217 ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); 1218 ib[idx] |= DB_TILE_SPLIT(tile_split) | 1219 DB_BANK_WIDTH(bankw) | 1220 DB_BANK_HEIGHT(bankh) | 1221 DB_MACRO_TILE_ASPECT(mtaspect); 1222 } 1223 } 1224 track->db_dirty = true; 1225 break; 1226 case DB_STENCIL_INFO: 1227 
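		/* Only the raw register value is captured here; it is checked
		 * against the bound stencil buffer later, in
		 * evergreen_cs_track_validate_stencil(), once a draw packet
		 * triggers evergreen_cs_track_check(). */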
track->db_s_info = radeon_get_ib_value(p, idx); 1228 track->db_dirty = true; 1229 break; 1230 case DB_DEPTH_VIEW: 1231 track->db_depth_view = radeon_get_ib_value(p, idx); 1232 track->db_dirty = true; 1233 break; 1234 case DB_DEPTH_SIZE: 1235 track->db_depth_size = radeon_get_ib_value(p, idx); 1236 track->db_dirty = true; 1237 break; 1238 case R_02805C_DB_DEPTH_SLICE: 1239 track->db_depth_slice = radeon_get_ib_value(p, idx); 1240 track->db_dirty = true; 1241 break; 1242 case DB_Z_READ_BASE: 1243 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1244 if (r) { 1245 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1246 "0x%04X\n", reg); 1247 return -EINVAL; 1248 } 1249 track->db_z_read_offset = radeon_get_ib_value(p, idx); 1250 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1251 track->db_z_read_bo = reloc->robj; 1252 track->db_dirty = true; 1253 break; 1254 case DB_Z_WRITE_BASE: 1255 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1256 if (r) { 1257 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1258 "0x%04X\n", reg); 1259 return -EINVAL; 1260 } 1261 track->db_z_write_offset = radeon_get_ib_value(p, idx); 1262 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1263 track->db_z_write_bo = reloc->robj; 1264 track->db_dirty = true; 1265 break; 1266 case DB_STENCIL_READ_BASE: 1267 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1268 if (r) { 1269 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1270 "0x%04X\n", reg); 1271 return -EINVAL; 1272 } 1273 track->db_s_read_offset = radeon_get_ib_value(p, idx); 1274 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1275 track->db_s_read_bo = reloc->robj; 1276 track->db_dirty = true; 1277 break; 1278 case DB_STENCIL_WRITE_BASE: 1279 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1280 if (r) { 1281 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1282 "0x%04X\n", reg); 1283 return -EINVAL; 1284 } 1285 track->db_s_write_offset = radeon_get_ib_value(p, idx); 1286 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1287 track->db_s_write_bo = reloc->robj; 1288 track->db_dirty = true; 1289 break; 1290 case VGT_STRMOUT_CONFIG: 1291 track->vgt_strmout_config = radeon_get_ib_value(p, idx); 1292 track->streamout_dirty = true; 1293 break; 1294 case VGT_STRMOUT_BUFFER_CONFIG: 1295 track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx); 1296 track->streamout_dirty = true; 1297 break; 1298 case VGT_STRMOUT_BUFFER_BASE_0: 1299 case VGT_STRMOUT_BUFFER_BASE_1: 1300 case VGT_STRMOUT_BUFFER_BASE_2: 1301 case VGT_STRMOUT_BUFFER_BASE_3: 1302 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1303 if (r) { 1304 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1305 "0x%04X\n", reg); 1306 return -EINVAL; 1307 } 1308 tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; 1309 track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; 1310 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1311 track->vgt_strmout_bo[tmp] = reloc->robj; 1312 track->streamout_dirty = true; 1313 break; 1314 case VGT_STRMOUT_BUFFER_SIZE_0: 1315 case VGT_STRMOUT_BUFFER_SIZE_1: 1316 case VGT_STRMOUT_BUFFER_SIZE_2: 1317 case VGT_STRMOUT_BUFFER_SIZE_3: 1318 tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16; 1319 /* size in register is DWs, convert to bytes */ 1320 track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4; 1321 track->streamout_dirty = true; 1322 break; 1323 case CP_COHER_BASE: 1324 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1325 if (r) { 1326 dev_warn(p->dev, "missing reloc for CP_COHER_BASE " 1327 "0x%04X\n", reg); 1328 return -EINVAL; 1329 } 1330 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 
0xffffffff); 1331 break; 1332 case CB_TARGET_MASK: 1333 track->cb_target_mask = radeon_get_ib_value(p, idx); 1334 track->cb_dirty = true; 1335 break; 1336 case CB_SHADER_MASK: 1337 track->cb_shader_mask = radeon_get_ib_value(p, idx); 1338 track->cb_dirty = true; 1339 break; 1340 case PA_SC_AA_CONFIG: 1341 if (p->rdev->family >= CHIP_CAYMAN) { 1342 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1343 "0x%04X\n", reg); 1344 return -EINVAL; 1345 } 1346 tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK; 1347 track->nsamples = 1 << tmp; 1348 break; 1349 case CAYMAN_PA_SC_AA_CONFIG: 1350 if (p->rdev->family < CHIP_CAYMAN) { 1351 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1352 "0x%04X\n", reg); 1353 return -EINVAL; 1354 } 1355 tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK; 1356 track->nsamples = 1 << tmp; 1357 break; 1358 case CB_COLOR0_VIEW: 1359 case CB_COLOR1_VIEW: 1360 case CB_COLOR2_VIEW: 1361 case CB_COLOR3_VIEW: 1362 case CB_COLOR4_VIEW: 1363 case CB_COLOR5_VIEW: 1364 case CB_COLOR6_VIEW: 1365 case CB_COLOR7_VIEW: 1366 tmp = (reg - CB_COLOR0_VIEW) / 0x3c; 1367 track->cb_color_view[tmp] = radeon_get_ib_value(p, idx); 1368 track->cb_dirty = true; 1369 break; 1370 case CB_COLOR8_VIEW: 1371 case CB_COLOR9_VIEW: 1372 case CB_COLOR10_VIEW: 1373 case CB_COLOR11_VIEW: 1374 tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8; 1375 track->cb_color_view[tmp] = radeon_get_ib_value(p, idx); 1376 track->cb_dirty = true; 1377 break; 1378 case CB_COLOR0_INFO: 1379 case CB_COLOR1_INFO: 1380 case CB_COLOR2_INFO: 1381 case CB_COLOR3_INFO: 1382 case CB_COLOR4_INFO: 1383 case CB_COLOR5_INFO: 1384 case CB_COLOR6_INFO: 1385 case CB_COLOR7_INFO: 1386 tmp = (reg - CB_COLOR0_INFO) / 0x3c; 1387 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); 1388 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1389 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1390 if (r) { 1391 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1392 "0x%04X\n", reg); 1393 return -EINVAL; 1394 } 1395 ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1396 track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1397 } 1398 track->cb_dirty = true; 1399 break; 1400 case CB_COLOR8_INFO: 1401 case CB_COLOR9_INFO: 1402 case CB_COLOR10_INFO: 1403 case CB_COLOR11_INFO: 1404 tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8; 1405 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); 1406 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1407 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1408 if (r) { 1409 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1410 "0x%04X\n", reg); 1411 return -EINVAL; 1412 } 1413 ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1414 track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1415 } 1416 track->cb_dirty = true; 1417 break; 1418 case CB_COLOR0_PITCH: 1419 case CB_COLOR1_PITCH: 1420 case CB_COLOR2_PITCH: 1421 case CB_COLOR3_PITCH: 1422 case CB_COLOR4_PITCH: 1423 case CB_COLOR5_PITCH: 1424 case CB_COLOR6_PITCH: 1425 case CB_COLOR7_PITCH: 1426 tmp = (reg - CB_COLOR0_PITCH) / 0x3c; 1427 track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx); 1428 track->cb_dirty = true; 1429 break; 1430 case CB_COLOR8_PITCH: 1431 case CB_COLOR9_PITCH: 1432 case CB_COLOR10_PITCH: 1433 case CB_COLOR11_PITCH: 1434 tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8; 1435 track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx); 1436 track->cb_dirty = true; 1437 break; 1438 case CB_COLOR0_SLICE: 1439 case CB_COLOR1_SLICE: 1440 
case CB_COLOR2_SLICE: 1441 case CB_COLOR3_SLICE: 1442 case CB_COLOR4_SLICE: 1443 case CB_COLOR5_SLICE: 1444 case CB_COLOR6_SLICE: 1445 case CB_COLOR7_SLICE: 1446 tmp = (reg - CB_COLOR0_SLICE) / 0x3c; 1447 track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); 1448 track->cb_color_slice_idx[tmp] = idx; 1449 track->cb_dirty = true; 1450 break; 1451 case CB_COLOR8_SLICE: 1452 case CB_COLOR9_SLICE: 1453 case CB_COLOR10_SLICE: 1454 case CB_COLOR11_SLICE: 1455 tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8; 1456 track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); 1457 track->cb_color_slice_idx[tmp] = idx; 1458 track->cb_dirty = true; 1459 break; 1460 case CB_COLOR0_ATTRIB: 1461 case CB_COLOR1_ATTRIB: 1462 case CB_COLOR2_ATTRIB: 1463 case CB_COLOR3_ATTRIB: 1464 case CB_COLOR4_ATTRIB: 1465 case CB_COLOR5_ATTRIB: 1466 case CB_COLOR6_ATTRIB: 1467 case CB_COLOR7_ATTRIB: 1468 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1469 if (r) { 1470 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1471 "0x%04X\n", reg); 1472 return -EINVAL; 1473 } 1474 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1475 if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1476 unsigned bankw, bankh, mtaspect, tile_split; 1477 1478 evergreen_tiling_fields(reloc->tiling_flags, 1479 &bankw, &bankh, &mtaspect, 1480 &tile_split); 1481 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); 1482 ib[idx] |= CB_TILE_SPLIT(tile_split) | 1483 CB_BANK_WIDTH(bankw) | 1484 CB_BANK_HEIGHT(bankh) | 1485 CB_MACRO_TILE_ASPECT(mtaspect); 1486 } 1487 } 1488 tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c); 1489 track->cb_color_attrib[tmp] = ib[idx]; 1490 track->cb_dirty = true; 1491 break; 1492 case CB_COLOR8_ATTRIB: 1493 case CB_COLOR9_ATTRIB: 1494 case CB_COLOR10_ATTRIB: 1495 case CB_COLOR11_ATTRIB: 1496 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1497 if (r) { 1498 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1499 "0x%04X\n", reg); 1500 return -EINVAL; 1501 } 1502 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1503 if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1504 unsigned bankw, bankh, mtaspect, tile_split; 1505 1506 evergreen_tiling_fields(reloc->tiling_flags, 1507 &bankw, &bankh, &mtaspect, 1508 &tile_split); 1509 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); 1510 ib[idx] |= CB_TILE_SPLIT(tile_split) | 1511 CB_BANK_WIDTH(bankw) | 1512 CB_BANK_HEIGHT(bankh) | 1513 CB_MACRO_TILE_ASPECT(mtaspect); 1514 } 1515 } 1516 tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8; 1517 track->cb_color_attrib[tmp] = ib[idx]; 1518 track->cb_dirty = true; 1519 break; 1520 case CB_COLOR0_FMASK: 1521 case CB_COLOR1_FMASK: 1522 case CB_COLOR2_FMASK: 1523 case CB_COLOR3_FMASK: 1524 case CB_COLOR4_FMASK: 1525 case CB_COLOR5_FMASK: 1526 case CB_COLOR6_FMASK: 1527 case CB_COLOR7_FMASK: 1528 tmp = (reg - CB_COLOR0_FMASK) / 0x3c; 1529 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1530 if (r) { 1531 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); 1532 return -EINVAL; 1533 } 1534 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1535 track->cb_color_fmask_bo[tmp] = reloc->robj; 1536 break; 1537 case CB_COLOR0_CMASK: 1538 case CB_COLOR1_CMASK: 1539 case CB_COLOR2_CMASK: 1540 case CB_COLOR3_CMASK: 1541 case CB_COLOR4_CMASK: 1542 case CB_COLOR5_CMASK: 1543 case CB_COLOR6_CMASK: 1544 case CB_COLOR7_CMASK: 1545 tmp = (reg - CB_COLOR0_CMASK) / 0x3c; 1546 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1547 if (r) { 1548 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); 1549 return -EINVAL; 1550 } 1551 ib[idx] += (u32)((reloc->gpu_offset 
>> 8) & 0xffffffff); 1552 track->cb_color_cmask_bo[tmp] = reloc->robj; 1553 break; 1554 case CB_COLOR0_FMASK_SLICE: 1555 case CB_COLOR1_FMASK_SLICE: 1556 case CB_COLOR2_FMASK_SLICE: 1557 case CB_COLOR3_FMASK_SLICE: 1558 case CB_COLOR4_FMASK_SLICE: 1559 case CB_COLOR5_FMASK_SLICE: 1560 case CB_COLOR6_FMASK_SLICE: 1561 case CB_COLOR7_FMASK_SLICE: 1562 tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c; 1563 track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx); 1564 break; 1565 case CB_COLOR0_CMASK_SLICE: 1566 case CB_COLOR1_CMASK_SLICE: 1567 case CB_COLOR2_CMASK_SLICE: 1568 case CB_COLOR3_CMASK_SLICE: 1569 case CB_COLOR4_CMASK_SLICE: 1570 case CB_COLOR5_CMASK_SLICE: 1571 case CB_COLOR6_CMASK_SLICE: 1572 case CB_COLOR7_CMASK_SLICE: 1573 tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c; 1574 track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx); 1575 break; 1576 case CB_COLOR0_BASE: 1577 case CB_COLOR1_BASE: 1578 case CB_COLOR2_BASE: 1579 case CB_COLOR3_BASE: 1580 case CB_COLOR4_BASE: 1581 case CB_COLOR5_BASE: 1582 case CB_COLOR6_BASE: 1583 case CB_COLOR7_BASE: 1584 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1585 if (r) { 1586 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1587 "0x%04X\n", reg); 1588 return -EINVAL; 1589 } 1590 tmp = (reg - CB_COLOR0_BASE) / 0x3c; 1591 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); 1592 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1593 track->cb_color_bo[tmp] = reloc->robj; 1594 track->cb_dirty = true; 1595 break; 1596 case CB_COLOR8_BASE: 1597 case CB_COLOR9_BASE: 1598 case CB_COLOR10_BASE: 1599 case CB_COLOR11_BASE: 1600 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1601 if (r) { 1602 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1603 "0x%04X\n", reg); 1604 return -EINVAL; 1605 } 1606 tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; 1607 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); 1608 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1609 track->cb_color_bo[tmp] = reloc->robj; 1610 track->cb_dirty = true; 1611 break; 1612 case DB_HTILE_DATA_BASE: 1613 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1614 if (r) { 1615 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1616 "0x%04X\n", reg); 1617 return -EINVAL; 1618 } 1619 track->htile_offset = radeon_get_ib_value(p, idx); 1620 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1621 track->htile_bo = reloc->robj; 1622 track->db_dirty = true; 1623 break; 1624 case DB_HTILE_SURFACE: 1625 /* 8x8 only */ 1626 track->htile_surface = radeon_get_ib_value(p, idx); 1627 /* force 8x8 htile width and height */ 1628 ib[idx] |= 3; 1629 track->db_dirty = true; 1630 break; 1631 case CB_IMMED0_BASE: 1632 case CB_IMMED1_BASE: 1633 case CB_IMMED2_BASE: 1634 case CB_IMMED3_BASE: 1635 case CB_IMMED4_BASE: 1636 case CB_IMMED5_BASE: 1637 case CB_IMMED6_BASE: 1638 case CB_IMMED7_BASE: 1639 case CB_IMMED8_BASE: 1640 case CB_IMMED9_BASE: 1641 case CB_IMMED10_BASE: 1642 case CB_IMMED11_BASE: 1643 case SQ_PGM_START_FS: 1644 case SQ_PGM_START_ES: 1645 case SQ_PGM_START_VS: 1646 case SQ_PGM_START_GS: 1647 case SQ_PGM_START_PS: 1648 case SQ_PGM_START_HS: 1649 case SQ_PGM_START_LS: 1650 case SQ_CONST_MEM_BASE: 1651 case SQ_ALU_CONST_CACHE_GS_0: 1652 case SQ_ALU_CONST_CACHE_GS_1: 1653 case SQ_ALU_CONST_CACHE_GS_2: 1654 case SQ_ALU_CONST_CACHE_GS_3: 1655 case SQ_ALU_CONST_CACHE_GS_4: 1656 case SQ_ALU_CONST_CACHE_GS_5: 1657 case SQ_ALU_CONST_CACHE_GS_6: 1658 case SQ_ALU_CONST_CACHE_GS_7: 1659 case SQ_ALU_CONST_CACHE_GS_8: 1660 case SQ_ALU_CONST_CACHE_GS_9: 1661 case SQ_ALU_CONST_CACHE_GS_10: 1662 case 
SQ_ALU_CONST_CACHE_GS_11: 1663 case SQ_ALU_CONST_CACHE_GS_12: 1664 case SQ_ALU_CONST_CACHE_GS_13: 1665 case SQ_ALU_CONST_CACHE_GS_14: 1666 case SQ_ALU_CONST_CACHE_GS_15: 1667 case SQ_ALU_CONST_CACHE_PS_0: 1668 case SQ_ALU_CONST_CACHE_PS_1: 1669 case SQ_ALU_CONST_CACHE_PS_2: 1670 case SQ_ALU_CONST_CACHE_PS_3: 1671 case SQ_ALU_CONST_CACHE_PS_4: 1672 case SQ_ALU_CONST_CACHE_PS_5: 1673 case SQ_ALU_CONST_CACHE_PS_6: 1674 case SQ_ALU_CONST_CACHE_PS_7: 1675 case SQ_ALU_CONST_CACHE_PS_8: 1676 case SQ_ALU_CONST_CACHE_PS_9: 1677 case SQ_ALU_CONST_CACHE_PS_10: 1678 case SQ_ALU_CONST_CACHE_PS_11: 1679 case SQ_ALU_CONST_CACHE_PS_12: 1680 case SQ_ALU_CONST_CACHE_PS_13: 1681 case SQ_ALU_CONST_CACHE_PS_14: 1682 case SQ_ALU_CONST_CACHE_PS_15: 1683 case SQ_ALU_CONST_CACHE_VS_0: 1684 case SQ_ALU_CONST_CACHE_VS_1: 1685 case SQ_ALU_CONST_CACHE_VS_2: 1686 case SQ_ALU_CONST_CACHE_VS_3: 1687 case SQ_ALU_CONST_CACHE_VS_4: 1688 case SQ_ALU_CONST_CACHE_VS_5: 1689 case SQ_ALU_CONST_CACHE_VS_6: 1690 case SQ_ALU_CONST_CACHE_VS_7: 1691 case SQ_ALU_CONST_CACHE_VS_8: 1692 case SQ_ALU_CONST_CACHE_VS_9: 1693 case SQ_ALU_CONST_CACHE_VS_10: 1694 case SQ_ALU_CONST_CACHE_VS_11: 1695 case SQ_ALU_CONST_CACHE_VS_12: 1696 case SQ_ALU_CONST_CACHE_VS_13: 1697 case SQ_ALU_CONST_CACHE_VS_14: 1698 case SQ_ALU_CONST_CACHE_VS_15: 1699 case SQ_ALU_CONST_CACHE_HS_0: 1700 case SQ_ALU_CONST_CACHE_HS_1: 1701 case SQ_ALU_CONST_CACHE_HS_2: 1702 case SQ_ALU_CONST_CACHE_HS_3: 1703 case SQ_ALU_CONST_CACHE_HS_4: 1704 case SQ_ALU_CONST_CACHE_HS_5: 1705 case SQ_ALU_CONST_CACHE_HS_6: 1706 case SQ_ALU_CONST_CACHE_HS_7: 1707 case SQ_ALU_CONST_CACHE_HS_8: 1708 case SQ_ALU_CONST_CACHE_HS_9: 1709 case SQ_ALU_CONST_CACHE_HS_10: 1710 case SQ_ALU_CONST_CACHE_HS_11: 1711 case SQ_ALU_CONST_CACHE_HS_12: 1712 case SQ_ALU_CONST_CACHE_HS_13: 1713 case SQ_ALU_CONST_CACHE_HS_14: 1714 case SQ_ALU_CONST_CACHE_HS_15: 1715 case SQ_ALU_CONST_CACHE_LS_0: 1716 case SQ_ALU_CONST_CACHE_LS_1: 1717 case SQ_ALU_CONST_CACHE_LS_2: 1718 case SQ_ALU_CONST_CACHE_LS_3: 1719 case SQ_ALU_CONST_CACHE_LS_4: 1720 case SQ_ALU_CONST_CACHE_LS_5: 1721 case SQ_ALU_CONST_CACHE_LS_6: 1722 case SQ_ALU_CONST_CACHE_LS_7: 1723 case SQ_ALU_CONST_CACHE_LS_8: 1724 case SQ_ALU_CONST_CACHE_LS_9: 1725 case SQ_ALU_CONST_CACHE_LS_10: 1726 case SQ_ALU_CONST_CACHE_LS_11: 1727 case SQ_ALU_CONST_CACHE_LS_12: 1728 case SQ_ALU_CONST_CACHE_LS_13: 1729 case SQ_ALU_CONST_CACHE_LS_14: 1730 case SQ_ALU_CONST_CACHE_LS_15: 1731 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1732 if (r) { 1733 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1734 "0x%04X\n", reg); 1735 return -EINVAL; 1736 } 1737 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1738 break; 1739 case SX_MEMORY_EXPORT_BASE: 1740 if (p->rdev->family >= CHIP_CAYMAN) { 1741 dev_warn(p->dev, "bad SET_CONFIG_REG " 1742 "0x%04X\n", reg); 1743 return -EINVAL; 1744 } 1745 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1746 if (r) { 1747 dev_warn(p->dev, "bad SET_CONFIG_REG " 1748 "0x%04X\n", reg); 1749 return -EINVAL; 1750 } 1751 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1752 break; 1753 case CAYMAN_SX_SCATTER_EXPORT_BASE: 1754 if (p->rdev->family < CHIP_CAYMAN) { 1755 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1756 "0x%04X\n", reg); 1757 return -EINVAL; 1758 } 1759 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1760 if (r) { 1761 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1762 "0x%04X\n", reg); 1763 return -EINVAL; 1764 } 1765 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1766 break; 1767 case SX_MISC: 1768 track->sx_misc_kill_all_prims = 
(radeon_get_ib_value(p, idx) & 0x1) != 0; 1769 break; 1770 default: 1771 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); 1772 return -EINVAL; 1773 } 1774 return 0; 1775 } 1776 1777 /** 1778 * evergreen_is_safe_reg() - check if register is authorized or not 1779 * @parser: parser structure holding parsing context 1780 * @reg: register we are testing 1781 * 1782 * This function will test against reg_safe_bm and return true 1783 * if register is safe or false otherwise. 1784 */ 1785 static inline bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg) 1786 { 1787 struct evergreen_cs_track *track = p->track; 1788 u32 m, i; 1789 1790 i = (reg >> 7); 1791 if (unlikely(i >= REG_SAFE_BM_SIZE)) { 1792 return false; 1793 } 1794 m = 1 << ((reg >> 2) & 31); 1795 if (!(track->reg_safe_bm[i] & m)) 1796 return true; 1797 1798 return false; 1799 } 1800 1801 static int evergreen_packet3_check(struct radeon_cs_parser *p, 1802 struct radeon_cs_packet *pkt) 1803 { 1804 struct radeon_bo_list *reloc; 1805 struct evergreen_cs_track *track; 1806 uint32_t *ib; 1807 unsigned idx; 1808 unsigned i; 1809 unsigned start_reg, end_reg, reg; 1810 int r; 1811 u32 idx_value; 1812 1813 track = (struct evergreen_cs_track *)p->track; 1814 ib = p->ib.ptr; 1815 idx = pkt->idx + 1; 1816 idx_value = radeon_get_ib_value(p, idx); 1817 1818 switch (pkt->opcode) { 1819 case PACKET3_SET_PREDICATION: 1820 { 1821 int pred_op; 1822 int tmp; 1823 uint64_t offset; 1824 1825 if (pkt->count != 1) { 1826 DRM_ERROR("bad SET PREDICATION\n"); 1827 return -EINVAL; 1828 } 1829 1830 tmp = radeon_get_ib_value(p, idx + 1); 1831 pred_op = (tmp >> 16) & 0x7; 1832 1833 /* for the clear predicate operation */ 1834 if (pred_op == 0) 1835 return 0; 1836 1837 if (pred_op > 2) { 1838 DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op); 1839 return -EINVAL; 1840 } 1841 1842 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1843 if (r) { 1844 DRM_ERROR("bad SET PREDICATION\n"); 1845 return -EINVAL; 1846 } 1847 1848 offset = reloc->gpu_offset + 1849 (idx_value & 0xfffffff0) + 1850 ((u64)(tmp & 0xff) << 32); 1851 1852 ib[idx + 0] = offset; 1853 ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff); 1854 } 1855 break; 1856 case PACKET3_CONTEXT_CONTROL: 1857 if (pkt->count != 1) { 1858 DRM_ERROR("bad CONTEXT_CONTROL\n"); 1859 return -EINVAL; 1860 } 1861 break; 1862 case PACKET3_INDEX_TYPE: 1863 case PACKET3_NUM_INSTANCES: 1864 case PACKET3_CLEAR_STATE: 1865 if (pkt->count) { 1866 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); 1867 return -EINVAL; 1868 } 1869 break; 1870 case CAYMAN_PACKET3_DEALLOC_STATE: 1871 if (p->rdev->family < CHIP_CAYMAN) { 1872 DRM_ERROR("bad PACKET3_DEALLOC_STATE\n"); 1873 return -EINVAL; 1874 } 1875 if (pkt->count) { 1876 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); 1877 return -EINVAL; 1878 } 1879 break; 1880 case PACKET3_INDEX_BASE: 1881 { 1882 uint64_t offset; 1883 1884 if (pkt->count != 1) { 1885 DRM_ERROR("bad INDEX_BASE\n"); 1886 return -EINVAL; 1887 } 1888 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1889 if (r) { 1890 DRM_ERROR("bad INDEX_BASE\n"); 1891 return -EINVAL; 1892 } 1893 1894 offset = reloc->gpu_offset + 1895 idx_value + 1896 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 1897 1898 ib[idx+0] = offset; 1899 ib[idx+1] = upper_32_bits(offset) & 0xff; 1900 1901 r = evergreen_cs_track_check(p); 1902 if (r) { 1903 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 1904 return r; 1905 } 1906 break; 1907 } 1908 case PACKET3_INDEX_BUFFER_SIZE: 
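	/* INDEX_BUFFER_SIZE carries no GPU addresses, so there is nothing to
	 * relocate; only the packet length is sanity-checked below. */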
1909 { 1910 if (pkt->count != 0) { 1911 DRM_ERROR("bad INDEX_BUFFER_SIZE\n"); 1912 return -EINVAL; 1913 } 1914 break; 1915 } 1916 case PACKET3_DRAW_INDEX: 1917 { 1918 uint64_t offset; 1919 if (pkt->count != 3) { 1920 DRM_ERROR("bad DRAW_INDEX\n"); 1921 return -EINVAL; 1922 } 1923 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1924 if (r) { 1925 DRM_ERROR("bad DRAW_INDEX\n"); 1926 return -EINVAL; 1927 } 1928 1929 offset = reloc->gpu_offset + 1930 idx_value + 1931 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 1932 1933 ib[idx+0] = offset; 1934 ib[idx+1] = upper_32_bits(offset) & 0xff; 1935 1936 r = evergreen_cs_track_check(p); 1937 if (r) { 1938 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 1939 return r; 1940 } 1941 break; 1942 } 1943 case PACKET3_DRAW_INDEX_2: 1944 { 1945 uint64_t offset; 1946 1947 if (pkt->count != 4) { 1948 DRM_ERROR("bad DRAW_INDEX_2\n"); 1949 return -EINVAL; 1950 } 1951 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1952 if (r) { 1953 DRM_ERROR("bad DRAW_INDEX_2\n"); 1954 return -EINVAL; 1955 } 1956 1957 offset = reloc->gpu_offset + 1958 radeon_get_ib_value(p, idx+1) + 1959 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 1960 1961 ib[idx+1] = offset; 1962 ib[idx+2] = upper_32_bits(offset) & 0xff; 1963 1964 r = evergreen_cs_track_check(p); 1965 if (r) { 1966 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 1967 return r; 1968 } 1969 break; 1970 } 1971 case PACKET3_DRAW_INDEX_AUTO: 1972 if (pkt->count != 1) { 1973 DRM_ERROR("bad DRAW_INDEX_AUTO\n"); 1974 return -EINVAL; 1975 } 1976 r = evergreen_cs_track_check(p); 1977 if (r) { 1978 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); 1979 return r; 1980 } 1981 break; 1982 case PACKET3_DRAW_INDEX_MULTI_AUTO: 1983 if (pkt->count != 2) { 1984 DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n"); 1985 return -EINVAL; 1986 } 1987 r = evergreen_cs_track_check(p); 1988 if (r) { 1989 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); 1990 return r; 1991 } 1992 break; 1993 case PACKET3_DRAW_INDEX_IMMD: 1994 if (pkt->count < 2) { 1995 DRM_ERROR("bad DRAW_INDEX_IMMD\n"); 1996 return -EINVAL; 1997 } 1998 r = evergreen_cs_track_check(p); 1999 if (r) { 2000 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 2001 return r; 2002 } 2003 break; 2004 case PACKET3_DRAW_INDEX_OFFSET: 2005 if (pkt->count != 2) { 2006 DRM_ERROR("bad DRAW_INDEX_OFFSET\n"); 2007 return -EINVAL; 2008 } 2009 r = evergreen_cs_track_check(p); 2010 if (r) { 2011 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 2012 return r; 2013 } 2014 break; 2015 case PACKET3_DRAW_INDEX_OFFSET_2: 2016 if (pkt->count != 3) { 2017 DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n"); 2018 return -EINVAL; 2019 } 2020 r = evergreen_cs_track_check(p); 2021 if (r) { 2022 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 2023 return r; 2024 } 2025 break; 2026 case PACKET3_SET_BASE: 2027 { 2028 /* 2029 DW 1 HEADER Header of the packet. Shader_Type in bit 1 of the Header will correspond to the shader type of the Load, see Type-3 Packet. 2030 2 BASE_INDEX Bits [3:0] BASE_INDEX - Base Index specifies which base address is specified in the last two DWs. 2031 0001: DX11 Draw_Index_Indirect Patch Table Base: Base address for Draw_Index_Indirect data. 2032 3 ADDRESS_LO Bits [31:3] - Lower bits of QWORD-Aligned Address. Bits [2:0] - Reserved 2033 4 ADDRESS_HI Bits [31:8] - Reserved. 
Bits [7:0] - Upper bits of Address [47:32] 2034 */ 2035 if (pkt->count != 2) { 2036 DRM_ERROR("bad SET_BASE\n"); 2037 return -EINVAL; 2038 } 2039 2040 /* currently only supporting setting indirect draw buffer base address */ 2041 if (idx_value != 1) { 2042 DRM_ERROR("bad SET_BASE\n"); 2043 return -EINVAL; 2044 } 2045 2046 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2047 if (r) { 2048 DRM_ERROR("bad SET_BASE\n"); 2049 return -EINVAL; 2050 } 2051 2052 track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj); 2053 2054 ib[idx+1] = reloc->gpu_offset; 2055 ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff; 2056 2057 break; 2058 } 2059 case PACKET3_DRAW_INDIRECT: 2060 case PACKET3_DRAW_INDEX_INDIRECT: 2061 { 2062 u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20; 2063 2064 /* 2065 DW 1 HEADER 2066 2 DATA_OFFSET Bits [31:0] + byte aligned offset where the required data structure starts. Bits 1:0 are zero 2067 3 DRAW_INITIATOR Draw Initiator Register. Written to the VGT_DRAW_INITIATOR register for the assigned context 2068 */ 2069 if (pkt->count != 1) { 2070 DRM_ERROR("bad DRAW_INDIRECT\n"); 2071 return -EINVAL; 2072 } 2073 2074 if (idx_value + size > track->indirect_draw_buffer_size) { 2075 dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %"PRIx64" > %lu\n", 2076 idx_value, size, track->indirect_draw_buffer_size); 2077 return -EINVAL; 2078 } 2079 2080 r = evergreen_cs_track_check(p); 2081 if (r) { 2082 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 2083 return r; 2084 } 2085 break; 2086 } 2087 case PACKET3_DISPATCH_DIRECT: 2088 if (pkt->count != 3) { 2089 DRM_ERROR("bad DISPATCH_DIRECT\n"); 2090 return -EINVAL; 2091 } 2092 r = evergreen_cs_track_check(p); 2093 if (r) { 2094 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); 2095 return r; 2096 } 2097 break; 2098 case PACKET3_DISPATCH_INDIRECT: 2099 if (pkt->count != 1) { 2100 DRM_ERROR("bad DISPATCH_INDIRECT\n"); 2101 return -EINVAL; 2102 } 2103 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2104 if (r) { 2105 DRM_ERROR("bad DISPATCH_INDIRECT\n"); 2106 return -EINVAL; 2107 } 2108 ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff); 2109 r = evergreen_cs_track_check(p); 2110 if (r) { 2111 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 2112 return r; 2113 } 2114 break; 2115 case PACKET3_WAIT_REG_MEM: 2116 if (pkt->count != 5) { 2117 DRM_ERROR("bad WAIT_REG_MEM\n"); 2118 return -EINVAL; 2119 } 2120 /* bit 4 is reg (0) or mem (1) */ 2121 if (idx_value & 0x10) { 2122 uint64_t offset; 2123 2124 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2125 if (r) { 2126 DRM_ERROR("bad WAIT_REG_MEM\n"); 2127 return -EINVAL; 2128 } 2129 2130 offset = reloc->gpu_offset + 2131 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 2132 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2133 2134 ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc); 2135 ib[idx+2] = upper_32_bits(offset) & 0xff; 2136 } else if (idx_value & 0x100) { 2137 DRM_ERROR("cannot use PFP on REG wait\n"); 2138 return -EINVAL; 2139 } 2140 break; 2141 case PACKET3_CP_DMA: 2142 { 2143 u32 command, size, info; 2144 u64 offset, tmp; 2145 if (pkt->count != 4) { 2146 DRM_ERROR("bad CP DMA\n"); 2147 return -EINVAL; 2148 } 2149 command = radeon_get_ib_value(p, idx+4); 2150 size = command & 0x1fffff; 2151 info = radeon_get_ib_value(p, idx+1); 2152 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */ 2153 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */ 2154 ((((info & 0x00300000) >> 20) == 0) 
&& 2155 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */ 2156 ((((info & 0x60000000) >> 29) == 0) && 2157 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ 2158 /* non mem to mem copies requires dw aligned count */ 2159 if (size % 4) { 2160 DRM_ERROR("CP DMA command requires dw count alignment\n"); 2161 return -EINVAL; 2162 } 2163 } 2164 if (command & PACKET3_CP_DMA_CMD_SAS) { 2165 /* src address space is register */ 2166 /* GDS is ok */ 2167 if (((info & 0x60000000) >> 29) != 1) { 2168 DRM_ERROR("CP DMA SAS not supported\n"); 2169 return -EINVAL; 2170 } 2171 } else { 2172 if (command & PACKET3_CP_DMA_CMD_SAIC) { 2173 DRM_ERROR("CP DMA SAIC only supported for registers\n"); 2174 return -EINVAL; 2175 } 2176 /* src address space is memory */ 2177 if (((info & 0x60000000) >> 29) == 0) { 2178 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2179 if (r) { 2180 DRM_ERROR("bad CP DMA SRC\n"); 2181 return -EINVAL; 2182 } 2183 2184 tmp = radeon_get_ib_value(p, idx) + 2185 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 2186 2187 offset = reloc->gpu_offset + tmp; 2188 2189 if ((tmp + size) > radeon_bo_size(reloc->robj)) { 2190 dev_warn(p->dev, "CP DMA src buffer too small (%"PRIu64" %lu)\n", 2191 tmp + size, radeon_bo_size(reloc->robj)); 2192 return -EINVAL; 2193 } 2194 2195 ib[idx] = offset; 2196 ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff); 2197 } else if (((info & 0x60000000) >> 29) != 2) { 2198 DRM_ERROR("bad CP DMA SRC_SEL\n"); 2199 return -EINVAL; 2200 } 2201 } 2202 if (command & PACKET3_CP_DMA_CMD_DAS) { 2203 /* dst address space is register */ 2204 /* GDS is ok */ 2205 if (((info & 0x00300000) >> 20) != 1) { 2206 DRM_ERROR("CP DMA DAS not supported\n"); 2207 return -EINVAL; 2208 } 2209 } else { 2210 /* dst address space is memory */ 2211 if (command & PACKET3_CP_DMA_CMD_DAIC) { 2212 DRM_ERROR("CP DMA DAIC only supported for registers\n"); 2213 return -EINVAL; 2214 } 2215 if (((info & 0x00300000) >> 20) == 0) { 2216 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2217 if (r) { 2218 DRM_ERROR("bad CP DMA DST\n"); 2219 return -EINVAL; 2220 } 2221 2222 tmp = radeon_get_ib_value(p, idx+2) + 2223 ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); 2224 2225 offset = reloc->gpu_offset + tmp; 2226 2227 if ((tmp + size) > radeon_bo_size(reloc->robj)) { 2228 dev_warn(p->dev, "CP DMA dst buffer too small (%"PRIu64" %lu)\n", 2229 tmp + size, radeon_bo_size(reloc->robj)); 2230 return -EINVAL; 2231 } 2232 2233 ib[idx+2] = offset; 2234 ib[idx+3] = upper_32_bits(offset) & 0xff; 2235 } else { 2236 DRM_ERROR("bad CP DMA DST_SEL\n"); 2237 return -EINVAL; 2238 } 2239 } 2240 break; 2241 } 2242 case PACKET3_PFP_SYNC_ME: 2243 if (pkt->count) { 2244 DRM_ERROR("bad PFP_SYNC_ME\n"); 2245 return -EINVAL; 2246 } 2247 break; 2248 case PACKET3_SURFACE_SYNC: 2249 if (pkt->count != 3) { 2250 DRM_ERROR("bad SURFACE_SYNC\n"); 2251 return -EINVAL; 2252 } 2253 /* 0xffffffff/0x0 is flush all cache flag */ 2254 if (radeon_get_ib_value(p, idx + 1) != 0xffffffff || 2255 radeon_get_ib_value(p, idx + 2) != 0) { 2256 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2257 if (r) { 2258 DRM_ERROR("bad SURFACE_SYNC\n"); 2259 return -EINVAL; 2260 } 2261 ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2262 } 2263 break; 2264 case PACKET3_EVENT_WRITE: 2265 if (pkt->count != 2 && pkt->count != 0) { 2266 DRM_ERROR("bad EVENT_WRITE\n"); 2267 return -EINVAL; 2268 } 2269 if (pkt->count) { 2270 uint64_t offset; 2271 2272 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2273 if (r) { 2274 
DRM_ERROR("bad EVENT_WRITE\n"); 2275 return -EINVAL; 2276 } 2277 offset = reloc->gpu_offset + 2278 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + 2279 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2280 2281 ib[idx+1] = offset & 0xfffffff8; 2282 ib[idx+2] = upper_32_bits(offset) & 0xff; 2283 } 2284 break; 2285 case PACKET3_EVENT_WRITE_EOP: 2286 { 2287 uint64_t offset; 2288 2289 if (pkt->count != 4) { 2290 DRM_ERROR("bad EVENT_WRITE_EOP\n"); 2291 return -EINVAL; 2292 } 2293 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2294 if (r) { 2295 DRM_ERROR("bad EVENT_WRITE_EOP\n"); 2296 return -EINVAL; 2297 } 2298 2299 offset = reloc->gpu_offset + 2300 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 2301 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2302 2303 ib[idx+1] = offset & 0xfffffffc; 2304 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); 2305 break; 2306 } 2307 case PACKET3_EVENT_WRITE_EOS: 2308 { 2309 uint64_t offset; 2310 2311 if (pkt->count != 3) { 2312 DRM_ERROR("bad EVENT_WRITE_EOS\n"); 2313 return -EINVAL; 2314 } 2315 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2316 if (r) { 2317 DRM_ERROR("bad EVENT_WRITE_EOS\n"); 2318 return -EINVAL; 2319 } 2320 2321 offset = reloc->gpu_offset + 2322 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 2323 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2324 2325 ib[idx+1] = offset & 0xfffffffc; 2326 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); 2327 break; 2328 } 2329 case PACKET3_SET_CONFIG_REG: 2330 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START; 2331 end_reg = 4 * pkt->count + start_reg - 4; 2332 if ((start_reg < PACKET3_SET_CONFIG_REG_START) || 2333 (start_reg >= PACKET3_SET_CONFIG_REG_END) || 2334 (end_reg >= PACKET3_SET_CONFIG_REG_END)) { 2335 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); 2336 return -EINVAL; 2337 } 2338 for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) { 2339 if (evergreen_is_safe_reg(p, reg)) 2340 continue; 2341 r = evergreen_cs_handle_reg(p, reg, idx); 2342 if (r) 2343 return r; 2344 } 2345 break; 2346 case PACKET3_SET_CONTEXT_REG: 2347 start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START; 2348 end_reg = 4 * pkt->count + start_reg - 4; 2349 if ((start_reg < PACKET3_SET_CONTEXT_REG_START) || 2350 (start_reg >= PACKET3_SET_CONTEXT_REG_END) || 2351 (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { 2352 DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); 2353 return -EINVAL; 2354 } 2355 for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) { 2356 if (evergreen_is_safe_reg(p, reg)) 2357 continue; 2358 r = evergreen_cs_handle_reg(p, reg, idx); 2359 if (r) 2360 return r; 2361 } 2362 break; 2363 case PACKET3_SET_RESOURCE: 2364 if (pkt->count % 8) { 2365 DRM_ERROR("bad SET_RESOURCE\n"); 2366 return -EINVAL; 2367 } 2368 start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START; 2369 end_reg = 4 * pkt->count + start_reg - 4; 2370 if ((start_reg < PACKET3_SET_RESOURCE_START) || 2371 (start_reg >= PACKET3_SET_RESOURCE_END) || 2372 (end_reg >= PACKET3_SET_RESOURCE_END)) { 2373 DRM_ERROR("bad SET_RESOURCE\n"); 2374 return -EINVAL; 2375 } 2376 for (i = 0; i < (pkt->count / 8); i++) { 2377 struct radeon_bo *texture, *mipmap; 2378 u32 toffset, moffset; 2379 u32 size, offset, mip_address, tex_dim; 2380 2381 switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) { 2382 case SQ_TEX_VTX_VALID_TEXTURE: 2383 /* tex base */ 2384 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2385 if (r) { 2386 DRM_ERROR("bad SET_RESOURCE (tex)\n"); 2387 return 
-EINVAL; 2388 } 2389 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 2390 ib[idx+1+(i*8)+1] |= 2391 TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 2392 if (reloc->tiling_flags & RADEON_TILING_MACRO) { 2393 unsigned bankw, bankh, mtaspect, tile_split; 2394 2395 evergreen_tiling_fields(reloc->tiling_flags, 2396 &bankw, &bankh, &mtaspect, 2397 &tile_split); 2398 ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split); 2399 ib[idx+1+(i*8)+7] |= 2400 TEX_BANK_WIDTH(bankw) | 2401 TEX_BANK_HEIGHT(bankh) | 2402 MACRO_TILE_ASPECT(mtaspect) | 2403 TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); 2404 } 2405 } 2406 texture = reloc->robj; 2407 toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2408 2409 /* tex mip base */ 2410 tex_dim = ib[idx+1+(i*8)+0] & 0x7; 2411 mip_address = ib[idx+1+(i*8)+3]; 2412 2413 if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) && 2414 !mip_address && 2415 !radeon_cs_packet_next_is_pkt3_nop(p)) { 2416 /* MIP_ADDRESS should point to FMASK for an MSAA texture. 2417 * It should be 0 if FMASK is disabled. */ 2418 moffset = 0; 2419 mipmap = NULL; 2420 } else { 2421 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2422 if (r) { 2423 DRM_ERROR("bad SET_RESOURCE (tex)\n"); 2424 return -EINVAL; 2425 } 2426 moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2427 mipmap = reloc->robj; 2428 } 2429 2430 r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8)); 2431 if (r) 2432 return r; 2433 ib[idx+1+(i*8)+2] += toffset; 2434 ib[idx+1+(i*8)+3] += moffset; 2435 break; 2436 case SQ_TEX_VTX_VALID_BUFFER: 2437 { 2438 uint64_t offset64; 2439 /* vtx base */ 2440 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2441 if (r) { 2442 DRM_ERROR("bad SET_RESOURCE (vtx)\n"); 2443 return -EINVAL; 2444 } 2445 offset = radeon_get_ib_value(p, idx+1+(i*8)+0); 2446 size = radeon_get_ib_value(p, idx+1+(i*8)+1); 2447 if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) { 2448 /* force size to size of the buffer */ 2449 dev_warn_ratelimited(p->dev, "vbo resource seems too big for the bo\n"); 2450 ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; 2451 } 2452 2453 offset64 = reloc->gpu_offset + offset; 2454 ib[idx+1+(i*8)+0] = offset64; 2455 ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | 2456 (upper_32_bits(offset64) & 0xff); 2457 break; 2458 } 2459 case SQ_TEX_VTX_INVALID_TEXTURE: 2460 case SQ_TEX_VTX_INVALID_BUFFER: 2461 default: 2462 DRM_ERROR("bad SET_RESOURCE\n"); 2463 return -EINVAL; 2464 } 2465 } 2466 break; 2467 case PACKET3_SET_ALU_CONST: 2468 /* XXX fix me ALU const buffers only */ 2469 break; 2470 case PACKET3_SET_BOOL_CONST: 2471 start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START; 2472 end_reg = 4 * pkt->count + start_reg - 4; 2473 if ((start_reg < PACKET3_SET_BOOL_CONST_START) || 2474 (start_reg >= PACKET3_SET_BOOL_CONST_END) || 2475 (end_reg >= PACKET3_SET_BOOL_CONST_END)) { 2476 DRM_ERROR("bad SET_BOOL_CONST\n"); 2477 return -EINVAL; 2478 } 2479 break; 2480 case PACKET3_SET_LOOP_CONST: 2481 start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START; 2482 end_reg = 4 * pkt->count + start_reg - 4; 2483 if ((start_reg < PACKET3_SET_LOOP_CONST_START) || 2484 (start_reg >= PACKET3_SET_LOOP_CONST_END) || 2485 (end_reg >= PACKET3_SET_LOOP_CONST_END)) { 2486 DRM_ERROR("bad SET_LOOP_CONST\n"); 2487 return -EINVAL; 2488 } 2489 break; 2490 case PACKET3_SET_CTL_CONST: 2491 start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START; 2492 end_reg = 4 * pkt->count + start_reg - 4; 2493 if ((start_reg < 
PACKET3_SET_CTL_CONST_START) || 2494 (start_reg >= PACKET3_SET_CTL_CONST_END) || 2495 (end_reg >= PACKET3_SET_CTL_CONST_END)) { 2496 DRM_ERROR("bad SET_CTL_CONST\n"); 2497 return -EINVAL; 2498 } 2499 break; 2500 case PACKET3_SET_SAMPLER: 2501 if (pkt->count % 3) { 2502 DRM_ERROR("bad SET_SAMPLER\n"); 2503 return -EINVAL; 2504 } 2505 start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START; 2506 end_reg = 4 * pkt->count + start_reg - 4; 2507 if ((start_reg < PACKET3_SET_SAMPLER_START) || 2508 (start_reg >= PACKET3_SET_SAMPLER_END) || 2509 (end_reg >= PACKET3_SET_SAMPLER_END)) { 2510 DRM_ERROR("bad SET_SAMPLER\n"); 2511 return -EINVAL; 2512 } 2513 break; 2514 case PACKET3_STRMOUT_BUFFER_UPDATE: 2515 if (pkt->count != 4) { 2516 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); 2517 return -EINVAL; 2518 } 2519 /* Updating memory at DST_ADDRESS. */ 2520 if (idx_value & 0x1) { 2521 u64 offset; 2522 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2523 if (r) { 2524 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); 2525 return -EINVAL; 2526 } 2527 offset = radeon_get_ib_value(p, idx+1); 2528 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; 2529 if ((offset + 4) > radeon_bo_size(reloc->robj)) { 2530 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%"PRIx64", 0x%lx\n", 2531 offset + 4, radeon_bo_size(reloc->robj)); 2532 return -EINVAL; 2533 } 2534 offset += reloc->gpu_offset; 2535 ib[idx+1] = offset; 2536 ib[idx+2] = upper_32_bits(offset) & 0xff; 2537 } 2538 /* Reading data from SRC_ADDRESS. */ 2539 if (((idx_value >> 1) & 0x3) == 2) { 2540 u64 offset; 2541 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2542 if (r) { 2543 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); 2544 return -EINVAL; 2545 } 2546 offset = radeon_get_ib_value(p, idx+3); 2547 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 2548 if ((offset + 4) > radeon_bo_size(reloc->robj)) { 2549 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%"PRIx64", 0x%lx\n", 2550 offset + 4, radeon_bo_size(reloc->robj)); 2551 return -EINVAL; 2552 } 2553 offset += reloc->gpu_offset; 2554 ib[idx+3] = offset; 2555 ib[idx+4] = upper_32_bits(offset) & 0xff; 2556 } 2557 break; 2558 case PACKET3_MEM_WRITE: 2559 { 2560 u64 offset; 2561 2562 if (pkt->count != 3) { 2563 DRM_ERROR("bad MEM_WRITE (invalid count)\n"); 2564 return -EINVAL; 2565 } 2566 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2567 if (r) { 2568 DRM_ERROR("bad MEM_WRITE (missing reloc)\n"); 2569 return -EINVAL; 2570 } 2571 offset = radeon_get_ib_value(p, idx+0); 2572 offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL; 2573 if (offset & 0x7) { 2574 DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n"); 2575 return -EINVAL; 2576 } 2577 if ((offset + 8) > radeon_bo_size(reloc->robj)) { 2578 DRM_ERROR("bad MEM_WRITE bo too small: 0x%"PRIx64", 0x%lx\n", 2579 offset + 8, radeon_bo_size(reloc->robj)); 2580 return -EINVAL; 2581 } 2582 offset += reloc->gpu_offset; 2583 ib[idx+0] = offset; 2584 ib[idx+1] = upper_32_bits(offset) & 0xff; 2585 break; 2586 } 2587 case PACKET3_COPY_DW: 2588 if (pkt->count != 4) { 2589 DRM_ERROR("bad COPY_DW (invalid count)\n"); 2590 return -EINVAL; 2591 } 2592 if (idx_value & 0x1) { 2593 u64 offset; 2594 /* SRC is memory. 
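The 32-bit low dword and 8-bit high byte below are bounds checked against the BO size and then rebased onto the reloc GPU offset.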
*/ 2595 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2596 if (r) { 2597 DRM_ERROR("bad COPY_DW (missing src reloc)\n"); 2598 return -EINVAL; 2599 } 2600 offset = radeon_get_ib_value(p, idx+1); 2601 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; 2602 if ((offset + 4) > radeon_bo_size(reloc->robj)) { 2603 DRM_ERROR("bad COPY_DW src bo too small: 0x%"PRIx64", 0x%lx\n", 2604 offset + 4, radeon_bo_size(reloc->robj)); 2605 return -EINVAL; 2606 } 2607 offset += reloc->gpu_offset; 2608 ib[idx+1] = offset; 2609 ib[idx+2] = upper_32_bits(offset) & 0xff; 2610 } else { 2611 /* SRC is a reg. */ 2612 reg = radeon_get_ib_value(p, idx+1) << 2; 2613 if (!evergreen_is_safe_reg(p, reg)) { 2614 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", 2615 reg, idx + 1); 2616 return -EINVAL; 2617 } 2618 } 2619 if (idx_value & 0x2) { 2620 u64 offset; 2621 /* DST is memory. */ 2622 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2623 if (r) { 2624 DRM_ERROR("bad COPY_DW (missing dst reloc)\n"); 2625 return -EINVAL; 2626 } 2627 offset = radeon_get_ib_value(p, idx+3); 2628 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 2629 if ((offset + 4) > radeon_bo_size(reloc->robj)) { 2630 DRM_ERROR("bad COPY_DW dst bo too small: 0x%"PRIx64", 0x%lx\n", 2631 offset + 4, radeon_bo_size(reloc->robj)); 2632 return -EINVAL; 2633 } 2634 offset += reloc->gpu_offset; 2635 ib[idx+3] = offset; 2636 ib[idx+4] = upper_32_bits(offset) & 0xff; 2637 } else { 2638 /* DST is a reg. */ 2639 reg = radeon_get_ib_value(p, idx+3) << 2; 2640 if (!evergreen_is_safe_reg(p, reg)) { 2641 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", 2642 reg, idx + 3); 2643 return -EINVAL; 2644 } 2645 } 2646 break; 2647 case PACKET3_SET_APPEND_CNT: 2648 { 2649 uint32_t areg; 2650 uint32_t allowed_reg_base; 2651 uint32_t source_sel; 2652 if (pkt->count != 2) { 2653 DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n"); 2654 return -EINVAL; 2655 } 2656 2657 allowed_reg_base = GDS_APPEND_COUNT_0; 2658 allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START; 2659 allowed_reg_base >>= 2; 2660 2661 areg = idx_value >> 16; 2662 if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) { 2663 dev_warn(p->dev, "forbidden register for append cnt 0x%08x at %d\n", 2664 areg, idx); 2665 return -EINVAL; 2666 } 2667 2668 source_sel = G_PACKET3_SET_APPEND_CNT_SRC_SELECT(idx_value); 2669 if (source_sel == PACKET3_SAC_SRC_SEL_MEM) { 2670 uint64_t offset; 2671 uint32_t swap; 2672 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2673 if (r) { 2674 DRM_ERROR("bad SET_APPEND_CNT (missing reloc)\n"); 2675 return -EINVAL; 2676 } 2677 offset = radeon_get_ib_value(p, idx + 1); 2678 swap = offset & 0x3; 2679 offset &= ~0x3; 2680 2681 offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32; 2682 2683 offset += reloc->gpu_offset; 2684 ib[idx+1] = (offset & 0xfffffffc) | swap; 2685 ib[idx+2] = upper_32_bits(offset) & 0xff; 2686 } else { 2687 DRM_ERROR("bad SET_APPEND_CNT (unsupported operation)\n"); 2688 return -EINVAL; 2689 } 2690 break; 2691 } 2692 case PACKET3_NOP: 2693 break; 2694 default: 2695 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); 2696 return -EINVAL; 2697 } 2698 return 0; 2699 } 2700 2701 int evergreen_cs_parse(struct radeon_cs_parser *p) 2702 { 2703 struct radeon_cs_packet pkt; 2704 struct evergreen_cs_track *track; 2705 u32 tmp; 2706 int r; 2707 2708 if (p->track == NULL) { 2709 /* initialize tracker, we are in kms */ 2710 track = kzalloc(sizeof(*track), GFP_KERNEL); 2711 if (track == NULL) 2712 return -ENOMEM; 2713 
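/* First packet on this parser: set up the tracker, pick the per-family register safety bitmap and decode npipes, nbanks, group_size and row_size from the tile_config word. */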
evergreen_cs_track_init(track); 2714 if (p->rdev->family >= CHIP_CAYMAN) { 2715 tmp = p->rdev->config.cayman.tile_config; 2716 track->reg_safe_bm = cayman_reg_safe_bm; 2717 } else { 2718 tmp = p->rdev->config.evergreen.tile_config; 2719 track->reg_safe_bm = evergreen_reg_safe_bm; 2720 } 2721 BUILD_BUG_ON(ARRAY_SIZE(cayman_reg_safe_bm) != REG_SAFE_BM_SIZE); 2722 BUILD_BUG_ON(ARRAY_SIZE(evergreen_reg_safe_bm) != REG_SAFE_BM_SIZE); 2723 switch (tmp & 0xf) { 2724 case 0: 2725 track->npipes = 1; 2726 break; 2727 case 1: 2728 default: 2729 track->npipes = 2; 2730 break; 2731 case 2: 2732 track->npipes = 4; 2733 break; 2734 case 3: 2735 track->npipes = 8; 2736 break; 2737 } 2738 2739 switch ((tmp & 0xf0) >> 4) { 2740 case 0: 2741 track->nbanks = 4; 2742 break; 2743 case 1: 2744 default: 2745 track->nbanks = 8; 2746 break; 2747 case 2: 2748 track->nbanks = 16; 2749 break; 2750 } 2751 2752 switch ((tmp & 0xf00) >> 8) { 2753 case 0: 2754 track->group_size = 256; 2755 break; 2756 case 1: 2757 default: 2758 track->group_size = 512; 2759 break; 2760 } 2761 2762 switch ((tmp & 0xf000) >> 12) { 2763 case 0: 2764 track->row_size = 1; 2765 break; 2766 case 1: 2767 default: 2768 track->row_size = 2; 2769 break; 2770 case 2: 2771 track->row_size = 4; 2772 break; 2773 } 2774 2775 p->track = track; 2776 } 2777 do { 2778 r = radeon_cs_packet_parse(p, &pkt, p->idx); 2779 if (r) { 2780 kfree(p->track); 2781 p->track = NULL; 2782 return r; 2783 } 2784 p->idx += pkt.count + 2; 2785 switch (pkt.type) { 2786 case RADEON_PACKET_TYPE0: 2787 r = evergreen_cs_parse_packet0(p, &pkt); 2788 break; 2789 case RADEON_PACKET_TYPE2: 2790 break; 2791 case RADEON_PACKET_TYPE3: 2792 r = evergreen_packet3_check(p, &pkt); 2793 break; 2794 default: 2795 DRM_ERROR("Unknown packet type %d !\n", pkt.type); 2796 kfree(p->track); 2797 p->track = NULL; 2798 return -EINVAL; 2799 } 2800 if (r) { 2801 kfree(p->track); 2802 p->track = NULL; 2803 return r; 2804 } 2805 } while (p->idx < p->chunk_ib->length_dw); 2806 #if 0 2807 for (r = 0; r < p->ib.length_dw; r++) { 2808 pr_info("%05d 0x%08X\n", r, p->ib.ptr[r]); 2809 mdelay(1); 2810 } 2811 #endif 2812 kfree(p->track); 2813 p->track = NULL; 2814 return 0; 2815 } 2816 2817 /** 2818 * evergreen_dma_cs_parse() - parse the DMA IB 2819 * @p: parser structure holding parsing context. 2820 * 2821 * Parses the DMA IB from the CS ioctl and updates 2822 * the GPU addresses based on the reloc information and 2823 * checks for errors. (Evergreen-Cayman) 2824 * Returns 0 for success and an error on failure. 
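* The IB is patched in place: as each packet is validated its addresses are rebased onto the reloc GPU offsets.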
2825 **/ 2826 int evergreen_dma_cs_parse(struct radeon_cs_parser *p) 2827 { 2828 struct radeon_cs_chunk *ib_chunk = p->chunk_ib; 2829 struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc; 2830 u32 header, cmd, count, sub_cmd; 2831 uint32_t *ib = p->ib.ptr; 2832 u32 idx; 2833 u64 src_offset, dst_offset, dst2_offset; 2834 int r; 2835 2836 do { 2837 if (p->idx >= ib_chunk->length_dw) { 2838 DRM_ERROR("Can not parse packet at %d after CS end %d !\n", 2839 p->idx, ib_chunk->length_dw); 2840 return -EINVAL; 2841 } 2842 idx = p->idx; 2843 header = radeon_get_ib_value(p, idx); 2844 cmd = GET_DMA_CMD(header); 2845 count = GET_DMA_COUNT(header); 2846 sub_cmd = GET_DMA_SUB_CMD(header); 2847 2848 switch (cmd) { 2849 case DMA_PACKET_WRITE: 2850 r = r600_dma_cs_next_reloc(p, &dst_reloc); 2851 if (r) { 2852 DRM_ERROR("bad DMA_PACKET_WRITE\n"); 2853 return -EINVAL; 2854 } 2855 switch (sub_cmd) { 2856 /* tiled */ 2857 case 8: 2858 dst_offset = radeon_get_ib_value(p, idx+1); 2859 dst_offset <<= 8; 2860 2861 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2862 p->idx += count + 7; 2863 break; 2864 /* linear */ 2865 case 0: 2866 dst_offset = radeon_get_ib_value(p, idx+1); 2867 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; 2868 2869 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2870 ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2871 p->idx += count + 3; 2872 break; 2873 default: 2874 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header); 2875 return -EINVAL; 2876 } 2877 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2878 dev_warn(p->dev, "DMA write buffer too small (%"PRIu64" %lu)\n", 2879 dst_offset, radeon_bo_size(dst_reloc->robj)); 2880 return -EINVAL; 2881 } 2882 break; 2883 case DMA_PACKET_COPY: 2884 r = r600_dma_cs_next_reloc(p, &src_reloc); 2885 if (r) { 2886 DRM_ERROR("bad DMA_PACKET_COPY\n"); 2887 return -EINVAL; 2888 } 2889 r = r600_dma_cs_next_reloc(p, &dst_reloc); 2890 if (r) { 2891 DRM_ERROR("bad DMA_PACKET_COPY\n"); 2892 return -EINVAL; 2893 } 2894 switch (sub_cmd) { 2895 /* Copy L2L, DW aligned */ 2896 case 0x00: 2897 /* L2L, dw */ 2898 src_offset = radeon_get_ib_value(p, idx+2); 2899 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 2900 dst_offset = radeon_get_ib_value(p, idx+1); 2901 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; 2902 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 2903 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%"PRIu64" %lu)\n", 2904 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 2905 return -EINVAL; 2906 } 2907 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2908 dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%"PRIu64" %lu)\n", 2909 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2910 return -EINVAL; 2911 } 2912 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2913 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2914 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2915 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2916 p->idx += 5; 2917 break; 2918 /* Copy L2T/T2L */ 2919 case 0x08: 2920 /* detile bit */ 2921 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 2922 /* tiled src, linear dst */ 2923 src_offset = radeon_get_ib_value(p, idx+1); 2924 src_offset <<= 8; 2925 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 2926 2927 dst_offset = radeon_get_ib_value(p, idx + 7); 2928 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 
0xff)) << 32; 2929 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2930 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2931 } else { 2932 /* linear src, tiled dst */ 2933 src_offset = radeon_get_ib_value(p, idx+7); 2934 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 2935 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2936 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2937 2938 dst_offset = radeon_get_ib_value(p, idx+1); 2939 dst_offset <<= 8; 2940 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2941 } 2942 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 2943 dev_warn(p->dev, "DMA L2T, src buffer too small (%"PRIu64" %lu)\n", 2944 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 2945 return -EINVAL; 2946 } 2947 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2948 dev_warn(p->dev, "DMA L2T, dst buffer too small (%"PRIu64" %lu)\n", 2949 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2950 return -EINVAL; 2951 } 2952 p->idx += 9; 2953 break; 2954 /* Copy L2L, byte aligned */ 2955 case 0x40: 2956 /* L2L, byte */ 2957 src_offset = radeon_get_ib_value(p, idx+2); 2958 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 2959 dst_offset = radeon_get_ib_value(p, idx+1); 2960 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; 2961 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) { 2962 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%"PRIu64" %lu)\n", 2963 src_offset + count, radeon_bo_size(src_reloc->robj)); 2964 return -EINVAL; 2965 } 2966 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) { 2967 dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%"PRIu64" %lu)\n", 2968 dst_offset + count, radeon_bo_size(dst_reloc->robj)); 2969 return -EINVAL; 2970 } 2971 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff); 2972 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff); 2973 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2974 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2975 p->idx += 5; 2976 break; 2977 /* Copy L2L, partial */ 2978 case 0x41: 2979 /* L2L, partial */ 2980 if (p->family < CHIP_CAYMAN) { 2981 DRM_ERROR("L2L Partial is cayman only !\n"); 2982 return -EINVAL; 2983 } 2984 ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff); 2985 ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2986 ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff); 2987 ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2988 2989 p->idx += 9; 2990 break; 2991 /* Copy L2L, DW aligned, broadcast */ 2992 case 0x44: 2993 /* L2L, dw, broadcast */ 2994 r = r600_dma_cs_next_reloc(p, &dst2_reloc); 2995 if (r) { 2996 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n"); 2997 return -EINVAL; 2998 } 2999 dst_offset = radeon_get_ib_value(p, idx+1); 3000 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 3001 dst2_offset = radeon_get_ib_value(p, idx+2); 3002 dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32; 3003 src_offset = radeon_get_ib_value(p, idx+3); 3004 src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; 3005 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3006 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%"PRIu64" %lu)\n", 3007 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3008 return -EINVAL; 3009 } 3010 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3011 dev_warn(p->dev, "DMA 
L2L, dw, broadcast dst buffer too small (%"PRIu64" %lu)\n", 3012 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 3013 return -EINVAL; 3014 } 3015 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { 3016 dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%"PRIu64" %lu)\n", 3017 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 3018 return -EINVAL; 3019 } 3020 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 3021 ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc); 3022 ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3023 ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 3024 ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff; 3025 ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3026 p->idx += 7; 3027 break; 3028 /* Copy L2T Frame to Field */ 3029 case 0x48: 3030 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 3031 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); 3032 return -EINVAL; 3033 } 3034 r = r600_dma_cs_next_reloc(p, &dst2_reloc); 3035 if (r) { 3036 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); 3037 return -EINVAL; 3038 } 3039 dst_offset = radeon_get_ib_value(p, idx+1); 3040 dst_offset <<= 8; 3041 dst2_offset = radeon_get_ib_value(p, idx+2); 3042 dst2_offset <<= 8; 3043 src_offset = radeon_get_ib_value(p, idx+8); 3044 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; 3045 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3046 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%"PRIu64" %lu)\n", 3047 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3048 return -EINVAL; 3049 } 3050 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3051 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%"PRIu64" %lu)\n", 3052 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 3053 return -EINVAL; 3054 } 3055 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { 3056 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%"PRIu64" %lu)\n", 3057 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 3058 return -EINVAL; 3059 } 3060 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 3061 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); 3062 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3063 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3064 p->idx += 10; 3065 break; 3066 /* Copy L2T/T2L, partial */ 3067 case 0x49: 3068 /* L2T, T2L partial */ 3069 if (p->family < CHIP_CAYMAN) { 3070 DRM_ERROR("L2T, T2L Partial is cayman only !\n"); 3071 return -EINVAL; 3072 } 3073 /* detile bit */ 3074 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 3075 /* tiled src, linear dst */ 3076 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 3077 3078 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 3079 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 3080 } else { 3081 /* linear src, tiled dst */ 3082 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3083 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3084 3085 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 3086 } 3087 p->idx += 12; 3088 break; 3089 /* Copy L2T broadcast */ 3090 case 0x4b: 3091 /* L2T, broadcast */ 3092 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 3093 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 3094 return -EINVAL; 3095 } 3096 r = r600_dma_cs_next_reloc(p, &dst2_reloc); 3097 if (r) { 3098 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 3099 return 
-EINVAL; 3100 } 3101 dst_offset = radeon_get_ib_value(p, idx+1); 3102 dst_offset <<= 8; 3103 dst2_offset = radeon_get_ib_value(p, idx+2); 3104 dst2_offset <<= 8; 3105 src_offset = radeon_get_ib_value(p, idx+8); 3106 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; 3107 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3108 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%"PRIu64" %lu)\n", 3109 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3110 return -EINVAL; 3111 } 3112 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3113 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%"PRIu64" %lu)\n", 3114 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 3115 return -EINVAL; 3116 } 3117 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { 3118 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%"PRIu64" %lu)\n", 3119 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 3120 return -EINVAL; 3121 } 3122 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 3123 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); 3124 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3125 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3126 p->idx += 10; 3127 break; 3128 /* Copy L2T/T2L (tile units) */ 3129 case 0x4c: 3130 /* L2T, T2L */ 3131 /* detile bit */ 3132 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 3133 /* tiled src, linear dst */ 3134 src_offset = radeon_get_ib_value(p, idx+1); 3135 src_offset <<= 8; 3136 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 3137 3138 dst_offset = radeon_get_ib_value(p, idx+7); 3139 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 3140 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 3141 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 3142 } else { 3143 /* linear src, tiled dst */ 3144 src_offset = radeon_get_ib_value(p, idx+7); 3145 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 3146 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3147 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3148 3149 dst_offset = radeon_get_ib_value(p, idx+1); 3150 dst_offset <<= 8; 3151 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 3152 } 3153 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3154 dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%"PRIu64" %lu)\n", 3155 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3156 return -EINVAL; 3157 } 3158 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3159 dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%"PRIu64" %lu)\n", 3160 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 3161 return -EINVAL; 3162 } 3163 p->idx += 9; 3164 break; 3165 /* Copy T2T, partial (tile units) */ 3166 case 0x4d: 3167 /* T2T partial */ 3168 if (p->family < CHIP_CAYMAN) { 3169 DRM_ERROR("L2T, T2L Partial is cayman only !\n"); 3170 return -EINVAL; 3171 } 3172 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 3173 ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8); 3174 p->idx += 13; 3175 break; 3176 /* Copy L2T broadcast (tile units) */ 3177 case 0x4f: 3178 /* L2T, broadcast */ 3179 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 3180 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 3181 return -EINVAL; 3182 } 3183 r = r600_dma_cs_next_reloc(p, &dst2_reloc); 3184 if (r) { 3185 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 3186 return -EINVAL; 3187 } 3188 dst_offset = radeon_get_ib_value(p, 
idx+1); 3189 dst_offset <<= 8; 3190 dst2_offset = radeon_get_ib_value(p, idx+2); 3191 dst2_offset <<= 8; 3192 src_offset = radeon_get_ib_value(p, idx+8); 3193 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; 3194 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3195 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%"PRIu64" %lu)\n", 3196 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3197 return -EINVAL; 3198 } 3199 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3200 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%"PRIu64" %lu)\n", 3201 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 3202 return -EINVAL; 3203 } 3204 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { 3205 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%"PRIu64" %lu)\n", 3206 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 3207 return -EINVAL; 3208 } 3209 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 3210 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); 3211 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3212 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3213 p->idx += 10; 3214 break; 3215 default: 3216 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header); 3217 return -EINVAL; 3218 } 3219 break; 3220 case DMA_PACKET_CONSTANT_FILL: 3221 r = r600_dma_cs_next_reloc(p, &dst_reloc); 3222 if (r) { 3223 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n"); 3224 return -EINVAL; 3225 } 3226 dst_offset = radeon_get_ib_value(p, idx+1); 3227 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; 3228 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3229 dev_warn(p->dev, "DMA constant fill buffer too small (%"PRIu64" %lu)\n", 3230 dst_offset, radeon_bo_size(dst_reloc->robj)); 3231 return -EINVAL; 3232 } 3233 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 3234 ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000; 3235 p->idx += 4; 3236 break; 3237 case DMA_PACKET_NOP: 3238 p->idx += 1; 3239 break; 3240 default: 3241 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); 3242 return -EINVAL; 3243 } 3244 } while (p->idx < p->chunk_ib->length_dw); 3245 #if 0 3246 for (r = 0; r < p->ib->length_dw; r++) { 3247 pr_info("%05d 0x%08X\n", r, p->ib.ptr[r]); 3248 mdelay(1); 3249 } 3250 #endif 3251 return 0; 3252 } 3253 3254 /* vm parser */ 3255 static bool evergreen_vm_reg_valid(u32 reg) 3256 { 3257 /* context regs are fine */ 3258 if (reg >= 0x28000) 3259 return true; 3260 3261 /* check config regs */ 3262 switch (reg) { 3263 case WAIT_UNTIL: 3264 case GRBM_GFX_INDEX: 3265 case CP_STRMOUT_CNTL: 3266 case CP_COHER_CNTL: 3267 case CP_COHER_SIZE: 3268 case VGT_VTX_VECT_EJECT_REG: 3269 case VGT_CACHE_INVALIDATION: 3270 case VGT_GS_VERTEX_REUSE: 3271 case VGT_PRIMITIVE_TYPE: 3272 case VGT_INDEX_TYPE: 3273 case VGT_NUM_INDICES: 3274 case VGT_NUM_INSTANCES: 3275 case VGT_COMPUTE_DIM_X: 3276 case VGT_COMPUTE_DIM_Y: 3277 case VGT_COMPUTE_DIM_Z: 3278 case VGT_COMPUTE_START_X: 3279 case VGT_COMPUTE_START_Y: 3280 case VGT_COMPUTE_START_Z: 3281 case VGT_COMPUTE_INDEX: 3282 case VGT_COMPUTE_THREAD_GROUP_SIZE: 3283 case VGT_HS_OFFCHIP_PARAM: 3284 case PA_CL_ENHANCE: 3285 case PA_SU_LINE_STIPPLE_VALUE: 3286 case PA_SC_LINE_STIPPLE_STATE: 3287 case PA_SC_ENHANCE: 3288 case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ: 3289 case SQ_DYN_GPR_SIMD_LOCK_EN: 3290 case SQ_CONFIG: 3291 case SQ_GPR_RESOURCE_MGMT_1: 3292 case 
SQ_GLOBAL_GPR_RESOURCE_MGMT_1: 3293 case SQ_GLOBAL_GPR_RESOURCE_MGMT_2: 3294 case SQ_CONST_MEM_BASE: 3295 case SQ_STATIC_THREAD_MGMT_1: 3296 case SQ_STATIC_THREAD_MGMT_2: 3297 case SQ_STATIC_THREAD_MGMT_3: 3298 case SPI_CONFIG_CNTL: 3299 case SPI_CONFIG_CNTL_1: 3300 case TA_CNTL_AUX: 3301 case DB_DEBUG: 3302 case DB_DEBUG2: 3303 case DB_DEBUG3: 3304 case DB_DEBUG4: 3305 case DB_WATERMARKS: 3306 case TD_PS_BORDER_COLOR_INDEX: 3307 case TD_PS_BORDER_COLOR_RED: 3308 case TD_PS_BORDER_COLOR_GREEN: 3309 case TD_PS_BORDER_COLOR_BLUE: 3310 case TD_PS_BORDER_COLOR_ALPHA: 3311 case TD_VS_BORDER_COLOR_INDEX: 3312 case TD_VS_BORDER_COLOR_RED: 3313 case TD_VS_BORDER_COLOR_GREEN: 3314 case TD_VS_BORDER_COLOR_BLUE: 3315 case TD_VS_BORDER_COLOR_ALPHA: 3316 case TD_GS_BORDER_COLOR_INDEX: 3317 case TD_GS_BORDER_COLOR_RED: 3318 case TD_GS_BORDER_COLOR_GREEN: 3319 case TD_GS_BORDER_COLOR_BLUE: 3320 case TD_GS_BORDER_COLOR_ALPHA: 3321 case TD_HS_BORDER_COLOR_INDEX: 3322 case TD_HS_BORDER_COLOR_RED: 3323 case TD_HS_BORDER_COLOR_GREEN: 3324 case TD_HS_BORDER_COLOR_BLUE: 3325 case TD_HS_BORDER_COLOR_ALPHA: 3326 case TD_LS_BORDER_COLOR_INDEX: 3327 case TD_LS_BORDER_COLOR_RED: 3328 case TD_LS_BORDER_COLOR_GREEN: 3329 case TD_LS_BORDER_COLOR_BLUE: 3330 case TD_LS_BORDER_COLOR_ALPHA: 3331 case TD_CS_BORDER_COLOR_INDEX: 3332 case TD_CS_BORDER_COLOR_RED: 3333 case TD_CS_BORDER_COLOR_GREEN: 3334 case TD_CS_BORDER_COLOR_BLUE: 3335 case TD_CS_BORDER_COLOR_ALPHA: 3336 case SQ_ESGS_RING_SIZE: 3337 case SQ_GSVS_RING_SIZE: 3338 case SQ_ESTMP_RING_SIZE: 3339 case SQ_GSTMP_RING_SIZE: 3340 case SQ_HSTMP_RING_SIZE: 3341 case SQ_LSTMP_RING_SIZE: 3342 case SQ_PSTMP_RING_SIZE: 3343 case SQ_VSTMP_RING_SIZE: 3344 case SQ_ESGS_RING_ITEMSIZE: 3345 case SQ_ESTMP_RING_ITEMSIZE: 3346 case SQ_GSTMP_RING_ITEMSIZE: 3347 case SQ_GSVS_RING_ITEMSIZE: 3348 case SQ_GS_VERT_ITEMSIZE: 3349 case SQ_GS_VERT_ITEMSIZE_1: 3350 case SQ_GS_VERT_ITEMSIZE_2: 3351 case SQ_GS_VERT_ITEMSIZE_3: 3352 case SQ_GSVS_RING_OFFSET_1: 3353 case SQ_GSVS_RING_OFFSET_2: 3354 case SQ_GSVS_RING_OFFSET_3: 3355 case SQ_HSTMP_RING_ITEMSIZE: 3356 case SQ_LSTMP_RING_ITEMSIZE: 3357 case SQ_PSTMP_RING_ITEMSIZE: 3358 case SQ_VSTMP_RING_ITEMSIZE: 3359 case VGT_TF_RING_SIZE: 3360 case SQ_ESGS_RING_BASE: 3361 case SQ_GSVS_RING_BASE: 3362 case SQ_ESTMP_RING_BASE: 3363 case SQ_GSTMP_RING_BASE: 3364 case SQ_HSTMP_RING_BASE: 3365 case SQ_LSTMP_RING_BASE: 3366 case SQ_PSTMP_RING_BASE: 3367 case SQ_VSTMP_RING_BASE: 3368 case CAYMAN_VGT_OFFCHIP_LDS_BASE: 3369 case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS: 3370 return true; 3371 default: 3372 DRM_ERROR("Invalid register 0x%x in CS\n", reg); 3373 return false; 3374 } 3375 } 3376 3377 static int evergreen_vm_packet3_check(struct radeon_device *rdev, 3378 u32 *ib, struct radeon_cs_packet *pkt) 3379 { 3380 u32 idx = pkt->idx + 1; 3381 u32 idx_value = ib[idx]; 3382 u32 start_reg, end_reg, reg, i; 3383 u32 command, info; 3384 3385 switch (pkt->opcode) { 3386 case PACKET3_NOP: 3387 break; 3388 case PACKET3_SET_BASE: 3389 if (idx_value != 1) { 3390 DRM_ERROR("bad SET_BASE"); 3391 return -EINVAL; 3392 } 3393 break; 3394 case PACKET3_CLEAR_STATE: 3395 case PACKET3_INDEX_BUFFER_SIZE: 3396 case PACKET3_DISPATCH_DIRECT: 3397 case PACKET3_DISPATCH_INDIRECT: 3398 case PACKET3_MODE_CONTROL: 3399 case PACKET3_SET_PREDICATION: 3400 case PACKET3_COND_EXEC: 3401 case PACKET3_PRED_EXEC: 3402 case PACKET3_DRAW_INDIRECT: 3403 case PACKET3_DRAW_INDEX_INDIRECT: 3404 case PACKET3_INDEX_BASE: 3405 case PACKET3_DRAW_INDEX_2: 3406 case PACKET3_CONTEXT_CONTROL: 3407 case 
PACKET3_DRAW_INDEX_OFFSET: 3408 case PACKET3_INDEX_TYPE: 3409 case PACKET3_DRAW_INDEX: 3410 case PACKET3_DRAW_INDEX_AUTO: 3411 case PACKET3_DRAW_INDEX_IMMD: 3412 case PACKET3_NUM_INSTANCES: 3413 case PACKET3_DRAW_INDEX_MULTI_AUTO: 3414 case PACKET3_STRMOUT_BUFFER_UPDATE: 3415 case PACKET3_DRAW_INDEX_OFFSET_2: 3416 case PACKET3_DRAW_INDEX_MULTI_ELEMENT: 3417 case PACKET3_MPEG_INDEX: 3418 case PACKET3_WAIT_REG_MEM: 3419 case PACKET3_MEM_WRITE: 3420 case PACKET3_PFP_SYNC_ME: 3421 case PACKET3_SURFACE_SYNC: 3422 case PACKET3_EVENT_WRITE: 3423 case PACKET3_EVENT_WRITE_EOP: 3424 case PACKET3_EVENT_WRITE_EOS: 3425 case PACKET3_SET_CONTEXT_REG: 3426 case PACKET3_SET_BOOL_CONST: 3427 case PACKET3_SET_LOOP_CONST: 3428 case PACKET3_SET_RESOURCE: 3429 case PACKET3_SET_SAMPLER: 3430 case PACKET3_SET_CTL_CONST: 3431 case PACKET3_SET_RESOURCE_OFFSET: 3432 case PACKET3_SET_CONTEXT_REG_INDIRECT: 3433 case PACKET3_SET_RESOURCE_INDIRECT: 3434 case CAYMAN_PACKET3_DEALLOC_STATE: 3435 break; 3436 case PACKET3_COND_WRITE: 3437 if (idx_value & 0x100) { 3438 reg = ib[idx + 5] * 4; 3439 if (!evergreen_vm_reg_valid(reg)) 3440 return -EINVAL; 3441 } 3442 break; 3443 case PACKET3_COPY_DW: 3444 if (idx_value & 0x2) { 3445 reg = ib[idx + 3] * 4; 3446 if (!evergreen_vm_reg_valid(reg)) 3447 return -EINVAL; 3448 } 3449 break; 3450 case PACKET3_SET_CONFIG_REG: 3451 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START; 3452 end_reg = 4 * pkt->count + start_reg - 4; 3453 if ((start_reg < PACKET3_SET_CONFIG_REG_START) || 3454 (start_reg >= PACKET3_SET_CONFIG_REG_END) || 3455 (end_reg >= PACKET3_SET_CONFIG_REG_END)) { 3456 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); 3457 return -EINVAL; 3458 } 3459 for (i = 0; i < pkt->count; i++) { 3460 reg = start_reg + (4 * i); 3461 if (!evergreen_vm_reg_valid(reg)) 3462 return -EINVAL; 3463 } 3464 break; 3465 case PACKET3_CP_DMA: 3466 command = ib[idx + 4]; 3467 info = ib[idx + 1]; 3468 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */ 3469 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */ 3470 ((((info & 0x00300000) >> 20) == 0) && 3471 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */ 3472 ((((info & 0x60000000) >> 29) == 0) && 3473 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ 3474 /* non mem to mem copies requires dw aligned count */ 3475 if ((command & 0x1fffff) % 4) { 3476 DRM_ERROR("CP DMA command requires dw count alignment\n"); 3477 return -EINVAL; 3478 } 3479 } 3480 if (command & PACKET3_CP_DMA_CMD_SAS) { 3481 /* src address space is register */ 3482 if (((info & 0x60000000) >> 29) == 0) { 3483 start_reg = idx_value << 2; 3484 if (command & PACKET3_CP_DMA_CMD_SAIC) { 3485 reg = start_reg; 3486 if (!evergreen_vm_reg_valid(reg)) { 3487 DRM_ERROR("CP DMA Bad SRC register\n"); 3488 return -EINVAL; 3489 } 3490 } else { 3491 for (i = 0; i < (command & 0x1fffff); i++) { 3492 reg = start_reg + (4 * i); 3493 if (!evergreen_vm_reg_valid(reg)) { 3494 DRM_ERROR("CP DMA Bad SRC register\n"); 3495 return -EINVAL; 3496 } 3497 } 3498 } 3499 } 3500 } 3501 if (command & PACKET3_CP_DMA_CMD_DAS) { 3502 /* dst address space is register */ 3503 if (((info & 0x00300000) >> 20) == 0) { 3504 start_reg = ib[idx + 2]; 3505 if (command & PACKET3_CP_DMA_CMD_DAIC) { 3506 reg = start_reg; 3507 if (!evergreen_vm_reg_valid(reg)) { 3508 DRM_ERROR("CP DMA Bad DST register\n"); 3509 return -EINVAL; 3510 } 3511 } else { 3512 for (i = 0; i < (command & 0x1fffff); i++) { 3513 reg = start_reg + (4 * i); 3514 if (!evergreen_vm_reg_valid(reg)) { 3515 DRM_ERROR("CP DMA Bad 
DST register\n"); 3516 return -EINVAL; 3517 } 3518 } 3519 } 3520 } 3521 } 3522 break; 3523 case PACKET3_SET_APPEND_CNT: { 3524 uint32_t areg; 3525 uint32_t allowed_reg_base; 3526 3527 if (pkt->count != 2) { 3528 DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n"); 3529 return -EINVAL; 3530 } 3531 3532 allowed_reg_base = GDS_APPEND_COUNT_0; 3533 allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START; 3534 allowed_reg_base >>= 2; 3535 3536 areg = idx_value >> 16; 3537 if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) { 3538 DRM_ERROR("forbidden register for append cnt 0x%08x at %d\n", 3539 areg, idx); 3540 return -EINVAL; 3541 } 3542 break; 3543 } 3544 default: 3545 return -EINVAL; 3546 } 3547 return 0; 3548 } 3549 3550 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) 3551 { 3552 int ret = 0; 3553 u32 idx = 0; 3554 struct radeon_cs_packet pkt; 3555 3556 do { 3557 pkt.idx = idx; 3558 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]); 3559 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]); 3560 pkt.one_reg_wr = 0; 3561 switch (pkt.type) { 3562 case RADEON_PACKET_TYPE0: 3563 dev_err(rdev->dev, "Packet0 not allowed!\n"); 3564 ret = -EINVAL; 3565 break; 3566 case RADEON_PACKET_TYPE2: 3567 idx += 1; 3568 break; 3569 case RADEON_PACKET_TYPE3: 3570 pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]); 3571 ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt); 3572 idx += pkt.count + 2; 3573 break; 3574 default: 3575 dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type); 3576 ret = -EINVAL; 3577 break; 3578 } 3579 if (ret) 3580 break; 3581 } while (idx < ib->length_dw); 3582 3583 return ret; 3584 } 3585 3586 /** 3587 * evergreen_dma_ib_parse() - parse the DMA IB for VM 3588 * @rdev: radeon_device pointer 3589 * @ib: radeon_ib pointer 3590 * 3591 * Parses the DMA IB from the VM CS ioctl 3592 * checks for errors. (Cayman-SI) 3593 * Returns 0 for success and an error on failure. 
3594 **/ 3595 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) 3596 { 3597 u32 idx = 0; 3598 u32 header, cmd, count, sub_cmd; 3599 3600 do { 3601 header = ib->ptr[idx]; 3602 cmd = GET_DMA_CMD(header); 3603 count = GET_DMA_COUNT(header); 3604 sub_cmd = GET_DMA_SUB_CMD(header); 3605 3606 switch (cmd) { 3607 case DMA_PACKET_WRITE: 3608 switch (sub_cmd) { 3609 /* tiled */ 3610 case 8: 3611 idx += count + 7; 3612 break; 3613 /* linear */ 3614 case 0: 3615 idx += count + 3; 3616 break; 3617 default: 3618 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]); 3619 return -EINVAL; 3620 } 3621 break; 3622 case DMA_PACKET_COPY: 3623 switch (sub_cmd) { 3624 /* Copy L2L, DW aligned */ 3625 case 0x00: 3626 idx += 5; 3627 break; 3628 /* Copy L2T/T2L */ 3629 case 0x08: 3630 idx += 9; 3631 break; 3632 /* Copy L2L, byte aligned */ 3633 case 0x40: 3634 idx += 5; 3635 break; 3636 /* Copy L2L, partial */ 3637 case 0x41: 3638 idx += 9; 3639 break; 3640 /* Copy L2L, DW aligned, broadcast */ 3641 case 0x44: 3642 idx += 7; 3643 break; 3644 /* Copy L2T Frame to Field */ 3645 case 0x48: 3646 idx += 10; 3647 break; 3648 /* Copy L2T/T2L, partial */ 3649 case 0x49: 3650 idx += 12; 3651 break; 3652 /* Copy L2T broadcast */ 3653 case 0x4b: 3654 idx += 10; 3655 break; 3656 /* Copy L2T/T2L (tile units) */ 3657 case 0x4c: 3658 idx += 9; 3659 break; 3660 /* Copy T2T, partial (tile units) */ 3661 case 0x4d: 3662 idx += 13; 3663 break; 3664 /* Copy L2T broadcast (tile units) */ 3665 case 0x4f: 3666 idx += 10; 3667 break; 3668 default: 3669 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]); 3670 return -EINVAL; 3671 } 3672 break; 3673 case DMA_PACKET_CONSTANT_FILL: 3674 idx += 4; 3675 break; 3676 case DMA_PACKET_NOP: 3677 idx += 1; 3678 break; 3679 default: 3680 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); 3681 return -EINVAL; 3682 } 3683 } while (idx < ib->length_dw); 3684 3685 return 0; 3686 } 3687
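
/*
 * Illustrative sketch (not compiled): how the register safety bitmap used
 * by evergreen_is_safe_reg() above is indexed.  Register offsets are byte
 * offsets of dword-aligned registers, so dword index reg >> 2 selects the
 * bit; the bitmap is stored as 32-bit words, giving word index reg >> 7
 * and bit position (reg >> 2) & 31.  A clear bit means the register may be
 * written without further checks, a set bit means it is either forbidden
 * or needs the dedicated handling in evergreen_cs_handle_reg().  The
 * helper name below is made up for illustration only.
 */
#if 0
static bool example_reg_is_safe(const u32 *bm, u32 reg)
{
	u32 word = reg >> 7;		/* 32 registers (128 bytes) per bitmap word */
	u32 bit = (reg >> 2) & 31;	/* register's dword index within that word */

	if (word >= REG_SAFE_BM_SIZE)
		return false;
	return (bm[word] & (1u << bit)) == 0;
}
#endif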