/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_state.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * This is the main state upload code.
 *
 * Gallium uses Constant State Objects, or CSOs, for most state.  Large,
 * complex, or highly reusable state can be created once, and bound and
 * rebound multiple times.  This is modeled with the pipe->create_*_state()
 * and pipe->bind_*_state() hooks.  Highly dynamic or inexpensive state is
 * streamed out on the fly, via pipe->set_*_state() hooks.
 *
 * OpenGL involves frequently mutating context state, which is mirrored in
 * core Mesa by highly mutable data structures.  However, most applications
 * typically draw the same things over and over - from frame to frame, most
 * of the same objects are still visible and need to be redrawn.  So, rather
 * than inventing new state all the time, applications usually mutate to swap
 * between known states that we've seen before.
 *
 * Gallium isolates us from this mutation by tracking API state, and
 * distilling it into a set of Constant State Objects, or CSOs.  Large,
 * complex, or typically reusable state can be created once, then reused
 * multiple times.  Drivers can create and store their own associated data.
 * This create/bind model corresponds to the pipe->create_*_state() and
 * pipe->bind_*_state() driver hooks.
 *
 * Some state is cheap to create, or expected to be highly dynamic.  Rather
 * than creating and caching piles of CSOs for these, Gallium simply streams
 * them out, via the pipe->set_*_state() driver hooks.
 *
 * To reduce draw time overhead, we try to compute as much state at create
 * time as possible.  Wherever possible, we translate the Gallium pipe state
 * to 3DSTATE commands, and store those commands in the CSO.  At draw time,
 * we can simply memcpy them into a batch buffer.
 *
 * No hardware matches the abstraction perfectly, so some commands require
 * information from multiple CSOs.  In this case, we can store two copies
 * of the packet (one in each CSO), and simply | together their DWords at
 * draw time.  Sometimes the second set is trivial (one or two fields), so
 * we simply pack it at draw time.
 *
 * There are two main components in the file below.  First, the CSO hooks
 * create/bind/track state.  The second are the draw-time upload functions,
 * iris_upload_render_state() and iris_upload_compute_state(), which read
 * the context state and emit the commands into the actual batch.
71 */ 72 73#include <stdio.h> 74#include <errno.h> 75 76#if HAVE_VALGRIND 77#include <valgrind.h> 78#include <memcheck.h> 79#define VG(x) x 80#ifdef DEBUG 81#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) 82#endif 83#else 84#define VG(x) 85#endif 86 87#include "pipe/p_defines.h" 88#include "pipe/p_state.h" 89#include "pipe/p_context.h" 90#include "pipe/p_screen.h" 91#include "util/u_dual_blend.h" 92#include "util/u_inlines.h" 93#include "util/u_format.h" 94#include "util/u_framebuffer.h" 95#include "util/u_transfer.h" 96#include "util/u_upload_mgr.h" 97#include "util/u_viewport.h" 98#include "drm-uapi/i915_drm.h" 99#include "nir.h" 100#include "intel/compiler/brw_compiler.h" 101#include "intel/common/gen_l3_config.h" 102#include "intel/common/gen_sample_positions.h" 103#include "iris_batch.h" 104#include "iris_context.h" 105#include "iris_defines.h" 106#include "iris_pipe.h" 107#include "iris_resource.h" 108 109#define __gen_address_type struct iris_address 110#define __gen_user_data struct iris_batch 111 112#define ARRAY_BYTES(x) (sizeof(uint32_t) * ARRAY_SIZE(x)) 113 114static uint64_t 115__gen_combine_address(struct iris_batch *batch, void *location, 116 struct iris_address addr, uint32_t delta) 117{ 118 uint64_t result = addr.offset + delta; 119 120 if (addr.bo) { 121 iris_use_pinned_bo(batch, addr.bo, addr.write); 122 /* Assume this is a general address, not relative to a base. 
*/ 123 result += addr.bo->gtt_offset; 124 } 125 126 return result; 127} 128 129#define __genxml_cmd_length(cmd) cmd ## _length 130#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias 131#define __genxml_cmd_header(cmd) cmd ## _header 132#define __genxml_cmd_pack(cmd) cmd ## _pack 133 134#define _iris_pack_command(batch, cmd, dst, name) \ 135 for (struct cmd name = { __genxml_cmd_header(cmd) }, \ 136 *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \ 137 ({ __genxml_cmd_pack(cmd)(batch, (void *)_dst, &name); \ 138 _dst = NULL; \ 139 })) 140 141#define iris_pack_command(cmd, dst, name) \ 142 _iris_pack_command(NULL, cmd, dst, name) 143 144#define iris_pack_state(cmd, dst, name) \ 145 for (struct cmd name = {}, \ 146 *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \ 147 __genxml_cmd_pack(cmd)(NULL, (void *)_dst, &name), \ 148 _dst = NULL) 149 150#define iris_emit_cmd(batch, cmd, name) \ 151 _iris_pack_command(batch, cmd, iris_get_command_space(batch, 4 * __genxml_cmd_length(cmd)), name) 152 153#define iris_emit_merge(batch, dwords0, dwords1, num_dwords) \ 154 do { \ 155 uint32_t *dw = iris_get_command_space(batch, 4 * num_dwords); \ 156 for (uint32_t i = 0; i < num_dwords; i++) \ 157 dw[i] = (dwords0)[i] | (dwords1)[i]; \ 158 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords)); \ 159 } while (0) 160 161#include "genxml/genX_pack.h" 162#include "genxml/gen_macros.h" 163#include "genxml/genX_bits.h" 164 165#if GEN_GEN == 8 166#define MOCS_PTE 0x18 167#define MOCS_WB 0x78 168#else 169#define MOCS_PTE (1 << 1) 170#define MOCS_WB (2 << 1) 171#endif 172 173static uint32_t 174mocs(const struct iris_bo *bo) 175{ 176 return bo && bo->external ? MOCS_PTE : MOCS_WB; 177} 178 179/** 180 * Statically assert that PIPE_* enums match the hardware packets. 181 * (As long as they match, we don't need to translate them.) 
182 */ 183UNUSED static void pipe_asserts() 184{ 185#define PIPE_ASSERT(x) STATIC_ASSERT((int)x) 186 187 /* pipe_logicop happens to match the hardware. */ 188 PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR); 189 PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR); 190 PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED); 191 PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED); 192 PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE); 193 PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT); 194 PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR); 195 PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND); 196 PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND); 197 PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV); 198 PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP); 199 PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED); 200 PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY); 201 PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE); 202 PIPE_ASSERT(PIPE_LOGICOP_OR == LOGICOP_OR); 203 PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET); 204 205 /* pipe_blend_func happens to match the hardware. 
*/ 206 PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE); 207 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR); 208 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA); 209 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA); 210 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR); 211 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE); 212 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR); 213 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA); 214 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR); 215 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA); 216 PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO); 217 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR); 218 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA); 219 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA); 220 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR); 221 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR); 222 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA); 223 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR); 224 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA); 225 226 /* pipe_blend_func happens to match the hardware. */ 227 PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD); 228 PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT); 229 PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT); 230 PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN); 231 PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX); 232 233 /* pipe_stencil_op happens to match the hardware. 
*/ 234 PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP); 235 PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO); 236 PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE); 237 PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT); 238 PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT); 239 PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR); 240 PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR); 241 PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT); 242 243 /* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */ 244 PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT); 245 PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT); 246#undef PIPE_ASSERT 247} 248 249static unsigned 250translate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch) 251{ 252 static const unsigned map[] = { 253 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST, 254 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST, 255 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP, 256 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP, 257 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST, 258 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, 259 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN, 260 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST, 261 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP, 262 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON, 263 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, 264 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, 265 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, 266 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, 267 [PIPE_PRIM_PATCHES] = _3DPRIM_PATCHLIST_1 - 1, 268 }; 269 270 return map[prim] + (prim == PIPE_PRIM_PATCHES ? 
verts_per_patch : 0); 271} 272 273static unsigned 274translate_compare_func(enum pipe_compare_func pipe_func) 275{ 276 static const unsigned map[] = { 277 [PIPE_FUNC_NEVER] = COMPAREFUNCTION_NEVER, 278 [PIPE_FUNC_LESS] = COMPAREFUNCTION_LESS, 279 [PIPE_FUNC_EQUAL] = COMPAREFUNCTION_EQUAL, 280 [PIPE_FUNC_LEQUAL] = COMPAREFUNCTION_LEQUAL, 281 [PIPE_FUNC_GREATER] = COMPAREFUNCTION_GREATER, 282 [PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL, 283 [PIPE_FUNC_GEQUAL] = COMPAREFUNCTION_GEQUAL, 284 [PIPE_FUNC_ALWAYS] = COMPAREFUNCTION_ALWAYS, 285 }; 286 return map[pipe_func]; 287} 288 289static unsigned 290translate_shadow_func(enum pipe_compare_func pipe_func) 291{ 292 /* Gallium specifies the result of shadow comparisons as: 293 * 294 * 1 if ref <op> texel, 295 * 0 otherwise. 296 * 297 * The hardware does: 298 * 299 * 0 if texel <op> ref, 300 * 1 otherwise. 301 * 302 * So we need to flip the operator and also negate. 303 */ 304 static const unsigned map[] = { 305 [PIPE_FUNC_NEVER] = PREFILTEROPALWAYS, 306 [PIPE_FUNC_LESS] = PREFILTEROPLEQUAL, 307 [PIPE_FUNC_EQUAL] = PREFILTEROPNOTEQUAL, 308 [PIPE_FUNC_LEQUAL] = PREFILTEROPLESS, 309 [PIPE_FUNC_GREATER] = PREFILTEROPGEQUAL, 310 [PIPE_FUNC_NOTEQUAL] = PREFILTEROPEQUAL, 311 [PIPE_FUNC_GEQUAL] = PREFILTEROPGREATER, 312 [PIPE_FUNC_ALWAYS] = PREFILTEROPNEVER, 313 }; 314 return map[pipe_func]; 315} 316 317static unsigned 318translate_cull_mode(unsigned pipe_face) 319{ 320 static const unsigned map[4] = { 321 [PIPE_FACE_NONE] = CULLMODE_NONE, 322 [PIPE_FACE_FRONT] = CULLMODE_FRONT, 323 [PIPE_FACE_BACK] = CULLMODE_BACK, 324 [PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH, 325 }; 326 return map[pipe_face]; 327} 328 329static unsigned 330translate_fill_mode(unsigned pipe_polymode) 331{ 332 static const unsigned map[4] = { 333 [PIPE_POLYGON_MODE_FILL] = FILL_MODE_SOLID, 334 [PIPE_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, 335 [PIPE_POLYGON_MODE_POINT] = FILL_MODE_POINT, 336 [PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID, 337 }; 338 
return map[pipe_polymode]; 339} 340 341static unsigned 342translate_mip_filter(enum pipe_tex_mipfilter pipe_mip) 343{ 344 static const unsigned map[] = { 345 [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST, 346 [PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR, 347 [PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE, 348 }; 349 return map[pipe_mip]; 350} 351 352static uint32_t 353translate_wrap(unsigned pipe_wrap) 354{ 355 static const unsigned map[] = { 356 [PIPE_TEX_WRAP_REPEAT] = TCM_WRAP, 357 [PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER, 358 [PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP, 359 [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, 360 [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR, 361 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, 362 363 /* These are unsupported. */ 364 [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, 365 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, 366 }; 367 return map[pipe_wrap]; 368} 369 370static struct iris_address 371ro_bo(struct iris_bo *bo, uint64_t offset) 372{ 373 /* CSOs must pass NULL for bo! Otherwise it will add the BO to the 374 * validation list at CSO creation time, instead of draw time. 375 */ 376 return (struct iris_address) { .bo = bo, .offset = offset }; 377} 378 379static struct iris_address 380rw_bo(struct iris_bo *bo, uint64_t offset) 381{ 382 /* CSOs must pass NULL for bo! Otherwise it will add the BO to the 383 * validation list at CSO creation time, instead of draw time. 384 */ 385 return (struct iris_address) { .bo = bo, .offset = offset, .write = true }; 386} 387 388/** 389 * Allocate space for some indirect state. 390 * 391 * Return a pointer to the map (to fill it out) and a state ref (for 392 * referring to the state in GPU commands). 
393 */ 394static void * 395upload_state(struct u_upload_mgr *uploader, 396 struct iris_state_ref *ref, 397 unsigned size, 398 unsigned alignment) 399{ 400 void *p = NULL; 401 u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p); 402 return p; 403} 404 405/** 406 * Stream out temporary/short-lived state. 407 * 408 * This allocates space, pins the BO, and includes the BO address in the 409 * returned offset (which works because all state lives in 32-bit memory 410 * zones). 411 */ 412static uint32_t * 413stream_state(struct iris_batch *batch, 414 struct u_upload_mgr *uploader, 415 struct pipe_resource **out_res, 416 unsigned size, 417 unsigned alignment, 418 uint32_t *out_offset) 419{ 420 void *ptr = NULL; 421 422 u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr); 423 424 struct iris_bo *bo = iris_resource_bo(*out_res); 425 iris_use_pinned_bo(batch, bo, false); 426 427 *out_offset += iris_bo_offset_from_base_address(bo); 428 429 return ptr; 430} 431 432/** 433 * stream_state() + memcpy. 434 */ 435static uint32_t 436emit_state(struct iris_batch *batch, 437 struct u_upload_mgr *uploader, 438 struct pipe_resource **out_res, 439 const void *data, 440 unsigned size, 441 unsigned alignment) 442{ 443 unsigned offset = 0; 444 uint32_t *map = 445 stream_state(batch, uploader, out_res, size, alignment, &offset); 446 447 if (map) 448 memcpy(map, data, size); 449 450 return offset; 451} 452 453/** 454 * Did field 'x' change between 'old_cso' and 'new_cso'? 455 * 456 * (If so, we may want to set some dirty flags.) 457 */ 458#define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x)) 459#define cso_changed_memcmp(x) \ 460 (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0) 461 462static void 463flush_for_state_base_change(struct iris_batch *batch) 464{ 465 /* Flush before emitting STATE_BASE_ADDRESS. 466 * 467 * This isn't documented anywhere in the PRM. 
However, it seems to be 468 * necessary prior to changing the surface state base adress. We've 469 * seen issues in Vulkan where we get GPU hangs when using multi-level 470 * command buffers which clear depth, reset state base address, and then 471 * go render stuff. 472 * 473 * Normally, in GL, we would trust the kernel to do sufficient stalls 474 * and flushes prior to executing our batch. However, it doesn't seem 475 * as if the kernel's flushing is always sufficient and we don't want to 476 * rely on it. 477 * 478 * We make this an end-of-pipe sync instead of a normal flush because we 479 * do not know the current status of the GPU. On Haswell at least, 480 * having a fast-clear operation in flight at the same time as a normal 481 * rendering operation can cause hangs. Since the kernel's flushing is 482 * insufficient, we need to ensure that any rendering operations from 483 * other processes are definitely complete before we try to do our own 484 * rendering. It's a bit of a big hammer but it appears to work. 
485 */ 486 iris_emit_end_of_pipe_sync(batch, 487 PIPE_CONTROL_RENDER_TARGET_FLUSH | 488 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 489 PIPE_CONTROL_DATA_CACHE_FLUSH); 490} 491 492static void 493_iris_emit_lri(struct iris_batch *batch, uint32_t reg, uint32_t val) 494{ 495 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { 496 lri.RegisterOffset = reg; 497 lri.DataDWord = val; 498 } 499} 500#define iris_emit_lri(b, r, v) _iris_emit_lri(b, GENX(r##_num), v) 501 502static void 503_iris_emit_lrr(struct iris_batch *batch, uint32_t dst, uint32_t src) 504{ 505 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_REG), lrr) { 506 lrr.SourceRegisterAddress = src; 507 lrr.DestinationRegisterAddress = dst; 508 } 509} 510 511static void 512emit_pipeline_select(struct iris_batch *batch, uint32_t pipeline) 513{ 514#if GEN_GEN >= 8 && GEN_GEN < 10 515 /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: 516 * 517 * Software must clear the COLOR_CALC_STATE Valid field in 518 * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT 519 * with Pipeline Select set to GPGPU. 520 * 521 * The internal hardware docs recommend the same workaround for Gen9 522 * hardware too. 523 */ 524 if (pipeline == GPGPU) 525 iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), t); 526#endif 527 528 529 /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] 530 * PIPELINE_SELECT [DevBWR+]": 531 * 532 * "Project: DEVSNB+ 533 * 534 * Software must ensure all the write caches are flushed through a 535 * stalling PIPE_CONTROL command followed by another PIPE_CONTROL 536 * command to invalidate read only caches prior to programming 537 * MI_PIPELINE_SELECT command to change the Pipeline Select Mode." 
538 */ 539 iris_emit_pipe_control_flush(batch, 540 PIPE_CONTROL_RENDER_TARGET_FLUSH | 541 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 542 PIPE_CONTROL_DATA_CACHE_FLUSH | 543 PIPE_CONTROL_CS_STALL); 544 545 iris_emit_pipe_control_flush(batch, 546 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 547 PIPE_CONTROL_CONST_CACHE_INVALIDATE | 548 PIPE_CONTROL_STATE_CACHE_INVALIDATE | 549 PIPE_CONTROL_INSTRUCTION_INVALIDATE); 550 551 iris_emit_cmd(batch, GENX(PIPELINE_SELECT), sel) { 552#if GEN_GEN >= 9 553 sel.MaskBits = 3; 554#endif 555 sel.PipelineSelection = pipeline; 556 } 557} 558 559UNUSED static void 560init_glk_barrier_mode(struct iris_batch *batch, uint32_t value) 561{ 562#if GEN_GEN == 9 563 /* Project: DevGLK 564 * 565 * "This chicken bit works around a hardware issue with barrier 566 * logic encountered when switching between GPGPU and 3D pipelines. 567 * To workaround the issue, this mode bit should be set after a 568 * pipeline is selected." 569 */ 570 uint32_t reg_val; 571 iris_pack_state(GENX(SLICE_COMMON_ECO_CHICKEN1), ®_val, reg) { 572 reg.GLKBarrierMode = value; 573 reg.GLKBarrierModeMask = 1; 574 } 575 iris_emit_lri(batch, SLICE_COMMON_ECO_CHICKEN1, reg_val); 576#endif 577} 578 579static void 580init_state_base_address(struct iris_batch *batch) 581{ 582 flush_for_state_base_change(batch); 583 584 /* We program most base addresses once at context initialization time. 585 * Each base address points at a 4GB memory zone, and never needs to 586 * change. See iris_bufmgr.h for a description of the memory zones. 587 * 588 * The one exception is Surface State Base Address, which needs to be 589 * updated occasionally. See iris_binder.c for the details there. 
590 */ 591 iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) { 592 sba.GeneralStateMOCS = MOCS_WB; 593 sba.StatelessDataPortAccessMOCS = MOCS_WB; 594 sba.DynamicStateMOCS = MOCS_WB; 595 sba.IndirectObjectMOCS = MOCS_WB; 596 sba.InstructionMOCS = MOCS_WB; 597 598 sba.GeneralStateBaseAddressModifyEnable = true; 599 sba.DynamicStateBaseAddressModifyEnable = true; 600 sba.IndirectObjectBaseAddressModifyEnable = true; 601 sba.InstructionBaseAddressModifyEnable = true; 602 sba.GeneralStateBufferSizeModifyEnable = true; 603 sba.DynamicStateBufferSizeModifyEnable = true; 604#if (GEN_GEN >= 9) 605 sba.BindlessSurfaceStateBaseAddressModifyEnable = true; 606 sba.BindlessSurfaceStateMOCS = MOCS_WB; 607#endif 608 sba.IndirectObjectBufferSizeModifyEnable = true; 609 sba.InstructionBuffersizeModifyEnable = true; 610 611 sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START); 612 sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START); 613 614 sba.GeneralStateBufferSize = 0xfffff; 615 sba.IndirectObjectBufferSize = 0xfffff; 616 sba.InstructionBufferSize = 0xfffff; 617 sba.DynamicStateBufferSize = 0xfffff; 618 } 619} 620 621static void 622iris_emit_l3_config(struct iris_batch *batch, const struct gen_l3_config *cfg, 623 bool has_slm, bool wants_dc_cache) 624{ 625 uint32_t reg_val; 626 iris_pack_state(GENX(L3CNTLREG), ®_val, reg) { 627 reg.SLMEnable = has_slm; 628#if GEN_GEN == 11 629 /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set 630 * in L3CNTLREG register. The default setting of the bit is not the 631 * desirable behavior. 
632 */ 633 reg.ErrorDetectionBehaviorControl = true; 634 reg.UseFullWays = true; 635#endif 636 reg.URBAllocation = cfg->n[GEN_L3P_URB]; 637 reg.ROAllocation = cfg->n[GEN_L3P_RO]; 638 reg.DCAllocation = cfg->n[GEN_L3P_DC]; 639 reg.AllAllocation = cfg->n[GEN_L3P_ALL]; 640 } 641 iris_emit_lri(batch, L3CNTLREG, reg_val); 642} 643 644static void 645iris_emit_default_l3_config(struct iris_batch *batch, 646 const struct gen_device_info *devinfo, 647 bool compute) 648{ 649 bool wants_dc_cache = true; 650 bool has_slm = compute; 651 const struct gen_l3_weights w = 652 gen_get_default_l3_weights(devinfo, wants_dc_cache, has_slm); 653 const struct gen_l3_config *cfg = gen_get_l3_config(devinfo, w); 654 iris_emit_l3_config(batch, cfg, has_slm, wants_dc_cache); 655} 656 657#if GEN_GEN == 9 || GEN_GEN == 10 658static void 659iris_enable_obj_preemption(struct iris_batch *batch, bool enable) 660{ 661 uint32_t reg_val; 662 663 /* A fixed function pipe flush is required before modifying this field */ 664 iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH); 665 666 /* enable object level preemption */ 667 iris_pack_state(GENX(CS_CHICKEN1), ®_val, reg) { 668 reg.ReplayMode = enable; 669 reg.ReplayModeMask = true; 670 } 671 iris_emit_lri(batch, CS_CHICKEN1, reg_val); 672} 673#endif 674 675/** 676 * Upload the initial GPU state for a render context. 677 * 678 * This sets some invariant state that needs to be programmed a particular 679 * way, but we never actually change. 
680 */ 681static void 682iris_init_render_context(struct iris_screen *screen, 683 struct iris_batch *batch, 684 struct iris_vtable *vtbl, 685 struct pipe_debug_callback *dbg) 686{ 687 UNUSED const struct gen_device_info *devinfo = &screen->devinfo; 688 uint32_t reg_val; 689 690 emit_pipeline_select(batch, _3D); 691 692 iris_emit_default_l3_config(batch, devinfo, false); 693 694 init_state_base_address(batch); 695 696#if GEN_GEN >= 9 697 iris_pack_state(GENX(CS_DEBUG_MODE2), ®_val, reg) { 698 reg.CONSTANT_BUFFERAddressOffsetDisable = true; 699 reg.CONSTANT_BUFFERAddressOffsetDisableMask = true; 700 } 701 iris_emit_lri(batch, CS_DEBUG_MODE2, reg_val); 702#else 703 iris_pack_state(GENX(INSTPM), ®_val, reg) { 704 reg.CONSTANT_BUFFERAddressOffsetDisable = true; 705 reg.CONSTANT_BUFFERAddressOffsetDisableMask = true; 706 } 707 iris_emit_lri(batch, INSTPM, reg_val); 708#endif 709 710#if GEN_GEN == 9 711 iris_pack_state(GENX(CACHE_MODE_1), ®_val, reg) { 712 reg.FloatBlendOptimizationEnable = true; 713 reg.FloatBlendOptimizationEnableMask = true; 714 reg.PartialResolveDisableInVC = true; 715 reg.PartialResolveDisableInVCMask = true; 716 } 717 iris_emit_lri(batch, CACHE_MODE_1, reg_val); 718 719 if (devinfo->is_geminilake) 720 init_glk_barrier_mode(batch, GLK_BARRIER_MODE_3D_HULL); 721#endif 722 723#if GEN_GEN == 11 724 iris_pack_state(GENX(SAMPLER_MODE), ®_val, reg) { 725 reg.HeaderlessMessageforPreemptableContexts = 1; 726 reg.HeaderlessMessageforPreemptableContextsMask = 1; 727 } 728 iris_emit_lri(batch, SAMPLER_MODE, reg_val); 729 730 /* Bit 1 must be set in HALF_SLICE_CHICKEN7. 
*/ 731 iris_pack_state(GENX(HALF_SLICE_CHICKEN7), ®_val, reg) { 732 reg.EnabledTexelOffsetPrecisionFix = 1; 733 reg.EnabledTexelOffsetPrecisionFixMask = 1; 734 } 735 iris_emit_lri(batch, HALF_SLICE_CHICKEN7, reg_val); 736 737 iris_pack_state(GENX(SLICE_COMMON_ECO_CHICKEN1), ®_val, reg) { 738 reg.StateCacheRedirectToCSSectionEnable = true; 739 reg.StateCacheRedirectToCSSectionEnableMask = true; 740 } 741 iris_emit_lri(batch, SLICE_COMMON_ECO_CHICKEN1, reg_val); 742 743 // XXX: 3D_MODE? 744#endif 745 746 /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined, so we want to avoid 747 * changing it dynamically. We set it to the maximum size here, and 748 * instead include the render target dimensions in the viewport, so 749 * viewport extents clipping takes care of pruning stray geometry. 750 */ 751 iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { 752 rect.ClippedDrawingRectangleXMax = UINT16_MAX; 753 rect.ClippedDrawingRectangleYMax = UINT16_MAX; 754 } 755 756 /* Set the initial MSAA sample positions. */ 757 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) { 758 GEN_SAMPLE_POS_1X(pat._1xSample); 759 GEN_SAMPLE_POS_2X(pat._2xSample); 760 GEN_SAMPLE_POS_4X(pat._4xSample); 761 GEN_SAMPLE_POS_8X(pat._8xSample); 762#if GEN_GEN >= 9 763 GEN_SAMPLE_POS_16X(pat._16xSample); 764#endif 765 } 766 767 /* Use the legacy AA line coverage computation. */ 768 iris_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo); 769 770 /* Disable chromakeying (it's for media) */ 771 iris_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo); 772 773 /* We want regular rendering, not special HiZ operations. */ 774 iris_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo); 775 776 /* No polygon stippling offsets are necessary. */ 777 /* TODO: may need to set an offset for origin-UL framebuffers */ 778 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo); 779 780 /* Set a static partitioning of the push constant area. 
*/ 781 /* TODO: this may be a bad idea...could starve the push ringbuffers... */ 782 for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) { 783 iris_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) { 784 alloc._3DCommandSubOpcode = 18 + i; 785 alloc.ConstantBufferOffset = 6 * i; 786 alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? 8 : 6; 787 } 788 } 789 790#if GEN_GEN == 10 791 /* Gen11+ is enabled for us by the kernel. */ 792 iris_enable_obj_preemption(batch, true); 793#endif 794} 795 796static void 797iris_init_compute_context(struct iris_screen *screen, 798 struct iris_batch *batch, 799 struct iris_vtable *vtbl, 800 struct pipe_debug_callback *dbg) 801{ 802 UNUSED const struct gen_device_info *devinfo = &screen->devinfo; 803 804 emit_pipeline_select(batch, GPGPU); 805 806 iris_emit_default_l3_config(batch, devinfo, true); 807 808 init_state_base_address(batch); 809 810#if GEN_GEN == 9 811 if (devinfo->is_geminilake) 812 init_glk_barrier_mode(batch, GLK_BARRIER_MODE_GPGPU); 813#endif 814} 815 816struct iris_vertex_buffer_state { 817 /** The VERTEX_BUFFER_STATE hardware structure. */ 818 uint32_t state[GENX(VERTEX_BUFFER_STATE_length)]; 819 820 /** The resource to source vertex data from. */ 821 struct pipe_resource *resource; 822}; 823 824struct iris_depth_buffer_state { 825 /* Depth/HiZ/Stencil related hardware packets. */ 826 uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) + 827 GENX(3DSTATE_STENCIL_BUFFER_length) + 828 GENX(3DSTATE_HIER_DEPTH_BUFFER_length) + 829 GENX(3DSTATE_CLEAR_PARAMS_length)]; 830}; 831 832/** 833 * Generation-specific context state (ice->state.genx->...). 834 * 835 * Most state can go in iris_context directly, but these encode hardware 836 * packets which vary by generation. 
837 */ 838struct iris_genx_state { 839 struct iris_vertex_buffer_state vertex_buffers[33]; 840 841 struct iris_depth_buffer_state depth_buffer; 842 843 uint32_t so_buffers[4 * GENX(3DSTATE_SO_BUFFER_length)]; 844 845#if GEN_GEN == 9 846 /* Is object level preemption enabled? */ 847 bool object_preemption; 848#endif 849 850 struct { 851#if GEN_GEN == 8 852 struct brw_image_param image_param[PIPE_MAX_SHADER_IMAGES]; 853#endif 854 } shaders[MESA_SHADER_STAGES]; 855}; 856 857/** 858 * The pipe->set_blend_color() driver hook. 859 * 860 * This corresponds to our COLOR_CALC_STATE. 861 */ 862static void 863iris_set_blend_color(struct pipe_context *ctx, 864 const struct pipe_blend_color *state) 865{ 866 struct iris_context *ice = (struct iris_context *) ctx; 867 868 /* Our COLOR_CALC_STATE is exactly pipe_blend_color, so just memcpy */ 869 memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color)); 870 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE; 871} 872 873/** 874 * Gallium CSO for blend state (see pipe_blend_state). 875 */ 876struct iris_blend_state { 877 /** Partial 3DSTATE_PS_BLEND */ 878 uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)]; 879 880 /** Partial BLEND_STATE */ 881 uint32_t blend_state[GENX(BLEND_STATE_length) + 882 BRW_MAX_DRAW_BUFFERS * GENX(BLEND_STATE_ENTRY_length)]; 883 884 bool alpha_to_coverage; /* for shader key */ 885 886 /** Bitfield of whether blending is enabled for RT[i] - for aux resolves */ 887 uint8_t blend_enables; 888 889 /** Bitfield of whether color writes are enabled for RT[i] */ 890 uint8_t color_write_enables; 891 892 /** Does RT[0] use dual color blending? 
*/ 893 bool dual_color_blending; 894}; 895 896static enum pipe_blendfactor 897fix_blendfactor(enum pipe_blendfactor f, bool alpha_to_one) 898{ 899 if (alpha_to_one) { 900 if (f == PIPE_BLENDFACTOR_SRC1_ALPHA) 901 return PIPE_BLENDFACTOR_ONE; 902 903 if (f == PIPE_BLENDFACTOR_INV_SRC1_ALPHA) 904 return PIPE_BLENDFACTOR_ZERO; 905 } 906 907 return f; 908} 909 910/** 911 * The pipe->create_blend_state() driver hook. 912 * 913 * Translates a pipe_blend_state into iris_blend_state. 914 */ 915static void * 916iris_create_blend_state(struct pipe_context *ctx, 917 const struct pipe_blend_state *state) 918{ 919 struct iris_blend_state *cso = malloc(sizeof(struct iris_blend_state)); 920 uint32_t *blend_entry = cso->blend_state + GENX(BLEND_STATE_length); 921 922 cso->blend_enables = 0; 923 cso->color_write_enables = 0; 924 STATIC_ASSERT(BRW_MAX_DRAW_BUFFERS <= 8); 925 926 cso->alpha_to_coverage = state->alpha_to_coverage; 927 928 bool indep_alpha_blend = false; 929 930 for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) { 931 const struct pipe_rt_blend_state *rt = 932 &state->rt[state->independent_blend_enable ? 
i : 0]; 933 934 enum pipe_blendfactor src_rgb = 935 fix_blendfactor(rt->rgb_src_factor, state->alpha_to_one); 936 enum pipe_blendfactor src_alpha = 937 fix_blendfactor(rt->alpha_src_factor, state->alpha_to_one); 938 enum pipe_blendfactor dst_rgb = 939 fix_blendfactor(rt->rgb_dst_factor, state->alpha_to_one); 940 enum pipe_blendfactor dst_alpha = 941 fix_blendfactor(rt->alpha_dst_factor, state->alpha_to_one); 942 943 if (rt->rgb_func != rt->alpha_func || 944 src_rgb != src_alpha || dst_rgb != dst_alpha) 945 indep_alpha_blend = true; 946 947 if (rt->blend_enable) 948 cso->blend_enables |= 1u << i; 949 950 if (rt->colormask) 951 cso->color_write_enables |= 1u << i; 952 953 iris_pack_state(GENX(BLEND_STATE_ENTRY), blend_entry, be) { 954 be.LogicOpEnable = state->logicop_enable; 955 be.LogicOpFunction = state->logicop_func; 956 957 be.PreBlendSourceOnlyClampEnable = false; 958 be.ColorClampRange = COLORCLAMP_RTFORMAT; 959 be.PreBlendColorClampEnable = true; 960 be.PostBlendColorClampEnable = true; 961 962 be.ColorBufferBlendEnable = rt->blend_enable; 963 964 be.ColorBlendFunction = rt->rgb_func; 965 be.AlphaBlendFunction = rt->alpha_func; 966 be.SourceBlendFactor = src_rgb; 967 be.SourceAlphaBlendFactor = src_alpha; 968 be.DestinationBlendFactor = dst_rgb; 969 be.DestinationAlphaBlendFactor = dst_alpha; 970 971 be.WriteDisableRed = !(rt->colormask & PIPE_MASK_R); 972 be.WriteDisableGreen = !(rt->colormask & PIPE_MASK_G); 973 be.WriteDisableBlue = !(rt->colormask & PIPE_MASK_B); 974 be.WriteDisableAlpha = !(rt->colormask & PIPE_MASK_A); 975 } 976 blend_entry += GENX(BLEND_STATE_ENTRY_length); 977 } 978 979 iris_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) { 980 /* pb.HasWriteableRT is filled in at draw time. 981 * pb.AlphaTestEnable is filled in at draw time. 982 * 983 * pb.ColorBufferBlendEnable is filled in at draw time so we can avoid 984 * setting it when dual color blending without an appropriate shader. 
985 */ 986 987 pb.AlphaToCoverageEnable = state->alpha_to_coverage; 988 pb.IndependentAlphaBlendEnable = indep_alpha_blend; 989 990 pb.SourceBlendFactor = 991 fix_blendfactor(state->rt[0].rgb_src_factor, state->alpha_to_one); 992 pb.SourceAlphaBlendFactor = 993 fix_blendfactor(state->rt[0].alpha_src_factor, state->alpha_to_one); 994 pb.DestinationBlendFactor = 995 fix_blendfactor(state->rt[0].rgb_dst_factor, state->alpha_to_one); 996 pb.DestinationAlphaBlendFactor = 997 fix_blendfactor(state->rt[0].alpha_dst_factor, state->alpha_to_one); 998 } 999 1000 iris_pack_state(GENX(BLEND_STATE), cso->blend_state, bs) { 1001 bs.AlphaToCoverageEnable = state->alpha_to_coverage; 1002 bs.IndependentAlphaBlendEnable = indep_alpha_blend; 1003 bs.AlphaToOneEnable = state->alpha_to_one; 1004 bs.AlphaToCoverageDitherEnable = state->alpha_to_coverage; 1005 bs.ColorDitherEnable = state->dither; 1006 /* bl.AlphaTestEnable and bs.AlphaTestFunction are filled in later. */ 1007 } 1008 1009 cso->dual_color_blending = util_blend_state_is_dual(state, 0); 1010 1011 return cso; 1012} 1013 1014/** 1015 * The pipe->bind_blend_state() driver hook. 1016 * 1017 * Bind a blending CSO and flag related dirty bits. 1018 */ 1019static void 1020iris_bind_blend_state(struct pipe_context *ctx, void *state) 1021{ 1022 struct iris_context *ice = (struct iris_context *) ctx; 1023 struct iris_blend_state *cso = state; 1024 1025 ice->state.cso_blend = cso; 1026 ice->state.blend_enables = cso ? cso->blend_enables : 0; 1027 1028 ice->state.dirty |= IRIS_DIRTY_PS_BLEND; 1029 ice->state.dirty |= IRIS_DIRTY_BLEND_STATE; 1030 ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 1031 ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_BLEND]; 1032} 1033 1034/** 1035 * Return true if the FS writes to any color outputs which are not disabled 1036 * via color masking. 
1037 */ 1038static bool 1039has_writeable_rt(const struct iris_blend_state *cso_blend, 1040 const struct shader_info *fs_info) 1041{ 1042 if (!fs_info) 1043 return false; 1044 1045 unsigned rt_outputs = fs_info->outputs_written >> FRAG_RESULT_DATA0; 1046 1047 if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) 1048 rt_outputs = (1 << BRW_MAX_DRAW_BUFFERS) - 1; 1049 1050 return cso_blend->color_write_enables & rt_outputs; 1051} 1052 1053/** 1054 * Gallium CSO for depth, stencil, and alpha testing state. 1055 */ 1056struct iris_depth_stencil_alpha_state { 1057 /** Partial 3DSTATE_WM_DEPTH_STENCIL. */ 1058 uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; 1059 1060 /** Outbound to BLEND_STATE, 3DSTATE_PS_BLEND, COLOR_CALC_STATE. */ 1061 struct pipe_alpha_state alpha; 1062 1063 /** Outbound to resolve and cache set tracking. */ 1064 bool depth_writes_enabled; 1065 bool stencil_writes_enabled; 1066}; 1067 1068/** 1069 * The pipe->create_depth_stencil_alpha_state() driver hook. 1070 * 1071 * We encode most of 3DSTATE_WM_DEPTH_STENCIL, and just save off the alpha 1072 * testing state since we need pieces of it in a variety of places. 1073 */ 1074static void * 1075iris_create_zsa_state(struct pipe_context *ctx, 1076 const struct pipe_depth_stencil_alpha_state *state) 1077{ 1078 struct iris_depth_stencil_alpha_state *cso = 1079 malloc(sizeof(struct iris_depth_stencil_alpha_state)); 1080 1081 bool two_sided_stencil = state->stencil[1].enabled; 1082 1083 cso->alpha = state->alpha; 1084 cso->depth_writes_enabled = state->depth.writemask; 1085 cso->stencil_writes_enabled = 1086 state->stencil[0].writemask != 0 || 1087 (two_sided_stencil && state->stencil[1].writemask != 0); 1088 1089 /* The state tracker needs to optimize away EQUAL writes for us. 
*/ 1090 assert(!(state->depth.func == PIPE_FUNC_EQUAL && state->depth.writemask)); 1091 1092 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), cso->wmds, wmds) { 1093 wmds.StencilFailOp = state->stencil[0].fail_op; 1094 wmds.StencilPassDepthFailOp = state->stencil[0].zfail_op; 1095 wmds.StencilPassDepthPassOp = state->stencil[0].zpass_op; 1096 wmds.StencilTestFunction = 1097 translate_compare_func(state->stencil[0].func); 1098 wmds.BackfaceStencilFailOp = state->stencil[1].fail_op; 1099 wmds.BackfaceStencilPassDepthFailOp = state->stencil[1].zfail_op; 1100 wmds.BackfaceStencilPassDepthPassOp = state->stencil[1].zpass_op; 1101 wmds.BackfaceStencilTestFunction = 1102 translate_compare_func(state->stencil[1].func); 1103 wmds.DepthTestFunction = translate_compare_func(state->depth.func); 1104 wmds.DoubleSidedStencilEnable = two_sided_stencil; 1105 wmds.StencilTestEnable = state->stencil[0].enabled; 1106 wmds.StencilBufferWriteEnable = 1107 state->stencil[0].writemask != 0 || 1108 (two_sided_stencil && state->stencil[1].writemask != 0); 1109 wmds.DepthTestEnable = state->depth.enabled; 1110 wmds.DepthBufferWriteEnable = state->depth.writemask; 1111 wmds.StencilTestMask = state->stencil[0].valuemask; 1112 wmds.StencilWriteMask = state->stencil[0].writemask; 1113 wmds.BackfaceStencilTestMask = state->stencil[1].valuemask; 1114 wmds.BackfaceStencilWriteMask = state->stencil[1].writemask; 1115 /* wmds.[Backface]StencilReferenceValue are merged later */ 1116 } 1117 1118 return cso; 1119} 1120 1121/** 1122 * The pipe->bind_depth_stencil_alpha_state() driver hook. 1123 * 1124 * Bind a depth/stencil/alpha CSO and flag related dirty bits. 
1125 */ 1126static void 1127iris_bind_zsa_state(struct pipe_context *ctx, void *state) 1128{ 1129 struct iris_context *ice = (struct iris_context *) ctx; 1130 struct iris_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa; 1131 struct iris_depth_stencil_alpha_state *new_cso = state; 1132 1133 if (new_cso) { 1134 if (cso_changed(alpha.ref_value)) 1135 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE; 1136 1137 if (cso_changed(alpha.enabled)) 1138 ice->state.dirty |= IRIS_DIRTY_PS_BLEND | IRIS_DIRTY_BLEND_STATE; 1139 1140 if (cso_changed(alpha.func)) 1141 ice->state.dirty |= IRIS_DIRTY_BLEND_STATE; 1142 1143 if (cso_changed(depth_writes_enabled)) 1144 ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 1145 1146 ice->state.depth_writes_enabled = new_cso->depth_writes_enabled; 1147 ice->state.stencil_writes_enabled = new_cso->stencil_writes_enabled; 1148 } 1149 1150 ice->state.cso_zsa = new_cso; 1151 ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT; 1152 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL; 1153 ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_DEPTH_STENCIL_ALPHA]; 1154} 1155 1156/** 1157 * Gallium CSO for rasterizer state. 
1158 */ 1159struct iris_rasterizer_state { 1160 uint32_t sf[GENX(3DSTATE_SF_length)]; 1161 uint32_t clip[GENX(3DSTATE_CLIP_length)]; 1162 uint32_t raster[GENX(3DSTATE_RASTER_length)]; 1163 uint32_t wm[GENX(3DSTATE_WM_length)]; 1164 uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)]; 1165 1166 uint8_t num_clip_plane_consts; 1167 bool clip_halfz; /* for CC_VIEWPORT */ 1168 bool depth_clip_near; /* for CC_VIEWPORT */ 1169 bool depth_clip_far; /* for CC_VIEWPORT */ 1170 bool flatshade; /* for shader state */ 1171 bool flatshade_first; /* for stream output */ 1172 bool clamp_fragment_color; /* for shader state */ 1173 bool light_twoside; /* for shader state */ 1174 bool rasterizer_discard; /* for 3DSTATE_STREAMOUT and 3DSTATE_CLIP */ 1175 bool half_pixel_center; /* for 3DSTATE_MULTISAMPLE */ 1176 bool line_stipple_enable; 1177 bool poly_stipple_enable; 1178 bool multisample; 1179 bool force_persample_interp; 1180 bool conservative_rasterization; 1181 bool fill_mode_point_or_line; 1182 enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */ 1183 uint16_t sprite_coord_enable; 1184}; 1185 1186static float 1187get_line_width(const struct pipe_rasterizer_state *state) 1188{ 1189 float line_width = state->line_width; 1190 1191 /* From the OpenGL 4.4 spec: 1192 * 1193 * "The actual width of non-antialiased lines is determined by rounding 1194 * the supplied width to the nearest integer, then clamping it to the 1195 * implementation-dependent maximum non-antialiased line width." 1196 */ 1197 if (!state->multisample && !state->line_smooth) 1198 line_width = roundf(state->line_width); 1199 1200 if (!state->multisample && state->line_smooth && line_width < 1.5f) { 1201 /* For 1 pixel line thickness or less, the general anti-aliasing 1202 * algorithm gives up, and a garbage line is generated. Setting a 1203 * Line Width of 0.0 specifies the rasterization of the "thinnest" 1204 * (one-pixel-wide), non-antialiased lines. 
1205 * 1206 * Lines rendered with zero Line Width are rasterized using the 1207 * "Grid Intersection Quantization" rules as specified by the 1208 * "Zero-Width (Cosmetic) Line Rasterization" section of the docs. 1209 */ 1210 line_width = 0.0f; 1211 } 1212 1213 return line_width; 1214} 1215 1216/** 1217 * The pipe->create_rasterizer_state() driver hook. 1218 */ 1219static void * 1220iris_create_rasterizer_state(struct pipe_context *ctx, 1221 const struct pipe_rasterizer_state *state) 1222{ 1223 struct iris_rasterizer_state *cso = 1224 malloc(sizeof(struct iris_rasterizer_state)); 1225 1226 cso->multisample = state->multisample; 1227 cso->force_persample_interp = state->force_persample_interp; 1228 cso->clip_halfz = state->clip_halfz; 1229 cso->depth_clip_near = state->depth_clip_near; 1230 cso->depth_clip_far = state->depth_clip_far; 1231 cso->flatshade = state->flatshade; 1232 cso->flatshade_first = state->flatshade_first; 1233 cso->clamp_fragment_color = state->clamp_fragment_color; 1234 cso->light_twoside = state->light_twoside; 1235 cso->rasterizer_discard = state->rasterizer_discard; 1236 cso->half_pixel_center = state->half_pixel_center; 1237 cso->sprite_coord_mode = state->sprite_coord_mode; 1238 cso->sprite_coord_enable = state->sprite_coord_enable; 1239 cso->line_stipple_enable = state->line_stipple_enable; 1240 cso->poly_stipple_enable = state->poly_stipple_enable; 1241 cso->conservative_rasterization = 1242 state->conservative_raster_mode == PIPE_CONSERVATIVE_RASTER_POST_SNAP; 1243 1244 cso->fill_mode_point_or_line = 1245 state->fill_front == PIPE_POLYGON_MODE_LINE || 1246 state->fill_front == PIPE_POLYGON_MODE_POINT || 1247 state->fill_back == PIPE_POLYGON_MODE_LINE || 1248 state->fill_back == PIPE_POLYGON_MODE_POINT; 1249 1250 if (state->clip_plane_enable != 0) 1251 cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1; 1252 else 1253 cso->num_clip_plane_consts = 0; 1254 1255 float line_width = get_line_width(state); 1256 1257 
iris_pack_command(GENX(3DSTATE_SF), cso->sf, sf) { 1258 sf.StatisticsEnable = true; 1259 sf.ViewportTransformEnable = true; 1260 sf.AALineDistanceMode = AALINEDISTANCE_TRUE; 1261 sf.LineEndCapAntialiasingRegionWidth = 1262 state->line_smooth ? _10pixels : _05pixels; 1263 sf.LastPixelEnable = state->line_last_pixel; 1264 sf.LineWidth = line_width; 1265 sf.SmoothPointEnable = (state->point_smooth || state->multisample) && 1266 !state->point_quad_rasterization; 1267 sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State; 1268 sf.PointWidth = state->point_size; 1269 1270 if (state->flatshade_first) { 1271 sf.TriangleFanProvokingVertexSelect = 1; 1272 } else { 1273 sf.TriangleStripListProvokingVertexSelect = 2; 1274 sf.TriangleFanProvokingVertexSelect = 2; 1275 sf.LineStripListProvokingVertexSelect = 1; 1276 } 1277 } 1278 1279 iris_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) { 1280 rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise; 1281 rr.CullMode = translate_cull_mode(state->cull_face); 1282 rr.FrontFaceFillMode = translate_fill_mode(state->fill_front); 1283 rr.BackFaceFillMode = translate_fill_mode(state->fill_back); 1284 rr.DXMultisampleRasterizationEnable = state->multisample; 1285 rr.GlobalDepthOffsetEnableSolid = state->offset_tri; 1286 rr.GlobalDepthOffsetEnableWireframe = state->offset_line; 1287 rr.GlobalDepthOffsetEnablePoint = state->offset_point; 1288 rr.GlobalDepthOffsetConstant = state->offset_units * 2; 1289 rr.GlobalDepthOffsetScale = state->offset_scale; 1290 rr.GlobalDepthOffsetClamp = state->offset_clamp; 1291 rr.SmoothPointEnable = state->point_smooth; 1292 rr.AntialiasingEnable = state->line_smooth; 1293 rr.ScissorRectangleEnable = state->scissor; 1294#if GEN_GEN >= 9 1295 rr.ViewportZNearClipTestEnable = state->depth_clip_near; 1296 rr.ViewportZFarClipTestEnable = state->depth_clip_far; 1297 rr.ConservativeRasterizationEnable = 1298 cso->conservative_rasterization; 1299#else 1300 rr.ViewportZClipTestEnable = 
(state->depth_clip_near || state->depth_clip_far); 1301#endif 1302 } 1303 1304 iris_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) { 1305 /* cl.NonPerspectiveBarycentricEnable is filled in at draw time from 1306 * the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB. 1307 */ 1308 cl.EarlyCullEnable = true; 1309 cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable; 1310 cl.ForceUserClipDistanceClipTestEnableBitmask = true; 1311 cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL; 1312 cl.GuardbandClipTestEnable = true; 1313 cl.ClipEnable = true; 1314 cl.MinimumPointWidth = 0.125; 1315 cl.MaximumPointWidth = 255.875; 1316 1317 if (state->flatshade_first) { 1318 cl.TriangleFanProvokingVertexSelect = 1; 1319 } else { 1320 cl.TriangleStripListProvokingVertexSelect = 2; 1321 cl.TriangleFanProvokingVertexSelect = 2; 1322 cl.LineStripListProvokingVertexSelect = 1; 1323 } 1324 } 1325 1326 iris_pack_command(GENX(3DSTATE_WM), cso->wm, wm) { 1327 /* wm.BarycentricInterpolationMode and wm.EarlyDepthStencilControl are 1328 * filled in at draw time from the FS program. 1329 */ 1330 wm.LineAntialiasingRegionWidth = _10pixels; 1331 wm.LineEndCapAntialiasingRegionWidth = _05pixels; 1332 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; 1333 wm.LineStippleEnable = state->line_stipple_enable; 1334 wm.PolygonStippleEnable = state->poly_stipple_enable; 1335 } 1336 1337 /* Remap from 0..255 back to 1..256 */ 1338 const unsigned line_stipple_factor = state->line_stipple_factor + 1; 1339 1340 iris_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) { 1341 line.LineStipplePattern = state->line_stipple_pattern; 1342 line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor; 1343 line.LineStippleRepeatCount = line_stipple_factor; 1344 } 1345 1346 return cso; 1347} 1348 1349/** 1350 * The pipe->bind_rasterizer_state() driver hook. 1351 * 1352 * Bind a rasterizer CSO and flag related dirty bits. 
1353 */ 1354static void 1355iris_bind_rasterizer_state(struct pipe_context *ctx, void *state) 1356{ 1357 struct iris_context *ice = (struct iris_context *) ctx; 1358 struct iris_rasterizer_state *old_cso = ice->state.cso_rast; 1359 struct iris_rasterizer_state *new_cso = state; 1360 1361 if (new_cso) { 1362 /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */ 1363 if (cso_changed_memcmp(line_stipple)) 1364 ice->state.dirty |= IRIS_DIRTY_LINE_STIPPLE; 1365 1366 if (cso_changed(half_pixel_center)) 1367 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE; 1368 1369 if (cso_changed(line_stipple_enable) || cso_changed(poly_stipple_enable)) 1370 ice->state.dirty |= IRIS_DIRTY_WM; 1371 1372 if (cso_changed(rasterizer_discard)) 1373 ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP; 1374 1375 if (cso_changed(flatshade_first)) 1376 ice->state.dirty |= IRIS_DIRTY_STREAMOUT; 1377 1378 if (cso_changed(depth_clip_near) || cso_changed(depth_clip_far) || 1379 cso_changed(clip_halfz)) 1380 ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT; 1381 1382 if (cso_changed(sprite_coord_enable) || 1383 cso_changed(sprite_coord_mode) || 1384 cso_changed(light_twoside)) 1385 ice->state.dirty |= IRIS_DIRTY_SBE; 1386 1387 if (cso_changed(conservative_rasterization)) 1388 ice->state.dirty |= IRIS_DIRTY_FS; 1389 } 1390 1391 ice->state.cso_rast = new_cso; 1392 ice->state.dirty |= IRIS_DIRTY_RASTER; 1393 ice->state.dirty |= IRIS_DIRTY_CLIP; 1394 ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_RASTERIZER]; 1395} 1396 1397/** 1398 * Return true if the given wrap mode requires the border color to exist. 1399 * 1400 * (We can skip uploading it if the sampler isn't going to use it.) 1401 */ 1402static bool 1403wrap_mode_needs_border_color(unsigned wrap_mode) 1404{ 1405 return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER; 1406} 1407 1408/** 1409 * Gallium CSO for sampler state. 
1410 */ 1411struct iris_sampler_state { 1412 union pipe_color_union border_color; 1413 bool needs_border_color; 1414 1415 uint32_t sampler_state[GENX(SAMPLER_STATE_length)]; 1416}; 1417 1418/** 1419 * The pipe->create_sampler_state() driver hook. 1420 * 1421 * We fill out SAMPLER_STATE (except for the border color pointer), and 1422 * store that on the CPU. It doesn't make sense to upload it to a GPU 1423 * buffer object yet, because 3DSTATE_SAMPLER_STATE_POINTERS requires 1424 * all bound sampler states to be in contiguous memor. 1425 */ 1426static void * 1427iris_create_sampler_state(struct pipe_context *ctx, 1428 const struct pipe_sampler_state *state) 1429{ 1430 struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state); 1431 1432 if (!cso) 1433 return NULL; 1434 1435 STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST); 1436 STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR); 1437 1438 unsigned wrap_s = translate_wrap(state->wrap_s); 1439 unsigned wrap_t = translate_wrap(state->wrap_t); 1440 unsigned wrap_r = translate_wrap(state->wrap_r); 1441 1442 memcpy(&cso->border_color, &state->border_color, sizeof(cso->border_color)); 1443 1444 cso->needs_border_color = wrap_mode_needs_border_color(wrap_s) || 1445 wrap_mode_needs_border_color(wrap_t) || 1446 wrap_mode_needs_border_color(wrap_r); 1447 1448 float min_lod = state->min_lod; 1449 unsigned mag_img_filter = state->mag_img_filter; 1450 1451 // XXX: explain this code ported from ilo...I don't get it at all... 
1452 if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && 1453 state->min_lod > 0.0f) { 1454 min_lod = 0.0f; 1455 mag_img_filter = state->min_img_filter; 1456 } 1457 1458 iris_pack_state(GENX(SAMPLER_STATE), cso->sampler_state, samp) { 1459 samp.TCXAddressControlMode = wrap_s; 1460 samp.TCYAddressControlMode = wrap_t; 1461 samp.TCZAddressControlMode = wrap_r; 1462 samp.CubeSurfaceControlMode = state->seamless_cube_map; 1463 samp.NonnormalizedCoordinateEnable = !state->normalized_coords; 1464 samp.MinModeFilter = state->min_img_filter; 1465 samp.MagModeFilter = mag_img_filter; 1466 samp.MipModeFilter = translate_mip_filter(state->min_mip_filter); 1467 samp.MaximumAnisotropy = RATIO21; 1468 1469 if (state->max_anisotropy >= 2) { 1470 if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) { 1471 samp.MinModeFilter = MAPFILTER_ANISOTROPIC; 1472 samp.AnisotropicAlgorithm = EWAApproximation; 1473 } 1474 1475 if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR) 1476 samp.MagModeFilter = MAPFILTER_ANISOTROPIC; 1477 1478 samp.MaximumAnisotropy = 1479 MIN2((state->max_anisotropy - 2) / 2, RATIO161); 1480 } 1481 1482 /* Set address rounding bits if not using nearest filtering. */ 1483 if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) { 1484 samp.UAddressMinFilterRoundingEnable = true; 1485 samp.VAddressMinFilterRoundingEnable = true; 1486 samp.RAddressMinFilterRoundingEnable = true; 1487 } 1488 1489 if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) { 1490 samp.UAddressMagFilterRoundingEnable = true; 1491 samp.VAddressMagFilterRoundingEnable = true; 1492 samp.RAddressMagFilterRoundingEnable = true; 1493 } 1494 1495 if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) 1496 samp.ShadowFunction = translate_shadow_func(state->compare_func); 1497 1498 const float hw_max_lod = GEN_GEN >= 7 ? 
14 : 13; 1499 1500 samp.LODPreClampMode = CLAMP_MODE_OGL; 1501 samp.MinLOD = CLAMP(min_lod, 0, hw_max_lod); 1502 samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod); 1503 samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15); 1504 1505 /* .BorderColorPointer is filled in by iris_bind_sampler_states. */ 1506 } 1507 1508 return cso; 1509} 1510 1511/** 1512 * The pipe->bind_sampler_states() driver hook. 1513 */ 1514static void 1515iris_bind_sampler_states(struct pipe_context *ctx, 1516 enum pipe_shader_type p_stage, 1517 unsigned start, unsigned count, 1518 void **states) 1519{ 1520 struct iris_context *ice = (struct iris_context *) ctx; 1521 gl_shader_stage stage = stage_from_pipe(p_stage); 1522 struct iris_shader_state *shs = &ice->state.shaders[stage]; 1523 1524 assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS); 1525 1526 for (int i = 0; i < count; i++) { 1527 shs->samplers[start + i] = states[i]; 1528 } 1529 1530 ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage; 1531} 1532 1533/** 1534 * Upload the sampler states into a contiguous area of GPU memory, for 1535 * for 3DSTATE_SAMPLER_STATE_POINTERS_*. 1536 * 1537 * Also fill out the border color state pointers. 1538 */ 1539static void 1540iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage) 1541{ 1542 struct iris_shader_state *shs = &ice->state.shaders[stage]; 1543 const struct shader_info *info = iris_get_shader_info(ice, stage); 1544 1545 /* We assume the state tracker will call pipe->bind_sampler_states() 1546 * if the program's number of textures changes. 1547 */ 1548 unsigned count = info ? util_last_bit(info->textures_used) : 0; 1549 1550 if (!count) 1551 return; 1552 1553 /* Assemble the SAMPLER_STATEs into a contiguous table that lives 1554 * in the dynamic state memory zone, so we can point to it via the 1555 * 3DSTATE_SAMPLER_STATE_POINTERS_* commands. 
1556 */ 1557 uint32_t *map = 1558 upload_state(ice->state.dynamic_uploader, &shs->sampler_table, 1559 count * 4 * GENX(SAMPLER_STATE_length), 32); 1560 if (unlikely(!map)) 1561 return; 1562 1563 struct pipe_resource *res = shs->sampler_table.res; 1564 shs->sampler_table.offset += 1565 iris_bo_offset_from_base_address(iris_resource_bo(res)); 1566 1567 /* Make sure all land in the same BO */ 1568 iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS); 1569 1570 ice->state.need_border_colors &= ~(1 << stage); 1571 1572 for (int i = 0; i < count; i++) { 1573 struct iris_sampler_state *state = shs->samplers[i]; 1574 struct iris_sampler_view *tex = shs->textures[i]; 1575 1576 if (!state) { 1577 memset(map, 0, 4 * GENX(SAMPLER_STATE_length)); 1578 } else if (!state->needs_border_color) { 1579 memcpy(map, state->sampler_state, 4 * GENX(SAMPLER_STATE_length)); 1580 } else { 1581 ice->state.need_border_colors |= 1 << stage; 1582 1583 /* We may need to swizzle the border color for format faking. 1584 * A/LA formats are faked as R/RG with 000R or R00G swizzles. 1585 * This means we need to move the border color's A channel into 1586 * the R or G channels so that those read swizzles will move it 1587 * back into A. 
1588 */ 1589 union pipe_color_union *color = &state->border_color; 1590 union pipe_color_union tmp; 1591 if (tex) { 1592 enum pipe_format internal_format = tex->res->internal_format; 1593 1594 if (util_format_is_alpha(internal_format)) { 1595 unsigned char swz[4] = { 1596 PIPE_SWIZZLE_W, PIPE_SWIZZLE_0, 1597 PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 1598 }; 1599 util_format_apply_color_swizzle(&tmp, color, swz, true); 1600 color = &tmp; 1601 } else if (util_format_is_luminance_alpha(internal_format) && 1602 internal_format != PIPE_FORMAT_L8A8_SRGB) { 1603 unsigned char swz[4] = { 1604 PIPE_SWIZZLE_X, PIPE_SWIZZLE_W, 1605 PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 1606 }; 1607 util_format_apply_color_swizzle(&tmp, color, swz, true); 1608 color = &tmp; 1609 } 1610 } 1611 1612 /* Stream out the border color and merge the pointer. */ 1613 uint32_t offset = iris_upload_border_color(ice, color); 1614 1615 uint32_t dynamic[GENX(SAMPLER_STATE_length)]; 1616 iris_pack_state(GENX(SAMPLER_STATE), dynamic, dyns) { 1617 dyns.BorderColorPointer = offset; 1618 } 1619 1620 for (uint32_t j = 0; j < GENX(SAMPLER_STATE_length); j++) 1621 map[j] = state->sampler_state[j] | dynamic[j]; 1622 } 1623 1624 map += GENX(SAMPLER_STATE_length); 1625 } 1626} 1627 1628static enum isl_channel_select 1629fmt_swizzle(const struct iris_format_info *fmt, enum pipe_swizzle swz) 1630{ 1631 switch (swz) { 1632 case PIPE_SWIZZLE_X: return fmt->swizzle.r; 1633 case PIPE_SWIZZLE_Y: return fmt->swizzle.g; 1634 case PIPE_SWIZZLE_Z: return fmt->swizzle.b; 1635 case PIPE_SWIZZLE_W: return fmt->swizzle.a; 1636 case PIPE_SWIZZLE_1: return SCS_ONE; 1637 case PIPE_SWIZZLE_0: return SCS_ZERO; 1638 default: unreachable("invalid swizzle"); 1639 } 1640} 1641 1642static void 1643fill_buffer_surface_state(struct isl_device *isl_dev, 1644 struct iris_bo *bo, 1645 void *map, 1646 enum isl_format format, 1647 struct isl_swizzle swizzle, 1648 unsigned offset, 1649 unsigned size) 1650{ 1651 const struct isl_format_layout *fmtl = 
isl_format_get_layout(format); 1652 const unsigned cpp = format == ISL_FORMAT_RAW ? 1 : fmtl->bpb / 8; 1653 1654 /* The ARB_texture_buffer_specification says: 1655 * 1656 * "The number of texels in the buffer texture's texel array is given by 1657 * 1658 * floor(<buffer_size> / (<components> * sizeof(<base_type>)), 1659 * 1660 * where <buffer_size> is the size of the buffer object, in basic 1661 * machine units and <components> and <base_type> are the element count 1662 * and base data type for elements, as specified in Table X.1. The 1663 * number of texels in the texel array is then clamped to the 1664 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB." 1665 * 1666 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride, 1667 * so that when ISL divides by stride to obtain the number of texels, that 1668 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE. 1669 */ 1670 unsigned final_size = 1671 MIN3(size, bo->size - offset, IRIS_MAX_TEXTURE_BUFFER_SIZE * cpp); 1672 1673 isl_buffer_fill_state(isl_dev, map, 1674 .address = bo->gtt_offset + offset, 1675 .size_B = final_size, 1676 .format = format, 1677 .swizzle = swizzle, 1678 .stride_B = cpp, 1679 .mocs = mocs(bo)); 1680} 1681 1682#define SURFACE_STATE_ALIGNMENT 64 1683 1684/** 1685 * Allocate several contiguous SURFACE_STATE structures, one for each 1686 * supported auxiliary surface mode. 
1687 */ 1688static void * 1689alloc_surface_states(struct u_upload_mgr *mgr, 1690 struct iris_state_ref *ref, 1691 unsigned aux_usages) 1692{ 1693 const unsigned surf_size = 4 * GENX(RENDER_SURFACE_STATE_length); 1694 1695 /* If this changes, update this to explicitly align pointers */ 1696 STATIC_ASSERT(surf_size == SURFACE_STATE_ALIGNMENT); 1697 1698 assert(aux_usages != 0); 1699 1700 void *map = 1701 upload_state(mgr, ref, util_bitcount(aux_usages) * surf_size, 1702 SURFACE_STATE_ALIGNMENT); 1703 1704 ref->offset += iris_bo_offset_from_base_address(iris_resource_bo(ref->res)); 1705 1706 return map; 1707} 1708 1709static void 1710fill_surface_state(struct isl_device *isl_dev, 1711 void *map, 1712 struct iris_resource *res, 1713 struct isl_view *view, 1714 unsigned aux_usage) 1715{ 1716 struct isl_surf_fill_state_info f = { 1717 .surf = &res->surf, 1718 .view = view, 1719 .mocs = mocs(res->bo), 1720 .address = res->bo->gtt_offset, 1721 }; 1722 1723 if (aux_usage != ISL_AUX_USAGE_NONE) { 1724 f.aux_surf = &res->aux.surf; 1725 f.aux_usage = aux_usage; 1726 f.aux_address = res->aux.bo->gtt_offset + res->aux.offset; 1727 1728 struct iris_bo *clear_bo = NULL; 1729 uint64_t clear_offset = 0; 1730 f.clear_color = 1731 iris_resource_get_clear_color(res, &clear_bo, &clear_offset); 1732 if (clear_bo) { 1733 f.clear_address = clear_bo->gtt_offset + clear_offset; 1734 f.use_clear_address = isl_dev->info->gen > 9; 1735 } 1736 } 1737 1738 isl_surf_fill_state_s(isl_dev, map, &f); 1739} 1740 1741/** 1742 * The pipe->create_sampler_view() driver hook. 
1743 */ 1744static struct pipe_sampler_view * 1745iris_create_sampler_view(struct pipe_context *ctx, 1746 struct pipe_resource *tex, 1747 const struct pipe_sampler_view *tmpl) 1748{ 1749 struct iris_context *ice = (struct iris_context *) ctx; 1750 struct iris_screen *screen = (struct iris_screen *)ctx->screen; 1751 const struct gen_device_info *devinfo = &screen->devinfo; 1752 struct iris_sampler_view *isv = calloc(1, sizeof(struct iris_sampler_view)); 1753 1754 if (!isv) 1755 return NULL; 1756 1757 /* initialize base object */ 1758 isv->base = *tmpl; 1759 isv->base.context = ctx; 1760 isv->base.texture = NULL; 1761 pipe_reference_init(&isv->base.reference, 1); 1762 pipe_resource_reference(&isv->base.texture, tex); 1763 1764 if (util_format_is_depth_or_stencil(tmpl->format)) { 1765 struct iris_resource *zres, *sres; 1766 const struct util_format_description *desc = 1767 util_format_description(tmpl->format); 1768 1769 iris_get_depth_stencil_resources(tex, &zres, &sres); 1770 1771 tex = util_format_has_depth(desc) ? 
&zres->base : &sres->base; 1772 } 1773 1774 isv->res = (struct iris_resource *) tex; 1775 1776 void *map = alloc_surface_states(ice->state.surface_uploader, 1777 &isv->surface_state, 1778 isv->res->aux.sampler_usages); 1779 if (!unlikely(map)) 1780 return NULL; 1781 1782 isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT; 1783 1784 if (isv->base.target == PIPE_TEXTURE_CUBE || 1785 isv->base.target == PIPE_TEXTURE_CUBE_ARRAY) 1786 usage |= ISL_SURF_USAGE_CUBE_BIT; 1787 1788 const struct iris_format_info fmt = 1789 iris_format_for_usage(devinfo, tmpl->format, usage); 1790 1791 isv->clear_color = isv->res->aux.clear_color; 1792 1793 isv->view = (struct isl_view) { 1794 .format = fmt.fmt, 1795 .swizzle = (struct isl_swizzle) { 1796 .r = fmt_swizzle(&fmt, tmpl->swizzle_r), 1797 .g = fmt_swizzle(&fmt, tmpl->swizzle_g), 1798 .b = fmt_swizzle(&fmt, tmpl->swizzle_b), 1799 .a = fmt_swizzle(&fmt, tmpl->swizzle_a), 1800 }, 1801 .usage = usage, 1802 }; 1803 1804 /* Fill out SURFACE_STATE for this view. */ 1805 if (tmpl->target != PIPE_BUFFER) { 1806 isv->view.base_level = tmpl->u.tex.first_level; 1807 isv->view.levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1; 1808 // XXX: do I need to port f9fd0cf4790cb2a530e75d1a2206dbb9d8af7cb2? 1809 isv->view.base_array_layer = tmpl->u.tex.first_layer; 1810 isv->view.array_len = 1811 tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1; 1812 1813 unsigned aux_modes = isv->res->aux.sampler_usages; 1814 while (aux_modes) { 1815 enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); 1816 1817 /* If we have a multisampled depth buffer, do not create a sampler 1818 * surface state with HiZ. 
1819 */ 1820 fill_surface_state(&screen->isl_dev, map, isv->res, &isv->view, 1821 aux_usage); 1822 1823 map += SURFACE_STATE_ALIGNMENT; 1824 } 1825 } else { 1826 fill_buffer_surface_state(&screen->isl_dev, isv->res->bo, map, 1827 isv->view.format, isv->view.swizzle, 1828 tmpl->u.buf.offset, tmpl->u.buf.size); 1829 } 1830 1831 return &isv->base; 1832} 1833 1834static void 1835iris_sampler_view_destroy(struct pipe_context *ctx, 1836 struct pipe_sampler_view *state) 1837{ 1838 struct iris_sampler_view *isv = (void *) state; 1839 pipe_resource_reference(&state->texture, NULL); 1840 pipe_resource_reference(&isv->surface_state.res, NULL); 1841 free(isv); 1842} 1843 1844/** 1845 * The pipe->create_surface() driver hook. 1846 * 1847 * In Gallium nomenclature, "surfaces" are a view of a resource that 1848 * can be bound as a render target or depth/stencil buffer. 1849 */ 1850static struct pipe_surface * 1851iris_create_surface(struct pipe_context *ctx, 1852 struct pipe_resource *tex, 1853 const struct pipe_surface *tmpl) 1854{ 1855 struct iris_context *ice = (struct iris_context *) ctx; 1856 struct iris_screen *screen = (struct iris_screen *)ctx->screen; 1857 const struct gen_device_info *devinfo = &screen->devinfo; 1858 struct iris_surface *surf = calloc(1, sizeof(struct iris_surface)); 1859 struct pipe_surface *psurf = &surf->base; 1860 struct iris_resource *res = (struct iris_resource *) tex; 1861 1862 if (!surf) 1863 return NULL; 1864 1865 pipe_reference_init(&psurf->reference, 1); 1866 pipe_resource_reference(&psurf->texture, tex); 1867 psurf->context = ctx; 1868 psurf->format = tmpl->format; 1869 psurf->width = tex->width0; 1870 psurf->height = tex->height0; 1871 psurf->texture = tex; 1872 psurf->u.tex.first_layer = tmpl->u.tex.first_layer; 1873 psurf->u.tex.last_layer = tmpl->u.tex.last_layer; 1874 psurf->u.tex.level = tmpl->u.tex.level; 1875 1876 isl_surf_usage_flags_t usage = 0; 1877 if (tmpl->writable) 1878 usage = ISL_SURF_USAGE_STORAGE_BIT; 1879 else if 
(util_format_is_depth_or_stencil(tmpl->format)) 1880 usage = ISL_SURF_USAGE_DEPTH_BIT; 1881 else 1882 usage = ISL_SURF_USAGE_RENDER_TARGET_BIT; 1883 1884 const struct iris_format_info fmt = 1885 iris_format_for_usage(devinfo, psurf->format, usage); 1886 1887 if ((usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && 1888 !isl_format_supports_rendering(devinfo, fmt.fmt)) { 1889 /* Framebuffer validation will reject this invalid case, but it 1890 * hasn't had the opportunity yet. In the meantime, we need to 1891 * avoid hitting ISL asserts about unsupported formats below. 1892 */ 1893 free(surf); 1894 return NULL; 1895 } 1896 1897 struct isl_view *view = &surf->view; 1898 *view = (struct isl_view) { 1899 .format = fmt.fmt, 1900 .base_level = tmpl->u.tex.level, 1901 .levels = 1, 1902 .base_array_layer = tmpl->u.tex.first_layer, 1903 .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1, 1904 .swizzle = ISL_SWIZZLE_IDENTITY, 1905 .usage = usage, 1906 }; 1907 1908 surf->clear_color = res->aux.clear_color; 1909 1910 /* Bail early for depth/stencil - we don't want SURFACE_STATE for them. */ 1911 if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT | 1912 ISL_SURF_USAGE_STENCIL_BIT)) 1913 return psurf; 1914 1915 1916 void *map = alloc_surface_states(ice->state.surface_uploader, 1917 &surf->surface_state, 1918 res->aux.possible_usages); 1919 if (!unlikely(map)) 1920 return NULL; 1921 1922 if (!isl_format_is_compressed(res->surf.format)) { 1923 /* This is a normal surface. Fill out a SURFACE_STATE for each possible 1924 * auxiliary surface mode and return the pipe_surface. 
1925 */ 1926 unsigned aux_modes = res->aux.possible_usages; 1927 while (aux_modes) { 1928 enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); 1929 1930 fill_surface_state(&screen->isl_dev, map, res, view, aux_usage); 1931 1932 map += SURFACE_STATE_ALIGNMENT; 1933 } 1934 1935 return psurf; 1936 } 1937 1938 /* The resource has a compressed format, which is not renderable, but we 1939 * have a renderable view format. We must be attempting to upload blocks 1940 * of compressed data via an uncompressed view. 1941 * 1942 * In this case, we can assume there are no auxiliary buffers, a single 1943 * miplevel, and that the resource is single-sampled. Gallium may try 1944 * and create an uncompressed view with multiple layers, however. 1945 */ 1946 assert(!isl_format_is_compressed(fmt.fmt)); 1947 assert(res->aux.possible_usages == 1 << ISL_AUX_USAGE_NONE); 1948 assert(res->surf.samples == 1); 1949 assert(view->levels == 1); 1950 1951 struct isl_surf isl_surf; 1952 uint32_t offset_B = 0, tile_x_sa = 0, tile_y_sa = 0; 1953 1954 if (view->base_level > 0) { 1955 /* We can't rely on the hardware's miplevel selection with such 1956 * a substantial lie about the format, so we select a single image 1957 * using the Tile X/Y Offset fields. In this case, we can't handle 1958 * multiple array slices. 1959 * 1960 * On Broadwell, HALIGN and VALIGN are specified in pixels and are 1961 * hard-coded to align to exactly the block size of the compressed 1962 * texture. This means that, when reinterpreted as a non-compressed 1963 * texture, the tile offsets may be anything and we can't rely on 1964 * X/Y Offset. 1965 * 1966 * Return NULL to force the state tracker to take fallback paths. 1967 */ 1968 if (view->array_len > 1 || GEN_GEN == 8) 1969 return NULL; 1970 1971 const bool is_3d = res->surf.dim == ISL_SURF_DIM_3D; 1972 isl_surf_get_image_surf(&screen->isl_dev, &res->surf, 1973 view->base_level, 1974 is_3d ? 0 : view->base_array_layer, 1975 is_3d ? 
view->base_array_layer : 0, 1976 &isl_surf, 1977 &offset_B, &tile_x_sa, &tile_y_sa); 1978 1979 /* We use address and tile offsets to access a single level/layer 1980 * as a subimage, so reset level/layer so it doesn't offset again. 1981 */ 1982 view->base_array_layer = 0; 1983 view->base_level = 0; 1984 } else { 1985 /* Level 0 doesn't require tile offsets, and the hardware can find 1986 * array slices using QPitch even with the format override, so we 1987 * can allow layers in this case. Copy the original ISL surface. 1988 */ 1989 memcpy(&isl_surf, &res->surf, sizeof(isl_surf)); 1990 } 1991 1992 /* Scale down the image dimensions by the block size. */ 1993 const struct isl_format_layout *fmtl = 1994 isl_format_get_layout(res->surf.format); 1995 isl_surf.format = fmt.fmt; 1996 isl_surf.logical_level0_px = isl_surf_get_logical_level0_el(&isl_surf); 1997 isl_surf.phys_level0_sa = isl_surf_get_phys_level0_el(&isl_surf); 1998 tile_x_sa /= fmtl->bw; 1999 tile_y_sa /= fmtl->bh; 2000 2001 psurf->width = isl_surf.logical_level0_px.width; 2002 psurf->height = isl_surf.logical_level0_px.height; 2003 2004 struct isl_surf_fill_state_info f = { 2005 .surf = &isl_surf, 2006 .view = view, 2007 .mocs = mocs(res->bo), 2008 .address = res->bo->gtt_offset + offset_B, 2009 .x_offset_sa = tile_x_sa, 2010 .y_offset_sa = tile_y_sa, 2011 }; 2012 2013 isl_surf_fill_state_s(&screen->isl_dev, map, &f); 2014 return psurf; 2015} 2016 2017#if GEN_GEN < 9 2018static void 2019fill_default_image_param(struct brw_image_param *param) 2020{ 2021 memset(param, 0, sizeof(*param)); 2022 /* Set the swizzling shifts to all-ones to effectively disable swizzling -- 2023 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more 2024 * detailed explanation of these parameters. 
2025 */ 2026 param->swizzling[0] = 0xff; 2027 param->swizzling[1] = 0xff; 2028} 2029 2030static void 2031fill_buffer_image_param(struct brw_image_param *param, 2032 enum pipe_format pfmt, 2033 unsigned size) 2034{ 2035 const unsigned cpp = util_format_get_blocksize(pfmt); 2036 2037 fill_default_image_param(param); 2038 param->size[0] = size / cpp; 2039 param->stride[0] = cpp; 2040} 2041#else 2042#define isl_surf_fill_image_param(x, ...) 2043#define fill_default_image_param(x, ...) 2044#define fill_buffer_image_param(x, ...) 2045#endif 2046 2047/** 2048 * The pipe->set_shader_images() driver hook. 2049 */ 2050static void 2051iris_set_shader_images(struct pipe_context *ctx, 2052 enum pipe_shader_type p_stage, 2053 unsigned start_slot, unsigned count, 2054 const struct pipe_image_view *p_images) 2055{ 2056 struct iris_context *ice = (struct iris_context *) ctx; 2057 struct iris_screen *screen = (struct iris_screen *)ctx->screen; 2058 const struct gen_device_info *devinfo = &screen->devinfo; 2059 gl_shader_stage stage = stage_from_pipe(p_stage); 2060 struct iris_shader_state *shs = &ice->state.shaders[stage]; 2061#if GEN_GEN == 8 2062 struct iris_genx_state *genx = ice->state.genx; 2063 struct brw_image_param *image_params = genx->shaders[stage].image_param; 2064#endif 2065 2066 shs->bound_image_views &= ~u_bit_consecutive(start_slot, count); 2067 2068 for (unsigned i = 0; i < count; i++) { 2069 struct iris_image_view *iv = &shs->image[start_slot + i]; 2070 2071 if (p_images && p_images[i].resource) { 2072 const struct pipe_image_view *img = &p_images[i]; 2073 struct iris_resource *res = (void *) img->resource; 2074 2075 // XXX: these are not retained forever, use a separate uploader? 
2076 void *map = 2077 alloc_surface_states(ice->state.surface_uploader, 2078 &iv->surface_state, 1 << ISL_AUX_USAGE_NONE); 2079 if (!unlikely(map)) 2080 return; 2081 2082 iv->base = *img; 2083 iv->base.resource = NULL; 2084 pipe_resource_reference(&iv->base.resource, &res->base); 2085 2086 shs->bound_image_views |= 1 << (start_slot + i); 2087 2088 res->bind_history |= PIPE_BIND_SHADER_IMAGE; 2089 2090 isl_surf_usage_flags_t usage = ISL_SURF_USAGE_STORAGE_BIT; 2091 enum isl_format isl_fmt = 2092 iris_format_for_usage(devinfo, img->format, usage).fmt; 2093 2094 bool untyped_fallback = false; 2095 2096 if (img->shader_access & PIPE_IMAGE_ACCESS_READ) { 2097 /* On Gen8, try to use typed surfaces reads (which support a 2098 * limited number of formats), and if not possible, fall back 2099 * to untyped reads. 2100 */ 2101 untyped_fallback = GEN_GEN == 8 && 2102 !isl_has_matching_typed_storage_image_format(devinfo, isl_fmt); 2103 2104 if (untyped_fallback) 2105 isl_fmt = ISL_FORMAT_RAW; 2106 else 2107 isl_fmt = isl_lower_storage_image_format(devinfo, isl_fmt); 2108 } 2109 2110 if (res->base.target != PIPE_BUFFER) { 2111 struct isl_view view = { 2112 .format = isl_fmt, 2113 .base_level = img->u.tex.level, 2114 .levels = 1, 2115 .base_array_layer = img->u.tex.first_layer, 2116 .array_len = img->u.tex.last_layer - img->u.tex.first_layer + 1, 2117 .swizzle = ISL_SWIZZLE_IDENTITY, 2118 .usage = usage, 2119 }; 2120 2121 if (untyped_fallback) { 2122 fill_buffer_surface_state(&screen->isl_dev, res->bo, map, 2123 isl_fmt, ISL_SWIZZLE_IDENTITY, 2124 0, res->bo->size); 2125 } else { 2126 /* Images don't support compression */ 2127 unsigned aux_modes = 1 << ISL_AUX_USAGE_NONE; 2128 while (aux_modes) { 2129 enum isl_aux_usage usage = u_bit_scan(&aux_modes); 2130 2131 fill_surface_state(&screen->isl_dev, map, res, &view, usage); 2132 2133 map += SURFACE_STATE_ALIGNMENT; 2134 } 2135 } 2136 2137 isl_surf_fill_image_param(&screen->isl_dev, 2138 &image_params[start_slot + i], 2139 
&res->surf, &view); 2140 } else { 2141 util_range_add(&res->valid_buffer_range, img->u.buf.offset, 2142 img->u.buf.offset + img->u.buf.size); 2143 2144 fill_buffer_surface_state(&screen->isl_dev, res->bo, map, 2145 isl_fmt, ISL_SWIZZLE_IDENTITY, 2146 img->u.buf.offset, img->u.buf.size); 2147 fill_buffer_image_param(&image_params[start_slot + i], 2148 img->format, img->u.buf.size); 2149 } 2150 } else { 2151 pipe_resource_reference(&iv->base.resource, NULL); 2152 pipe_resource_reference(&iv->surface_state.res, NULL); 2153 fill_default_image_param(&image_params[start_slot + i]); 2154 } 2155 } 2156 2157 ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage; 2158 ice->state.dirty |= 2159 stage == MESA_SHADER_COMPUTE ? IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES 2160 : IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 2161 2162 /* Broadwell also needs brw_image_params re-uploaded */ 2163 if (GEN_GEN < 9) { 2164 ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << stage; 2165 shs->cbuf0_needs_upload = true; 2166 } 2167} 2168 2169 2170/** 2171 * The pipe->set_sampler_views() driver hook. 2172 */ 2173static void 2174iris_set_sampler_views(struct pipe_context *ctx, 2175 enum pipe_shader_type p_stage, 2176 unsigned start, unsigned count, 2177 struct pipe_sampler_view **views) 2178{ 2179 struct iris_context *ice = (struct iris_context *) ctx; 2180 gl_shader_stage stage = stage_from_pipe(p_stage); 2181 struct iris_shader_state *shs = &ice->state.shaders[stage]; 2182 2183 shs->bound_sampler_views &= ~u_bit_consecutive(start, count); 2184 2185 for (unsigned i = 0; i < count; i++) { 2186 struct pipe_sampler_view *pview = views ? 
views[i] : NULL; 2187 pipe_sampler_view_reference((struct pipe_sampler_view **) 2188 &shs->textures[start + i], pview); 2189 struct iris_sampler_view *view = (void *) pview; 2190 if (view) { 2191 view->res->bind_history |= PIPE_BIND_SAMPLER_VIEW; 2192 shs->bound_sampler_views |= 1 << (start + i); 2193 } 2194 } 2195 2196 ice->state.dirty |= (IRIS_DIRTY_BINDINGS_VS << stage); 2197 ice->state.dirty |= 2198 stage == MESA_SHADER_COMPUTE ? IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES 2199 : IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 2200} 2201 2202/** 2203 * The pipe->set_tess_state() driver hook. 2204 */ 2205static void 2206iris_set_tess_state(struct pipe_context *ctx, 2207 const float default_outer_level[4], 2208 const float default_inner_level[2]) 2209{ 2210 struct iris_context *ice = (struct iris_context *) ctx; 2211 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL]; 2212 2213 memcpy(&ice->state.default_outer_level[0], &default_outer_level[0], 4 * sizeof(float)); 2214 memcpy(&ice->state.default_inner_level[0], &default_inner_level[0], 2 * sizeof(float)); 2215 2216 ice->state.dirty |= IRIS_DIRTY_CONSTANTS_TCS; 2217 shs->cbuf0_needs_upload = true; 2218} 2219 2220static void 2221iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf) 2222{ 2223 struct iris_surface *surf = (void *) p_surf; 2224 pipe_resource_reference(&p_surf->texture, NULL); 2225 pipe_resource_reference(&surf->surface_state.res, NULL); 2226 free(surf); 2227} 2228 2229static void 2230iris_set_clip_state(struct pipe_context *ctx, 2231 const struct pipe_clip_state *state) 2232{ 2233 struct iris_context *ice = (struct iris_context *) ctx; 2234 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX]; 2235 2236 memcpy(&ice->state.clip_planes, state, sizeof(*state)); 2237 2238 ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS; 2239 shs->cbuf0_needs_upload = true; 2240} 2241 2242/** 2243 * The pipe->set_polygon_stipple() driver hook. 
2244 */ 2245static void 2246iris_set_polygon_stipple(struct pipe_context *ctx, 2247 const struct pipe_poly_stipple *state) 2248{ 2249 struct iris_context *ice = (struct iris_context *) ctx; 2250 memcpy(&ice->state.poly_stipple, state, sizeof(*state)); 2251 ice->state.dirty |= IRIS_DIRTY_POLYGON_STIPPLE; 2252} 2253 2254/** 2255 * The pipe->set_sample_mask() driver hook. 2256 */ 2257static void 2258iris_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 2259{ 2260 struct iris_context *ice = (struct iris_context *) ctx; 2261 2262 /* We only support 16x MSAA, so we have 16 bits of sample maks. 2263 * st/mesa may pass us 0xffffffff though, meaning "enable all samples". 2264 */ 2265 ice->state.sample_mask = sample_mask & 0xffff; 2266 ice->state.dirty |= IRIS_DIRTY_SAMPLE_MASK; 2267} 2268 2269/** 2270 * The pipe->set_scissor_states() driver hook. 2271 * 2272 * This corresponds to our SCISSOR_RECT state structures. It's an 2273 * exact match, so we just store them, and memcpy them out later. 2274 */ 2275static void 2276iris_set_scissor_states(struct pipe_context *ctx, 2277 unsigned start_slot, 2278 unsigned num_scissors, 2279 const struct pipe_scissor_state *rects) 2280{ 2281 struct iris_context *ice = (struct iris_context *) ctx; 2282 2283 for (unsigned i = 0; i < num_scissors; i++) { 2284 if (rects[i].minx == rects[i].maxx || rects[i].miny == rects[i].maxy) { 2285 /* If the scissor was out of bounds and got clamped to 0 width/height 2286 * at the bounds, the subtraction of 1 from maximums could produce a 2287 * negative number and thus not clip anything. Instead, just provide 2288 * a min > max scissor inside the bounds, which produces the expected 2289 * no rendering. 
2290 */ 2291 ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) { 2292 .minx = 1, .maxx = 0, .miny = 1, .maxy = 0, 2293 }; 2294 } else { 2295 ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) { 2296 .minx = rects[i].minx, .miny = rects[i].miny, 2297 .maxx = rects[i].maxx - 1, .maxy = rects[i].maxy - 1, 2298 }; 2299 } 2300 } 2301 2302 ice->state.dirty |= IRIS_DIRTY_SCISSOR_RECT; 2303} 2304 2305/** 2306 * The pipe->set_stencil_ref() driver hook. 2307 * 2308 * This is added to 3DSTATE_WM_DEPTH_STENCIL dynamically at draw time. 2309 */ 2310static void 2311iris_set_stencil_ref(struct pipe_context *ctx, 2312 const struct pipe_stencil_ref *state) 2313{ 2314 struct iris_context *ice = (struct iris_context *) ctx; 2315 memcpy(&ice->state.stencil_ref, state, sizeof(*state)); 2316 if (GEN_GEN == 8) 2317 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE; 2318 else 2319 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL; 2320} 2321 2322static float 2323viewport_extent(const struct pipe_viewport_state *state, int axis, float sign) 2324{ 2325 return copysignf(state->scale[axis], sign) + state->translate[axis]; 2326} 2327 2328static void 2329calculate_guardband_size(uint32_t fb_width, uint32_t fb_height, 2330 float m00, float m11, float m30, float m31, 2331 float *xmin, float *xmax, 2332 float *ymin, float *ymax) 2333{ 2334 /* According to the "Vertex X,Y Clamping and Quantization" section of the 2335 * Strips and Fans documentation: 2336 * 2337 * "The vertex X and Y screen-space coordinates are also /clamped/ to the 2338 * fixed-point "guardband" range supported by the rasterization hardware" 2339 * 2340 * and 2341 * 2342 * "In almost all circumstances, if an object’s vertices are actually 2343 * modified by this clamping (i.e., had X or Y coordinates outside of 2344 * the guardband extent the rendered object will not match the intended 2345 * result. 
Therefore software should take steps to ensure that this does 2346 * not happen - e.g., by clipping objects such that they do not exceed 2347 * these limits after the Drawing Rectangle is applied." 2348 * 2349 * I believe the fundamental restriction is that the rasterizer (in 2350 * the SF/WM stages) have a limit on the number of pixels that can be 2351 * rasterized. We need to ensure any coordinates beyond the rasterizer 2352 * limit are handled by the clipper. So effectively that limit becomes 2353 * the clipper's guardband size. 2354 * 2355 * It goes on to say: 2356 * 2357 * "In addition, in order to be correctly rendered, objects must have a 2358 * screenspace bounding box not exceeding 8K in the X or Y direction. 2359 * This additional restriction must also be comprehended by software, 2360 * i.e., enforced by use of clipping." 2361 * 2362 * This makes no sense. Gen7+ hardware supports 16K render targets, 2363 * and you definitely need to be able to draw polygons that fill the 2364 * surface. Our assumption is that the rasterizer was limited to 8K 2365 * on Sandybridge, which only supports 8K surfaces, and it was actually 2366 * increased to 16K on Ivybridge and later. 2367 * 2368 * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge. 2369 */ 2370 const float gb_size = GEN_GEN >= 7 ? 
16384.0f : 8192.0f; 2371 2372 if (m00 != 0 && m11 != 0) { 2373 /* First, we compute the screen-space render area */ 2374 const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00); 2375 const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00); 2376 const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11); 2377 const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11); 2378 2379 /* We want the guardband to be centered on that */ 2380 const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size; 2381 const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size; 2382 const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size; 2383 const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size; 2384 2385 /* Now we need it in native device coordinates */ 2386 const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00; 2387 const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00; 2388 const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11; 2389 const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11; 2390 2391 /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be 2392 * flipped upside-down. X should be fine though. 2393 */ 2394 assert(ndc_gb_xmin <= ndc_gb_xmax); 2395 *xmin = ndc_gb_xmin; 2396 *xmax = ndc_gb_xmax; 2397 *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax); 2398 *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax); 2399 } else { 2400 /* The viewport scales to 0, so nothing will be rendered. */ 2401 *xmin = 0.0f; 2402 *xmax = 0.0f; 2403 *ymin = 0.0f; 2404 *ymax = 0.0f; 2405 } 2406} 2407 2408/** 2409 * The pipe->set_viewport_states() driver hook. 2410 * 2411 * This corresponds to our SF_CLIP_VIEWPORT states. We can't calculate 2412 * the guardband yet, as we need the framebuffer dimensions, but we can 2413 * at least fill out the rest. 
2414 */ 2415static void 2416iris_set_viewport_states(struct pipe_context *ctx, 2417 unsigned start_slot, 2418 unsigned count, 2419 const struct pipe_viewport_state *states) 2420{ 2421 struct iris_context *ice = (struct iris_context *) ctx; 2422 2423 memcpy(&ice->state.viewports[start_slot], states, sizeof(*states) * count); 2424 2425 ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT; 2426 2427 if (ice->state.cso_rast && (!ice->state.cso_rast->depth_clip_near || 2428 !ice->state.cso_rast->depth_clip_far)) 2429 ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT; 2430} 2431 2432/** 2433 * The pipe->set_framebuffer_state() driver hook. 2434 * 2435 * Sets the current draw FBO, including color render targets, depth, 2436 * and stencil buffers. 2437 */ 2438static void 2439iris_set_framebuffer_state(struct pipe_context *ctx, 2440 const struct pipe_framebuffer_state *state) 2441{ 2442 struct iris_context *ice = (struct iris_context *) ctx; 2443 struct iris_screen *screen = (struct iris_screen *)ctx->screen; 2444 struct isl_device *isl_dev = &screen->isl_dev; 2445 struct pipe_framebuffer_state *cso = &ice->state.framebuffer; 2446 struct iris_resource *zres; 2447 struct iris_resource *stencil_res; 2448 2449 unsigned samples = util_framebuffer_get_num_samples(state); 2450 unsigned layers = util_framebuffer_get_num_layers(state); 2451 2452 if (cso->samples != samples) { 2453 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE; 2454 } 2455 2456 if (cso->nr_cbufs != state->nr_cbufs) { 2457 ice->state.dirty |= IRIS_DIRTY_BLEND_STATE; 2458 } 2459 2460 if ((cso->layers == 0) != (layers == 0)) { 2461 ice->state.dirty |= IRIS_DIRTY_CLIP; 2462 } 2463 2464 if (cso->width != state->width || cso->height != state->height) { 2465 ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT; 2466 } 2467 2468 util_copy_framebuffer_state(cso, state); 2469 cso->samples = samples; 2470 cso->layers = layers; 2471 2472 struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer; 2473 2474 struct isl_view view = { 2475 
.base_level = 0, 2476 .levels = 1, 2477 .base_array_layer = 0, 2478 .array_len = 1, 2479 .swizzle = ISL_SWIZZLE_IDENTITY, 2480 }; 2481 2482 struct isl_depth_stencil_hiz_emit_info info = { .view = &view }; 2483 2484 if (cso->zsbuf) { 2485 iris_get_depth_stencil_resources(cso->zsbuf->texture, &zres, 2486 &stencil_res); 2487 2488 view.base_level = cso->zsbuf->u.tex.level; 2489 view.base_array_layer = cso->zsbuf->u.tex.first_layer; 2490 view.array_len = 2491 cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 1; 2492 2493 if (zres) { 2494 view.usage |= ISL_SURF_USAGE_DEPTH_BIT; 2495 2496 info.depth_surf = &zres->surf; 2497 info.depth_address = zres->bo->gtt_offset; 2498 info.mocs = mocs(zres->bo); 2499 2500 view.format = zres->surf.format; 2501 2502 if (iris_resource_level_has_hiz(zres, view.base_level)) { 2503 info.hiz_usage = ISL_AUX_USAGE_HIZ; 2504 info.hiz_surf = &zres->aux.surf; 2505 info.hiz_address = zres->aux.bo->gtt_offset; 2506 } 2507 } 2508 2509 if (stencil_res) { 2510 view.usage |= ISL_SURF_USAGE_STENCIL_BIT; 2511 info.stencil_surf = &stencil_res->surf; 2512 info.stencil_address = stencil_res->bo->gtt_offset; 2513 if (!zres) { 2514 view.format = stencil_res->surf.format; 2515 info.mocs = mocs(stencil_res->bo); 2516 } 2517 } 2518 } 2519 2520 isl_emit_depth_stencil_hiz_s(isl_dev, cso_z->packets, &info); 2521 2522 /* Make a null surface for unbound buffers */ 2523 void *null_surf_map = 2524 upload_state(ice->state.surface_uploader, &ice->state.null_fb, 2525 4 * GENX(RENDER_SURFACE_STATE_length), 64); 2526 isl_null_fill_state(&screen->isl_dev, null_surf_map, 2527 isl_extent3d(MAX2(cso->width, 1), 2528 MAX2(cso->height, 1), 2529 cso->layers ? 
cso->layers : 1)); 2530 ice->state.null_fb.offset += 2531 iris_bo_offset_from_base_address(iris_resource_bo(ice->state.null_fb.res)); 2532 2533 ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER; 2534 2535 /* Render target change */ 2536 ice->state.dirty |= IRIS_DIRTY_BINDINGS_FS; 2537 2538 ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 2539 2540 ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_FRAMEBUFFER]; 2541 2542#if GEN_GEN == 11 2543 // XXX: we may want to flag IRIS_DIRTY_MULTISAMPLE (or SAMPLE_MASK?) 2544 // XXX: see commit 979fc1bc9bcc64027ff2cfafd285676f31b930a6 2545 2546 /* The PIPE_CONTROL command description says: 2547 * 2548 * "Whenever a Binding Table Index (BTI) used by a Render Target Message 2549 * points to a different RENDER_SURFACE_STATE, SW must issue a Render 2550 * Target Cache Flush by enabling this bit. When render target flush 2551 * is set due to new association of BTI, PS Scoreboard Stall bit must 2552 * be set in this packet." 2553 */ 2554 // XXX: does this need to happen at 3DSTATE_BTP_PS time? 2555 iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER], 2556 PIPE_CONTROL_RENDER_TARGET_FLUSH | 2557 PIPE_CONTROL_STALL_AT_SCOREBOARD); 2558#endif 2559} 2560 2561static void 2562upload_ubo_ssbo_surf_state(struct iris_context *ice, 2563 struct pipe_shader_buffer *buf, 2564 struct iris_state_ref *surf_state, 2565 bool ssbo) 2566{ 2567 struct pipe_context *ctx = &ice->ctx; 2568 struct iris_screen *screen = (struct iris_screen *) ctx->screen; 2569 2570 // XXX: these are not retained forever, use a separate uploader? 
2571 void *map = 2572 upload_state(ice->state.surface_uploader, surf_state, 2573 4 * GENX(RENDER_SURFACE_STATE_length), 64); 2574 if (!unlikely(map)) { 2575 surf_state->res = NULL; 2576 return; 2577 } 2578 2579 struct iris_resource *res = (void *) buf->buffer; 2580 struct iris_bo *surf_bo = iris_resource_bo(surf_state->res); 2581 surf_state->offset += iris_bo_offset_from_base_address(surf_bo); 2582 2583 isl_buffer_fill_state(&screen->isl_dev, map, 2584 .address = res->bo->gtt_offset + buf->buffer_offset, 2585 .size_B = buf->buffer_size, 2586 .format = ssbo ? ISL_FORMAT_RAW 2587 : ISL_FORMAT_R32G32B32A32_FLOAT, 2588 .swizzle = ISL_SWIZZLE_IDENTITY, 2589 .stride_B = 1, 2590 .mocs = mocs(res->bo)) 2591 2592} 2593 2594/** 2595 * The pipe->set_constant_buffer() driver hook. 2596 * 2597 * This uploads any constant data in user buffers, and references 2598 * any UBO resources containing constant data. 2599 */ 2600static void 2601iris_set_constant_buffer(struct pipe_context *ctx, 2602 enum pipe_shader_type p_stage, unsigned index, 2603 const struct pipe_constant_buffer *input) 2604{ 2605 struct iris_context *ice = (struct iris_context *) ctx; 2606 gl_shader_stage stage = stage_from_pipe(p_stage); 2607 struct iris_shader_state *shs = &ice->state.shaders[stage]; 2608 struct pipe_shader_buffer *cbuf = &shs->constbuf[index]; 2609 2610 if (input && input->buffer) { 2611 shs->bound_cbufs |= 1u << index; 2612 2613 assert(index > 0); 2614 2615 pipe_resource_reference(&cbuf->buffer, input->buffer); 2616 cbuf->buffer_offset = input->buffer_offset; 2617 cbuf->buffer_size = 2618 MIN2(input->buffer_size, 2619 iris_resource_bo(input->buffer)->size - cbuf->buffer_offset); 2620 2621 struct iris_resource *res = (void *) cbuf->buffer; 2622 res->bind_history |= PIPE_BIND_CONSTANT_BUFFER; 2623 2624 upload_ubo_ssbo_surf_state(ice, cbuf, &shs->constbuf_surf_state[index], 2625 false); 2626 } else { 2627 shs->bound_cbufs &= ~(1u << index); 2628 pipe_resource_reference(&cbuf->buffer, NULL); 2629 
pipe_resource_reference(&shs->constbuf_surf_state[index].res, NULL); 2630 } 2631 2632 if (index == 0) { 2633 if (input) 2634 memcpy(&shs->cbuf0, input, sizeof(shs->cbuf0)); 2635 else 2636 memset(&shs->cbuf0, 0, sizeof(shs->cbuf0)); 2637 2638 shs->cbuf0_needs_upload = true; 2639 } 2640 2641 ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << stage; 2642 // XXX: maybe not necessary all the time...? 2643 // XXX: we need 3DS_BTP to commit these changes, and if we fell back to 2644 // XXX: pull model we may need actual new bindings... 2645 ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage; 2646} 2647 2648static void 2649upload_uniforms(struct iris_context *ice, 2650 gl_shader_stage stage) 2651{ 2652 UNUSED struct iris_genx_state *genx = ice->state.genx; 2653 struct iris_shader_state *shs = &ice->state.shaders[stage]; 2654 struct pipe_shader_buffer *cbuf = &shs->constbuf[0]; 2655 struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 2656 2657 unsigned upload_size = shader->num_system_values * sizeof(uint32_t) + 2658 shs->cbuf0.buffer_size; 2659 2660 if (upload_size == 0) 2661 return; 2662 2663 uint32_t *map = NULL; 2664 u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64, 2665 &cbuf->buffer_offset, &cbuf->buffer, (void **) &map); 2666 2667 for (int i = 0; i < shader->num_system_values; i++) { 2668 uint32_t sysval = shader->system_values[i]; 2669 uint32_t value = 0; 2670 2671 if (BRW_PARAM_DOMAIN(sysval) == BRW_PARAM_DOMAIN_IMAGE) { 2672#if GEN_GEN == 8 2673 unsigned img = BRW_PARAM_IMAGE_IDX(sysval); 2674 unsigned offset = BRW_PARAM_IMAGE_OFFSET(sysval); 2675 struct brw_image_param *param = 2676 &genx->shaders[stage].image_param[img]; 2677 2678 assert(offset < sizeof(struct brw_image_param)); 2679 value = ((uint32_t *) param)[offset]; 2680#endif 2681 } else if (sysval == BRW_PARAM_BUILTIN_ZERO) { 2682 value = 0; 2683 } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) { 2684 int plane = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(sysval); 2685 int comp = 
BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(sysval); 2686 value = fui(ice->state.clip_planes.ucp[plane][comp]); 2687 } else if (sysval == BRW_PARAM_BUILTIN_PATCH_VERTICES_IN) { 2688 if (stage == MESA_SHADER_TESS_CTRL) { 2689 value = ice->state.vertices_per_patch; 2690 } else { 2691 assert(stage == MESA_SHADER_TESS_EVAL); 2692 const struct shader_info *tcs_info = 2693 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL); 2694 if (tcs_info) 2695 value = tcs_info->tess.tcs_vertices_out; 2696 else 2697 value = ice->state.vertices_per_patch; 2698 } 2699 } else if (sysval >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X && 2700 sysval <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) { 2701 unsigned i = sysval - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; 2702 value = fui(ice->state.default_outer_level[i]); 2703 } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) { 2704 value = fui(ice->state.default_inner_level[0]); 2705 } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) { 2706 value = fui(ice->state.default_inner_level[1]); 2707 } else { 2708 assert(!"unhandled system value"); 2709 } 2710 2711 *map++ = value; 2712 } 2713 2714 if (shs->cbuf0.user_buffer) { 2715 memcpy(map, shs->cbuf0.user_buffer, shs->cbuf0.buffer_size); 2716 } 2717 2718 cbuf->buffer_size = upload_size; 2719 upload_ubo_ssbo_surf_state(ice, cbuf, &shs->constbuf_surf_state[0], false); 2720} 2721 2722/** 2723 * The pipe->set_shader_buffers() driver hook. 2724 * 2725 * This binds SSBOs and ABOs. Unfortunately, we need to stream out 2726 * SURFACE_STATE here, as the buffer offset may change each time. 
2727 */ 2728static void 2729iris_set_shader_buffers(struct pipe_context *ctx, 2730 enum pipe_shader_type p_stage, 2731 unsigned start_slot, unsigned count, 2732 const struct pipe_shader_buffer *buffers, 2733 unsigned writable_bitmask) 2734{ 2735 struct iris_context *ice = (struct iris_context *) ctx; 2736 gl_shader_stage stage = stage_from_pipe(p_stage); 2737 struct iris_shader_state *shs = &ice->state.shaders[stage]; 2738 2739 unsigned modified_bits = u_bit_consecutive(start_slot, count); 2740 2741 shs->bound_ssbos &= ~modified_bits; 2742 shs->writable_ssbos &= ~modified_bits; 2743 shs->writable_ssbos |= writable_bitmask << start_slot; 2744 2745 for (unsigned i = 0; i < count; i++) { 2746 if (buffers && buffers[i].buffer) { 2747 struct iris_resource *res = (void *) buffers[i].buffer; 2748 struct pipe_shader_buffer *ssbo = &shs->ssbo[start_slot + i]; 2749 struct iris_state_ref *surf_state = 2750 &shs->ssbo_surf_state[start_slot + i]; 2751 pipe_resource_reference(&ssbo->buffer, &res->base); 2752 ssbo->buffer_offset = buffers[i].buffer_offset; 2753 ssbo->buffer_size = 2754 MIN2(buffers[i].buffer_size, res->bo->size - ssbo->buffer_offset); 2755 2756 shs->bound_ssbos |= 1 << (start_slot + i); 2757 2758 upload_ubo_ssbo_surf_state(ice, ssbo, surf_state, true); 2759 2760 res->bind_history |= PIPE_BIND_SHADER_BUFFER; 2761 2762 util_range_add(&res->valid_buffer_range, ssbo->buffer_offset, 2763 ssbo->buffer_offset + ssbo->buffer_size); 2764 } else { 2765 pipe_resource_reference(&shs->ssbo[start_slot + i].buffer, NULL); 2766 pipe_resource_reference(&shs->ssbo_surf_state[start_slot + i].res, 2767 NULL); 2768 } 2769 } 2770 2771 ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage; 2772} 2773 2774static void 2775iris_delete_state(struct pipe_context *ctx, void *state) 2776{ 2777 free(state); 2778} 2779 2780/** 2781 * The pipe->set_vertex_buffers() driver hook. 2782 * 2783 * This translates pipe_vertex_buffer to our 3DSTATE_VERTEX_BUFFERS packet. 
2784 */ 2785static void 2786iris_set_vertex_buffers(struct pipe_context *ctx, 2787 unsigned start_slot, unsigned count, 2788 const struct pipe_vertex_buffer *buffers) 2789{ 2790 struct iris_context *ice = (struct iris_context *) ctx; 2791 struct iris_genx_state *genx = ice->state.genx; 2792 2793 ice->state.bound_vertex_buffers &= ~u_bit_consecutive64(start_slot, count); 2794 2795 for (unsigned i = 0; i < count; i++) { 2796 const struct pipe_vertex_buffer *buffer = buffers ? &buffers[i] : NULL; 2797 struct iris_vertex_buffer_state *state = 2798 &genx->vertex_buffers[start_slot + i]; 2799 2800 if (!buffer) { 2801 pipe_resource_reference(&state->resource, NULL); 2802 continue; 2803 } 2804 2805 /* We may see user buffers that are NULL bindings. */ 2806 assert(!(buffer->is_user_buffer && buffer->buffer.user != NULL)); 2807 2808 pipe_resource_reference(&state->resource, buffer->buffer.resource); 2809 struct iris_resource *res = (void *) state->resource; 2810 2811 if (res) { 2812 ice->state.bound_vertex_buffers |= 1ull << (start_slot + i); 2813 res->bind_history |= PIPE_BIND_VERTEX_BUFFER; 2814 } 2815 2816 iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) { 2817 vb.VertexBufferIndex = start_slot + i; 2818 vb.AddressModifyEnable = true; 2819 vb.BufferPitch = buffer->stride; 2820 if (res) { 2821 vb.BufferSize = res->bo->size - (int) buffer->buffer_offset; 2822 vb.BufferStartingAddress = 2823 ro_bo(NULL, res->bo->gtt_offset + (int) buffer->buffer_offset); 2824 vb.MOCS = mocs(res->bo); 2825 } else { 2826 vb.NullVertexBuffer = true; 2827 } 2828 } 2829 } 2830 2831 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS; 2832} 2833 2834/** 2835 * Gallium CSO for vertex elements. 
2836 */ 2837struct iris_vertex_element_state { 2838 uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)]; 2839 uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)]; 2840 uint32_t edgeflag_ve[GENX(VERTEX_ELEMENT_STATE_length)]; 2841 uint32_t edgeflag_vfi[GENX(3DSTATE_VF_INSTANCING_length)]; 2842 unsigned count; 2843}; 2844 2845/** 2846 * The pipe->create_vertex_elements() driver hook. 2847 * 2848 * This translates pipe_vertex_element to our 3DSTATE_VERTEX_ELEMENTS 2849 * and 3DSTATE_VF_INSTANCING commands. The vertex_elements and vf_instancing 2850 * arrays are ready to be emitted at draw time if no EdgeFlag or SGVs are 2851 * needed. In these cases we will need information available at draw time. 2852 * We setup edgeflag_ve and edgeflag_vfi as alternatives last 2853 * 3DSTATE_VERTEX_ELEMENT and 3DSTATE_VF_INSTANCING that can be used at 2854 * draw time if we detect that EdgeFlag is needed by the Vertex Shader. 2855 */ 2856static void * 2857iris_create_vertex_elements(struct pipe_context *ctx, 2858 unsigned count, 2859 const struct pipe_vertex_element *state) 2860{ 2861 struct iris_screen *screen = (struct iris_screen *)ctx->screen; 2862 const struct gen_device_info *devinfo = &screen->devinfo; 2863 struct iris_vertex_element_state *cso = 2864 malloc(sizeof(struct iris_vertex_element_state)); 2865 2866 cso->count = count; 2867 2868 iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) { 2869 ve.DWordLength = 2870 1 + GENX(VERTEX_ELEMENT_STATE_length) * MAX2(count, 1) - 2; 2871 } 2872 2873 uint32_t *ve_pack_dest = &cso->vertex_elements[1]; 2874 uint32_t *vfi_pack_dest = cso->vf_instancing; 2875 2876 if (count == 0) { 2877 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 2878 ve.Valid = true; 2879 ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; 2880 ve.Component0Control = VFCOMP_STORE_0; 2881 ve.Component1Control = VFCOMP_STORE_0; 2882 ve.Component2Control = VFCOMP_STORE_0; 2883 
ve.Component3Control = VFCOMP_STORE_1_FP; 2884 } 2885 2886 iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { 2887 } 2888 } 2889 2890 for (int i = 0; i < count; i++) { 2891 const struct iris_format_info fmt = 2892 iris_format_for_usage(devinfo, state[i].src_format, 0); 2893 unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC, 2894 VFCOMP_STORE_SRC, VFCOMP_STORE_SRC }; 2895 2896 switch (isl_format_get_num_channels(fmt.fmt)) { 2897 case 0: comp[0] = VFCOMP_STORE_0; /* fallthrough */ 2898 case 1: comp[1] = VFCOMP_STORE_0; /* fallthrough */ 2899 case 2: comp[2] = VFCOMP_STORE_0; /* fallthrough */ 2900 case 3: 2901 comp[3] = isl_format_has_int_channel(fmt.fmt) ? VFCOMP_STORE_1_INT 2902 : VFCOMP_STORE_1_FP; 2903 break; 2904 } 2905 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 2906 ve.EdgeFlagEnable = false; 2907 ve.VertexBufferIndex = state[i].vertex_buffer_index; 2908 ve.Valid = true; 2909 ve.SourceElementOffset = state[i].src_offset; 2910 ve.SourceElementFormat = fmt.fmt; 2911 ve.Component0Control = comp[0]; 2912 ve.Component1Control = comp[1]; 2913 ve.Component2Control = comp[2]; 2914 ve.Component3Control = comp[3]; 2915 } 2916 2917 iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { 2918 vi.VertexElementIndex = i; 2919 vi.InstancingEnable = state[i].instance_divisor > 0; 2920 vi.InstanceDataStepRate = state[i].instance_divisor; 2921 } 2922 2923 ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); 2924 vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length); 2925 } 2926 2927 /* An alternative version of the last VE and VFI is stored so it 2928 * can be used at draw time in case Vertex Shader uses EdgeFlag 2929 */ 2930 if (count) { 2931 const unsigned edgeflag_index = count - 1; 2932 const struct iris_format_info fmt = 2933 iris_format_for_usage(devinfo, state[edgeflag_index].src_format, 0); 2934 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), cso->edgeflag_ve, ve) { 2935 ve.EdgeFlagEnable = true ; 2936 ve.VertexBufferIndex 
= state[edgeflag_index].vertex_buffer_index; 2937 ve.Valid = true; 2938 ve.SourceElementOffset = state[edgeflag_index].src_offset; 2939 ve.SourceElementFormat = fmt.fmt; 2940 ve.Component0Control = VFCOMP_STORE_SRC; 2941 ve.Component1Control = VFCOMP_STORE_0; 2942 ve.Component2Control = VFCOMP_STORE_0; 2943 ve.Component3Control = VFCOMP_STORE_0; 2944 } 2945 iris_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->edgeflag_vfi, vi) { 2946 /* The vi.VertexElementIndex of the EdgeFlag Vertex Element is filled 2947 * at draw time, as it should change if SGVs are emitted. 2948 */ 2949 vi.InstancingEnable = state[edgeflag_index].instance_divisor > 0; 2950 vi.InstanceDataStepRate = state[edgeflag_index].instance_divisor; 2951 } 2952 } 2953 2954 return cso; 2955} 2956 2957/** 2958 * The pipe->bind_vertex_elements_state() driver hook. 2959 */ 2960static void 2961iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state) 2962{ 2963 struct iris_context *ice = (struct iris_context *) ctx; 2964 struct iris_vertex_element_state *old_cso = ice->state.cso_vertex_elements; 2965 struct iris_vertex_element_state *new_cso = state; 2966 2967 /* 3DSTATE_VF_SGVs overrides the last VE, so if the count is changing, 2968 * we need to re-emit it to ensure we're overriding the right one. 2969 */ 2970 if (new_cso && cso_changed(count)) 2971 ice->state.dirty |= IRIS_DIRTY_VF_SGVS; 2972 2973 ice->state.cso_vertex_elements = state; 2974 ice->state.dirty |= IRIS_DIRTY_VERTEX_ELEMENTS; 2975} 2976 2977/** 2978 * The pipe->create_stream_output_target() driver hook. 2979 * 2980 * "Target" here refers to a destination buffer. We translate this into 2981 * a 3DSTATE_SO_BUFFER packet. We can handle most fields, but don't yet 2982 * know which buffer this represents, or whether we ought to zero the 2983 * write-offsets, or append. Those are handled in the set() hook. 
2984 */ 2985static struct pipe_stream_output_target * 2986iris_create_stream_output_target(struct pipe_context *ctx, 2987 struct pipe_resource *p_res, 2988 unsigned buffer_offset, 2989 unsigned buffer_size) 2990{ 2991 struct iris_resource *res = (void *) p_res; 2992 struct iris_stream_output_target *cso = calloc(1, sizeof(*cso)); 2993 if (!cso) 2994 return NULL; 2995 2996 res->bind_history |= PIPE_BIND_STREAM_OUTPUT; 2997 2998 pipe_reference_init(&cso->base.reference, 1); 2999 pipe_resource_reference(&cso->base.buffer, p_res); 3000 cso->base.buffer_offset = buffer_offset; 3001 cso->base.buffer_size = buffer_size; 3002 cso->base.context = ctx; 3003 3004 util_range_add(&res->valid_buffer_range, buffer_offset, 3005 buffer_offset + buffer_size); 3006 3007 upload_state(ctx->stream_uploader, &cso->offset, sizeof(uint32_t), 4); 3008 3009 return &cso->base; 3010} 3011 3012static void 3013iris_stream_output_target_destroy(struct pipe_context *ctx, 3014 struct pipe_stream_output_target *state) 3015{ 3016 struct iris_stream_output_target *cso = (void *) state; 3017 3018 pipe_resource_reference(&cso->base.buffer, NULL); 3019 pipe_resource_reference(&cso->offset.res, NULL); 3020 3021 free(cso); 3022} 3023 3024/** 3025 * The pipe->set_stream_output_targets() driver hook. 3026 * 3027 * At this point, we know which targets are bound to a particular index, 3028 * and also whether we want to append or start over. We can finish the 3029 * 3DSTATE_SO_BUFFER packets we started earlier. 
3030 */ 3031static void 3032iris_set_stream_output_targets(struct pipe_context *ctx, 3033 unsigned num_targets, 3034 struct pipe_stream_output_target **targets, 3035 const unsigned *offsets) 3036{ 3037 struct iris_context *ice = (struct iris_context *) ctx; 3038 struct iris_genx_state *genx = ice->state.genx; 3039 uint32_t *so_buffers = genx->so_buffers; 3040 3041 const bool active = num_targets > 0; 3042 if (ice->state.streamout_active != active) { 3043 ice->state.streamout_active = active; 3044 ice->state.dirty |= IRIS_DIRTY_STREAMOUT; 3045 3046 /* We only emit 3DSTATE_SO_DECL_LIST when streamout is active, because 3047 * it's a non-pipelined command. If we're switching streamout on, we 3048 * may have missed emitting it earlier, so do so now. (We're already 3049 * taking a stall to update 3DSTATE_SO_BUFFERS anyway...) 3050 */ 3051 if (active) { 3052 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST; 3053 } else { 3054 uint32_t flush = 0; 3055 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 3056 struct iris_stream_output_target *tgt = 3057 (void *) ice->state.so_target[i]; 3058 if (tgt) { 3059 struct iris_resource *res = (void *) tgt->base.buffer; 3060 3061 flush |= iris_flush_bits_for_history(res); 3062 iris_dirty_for_history(ice, res); 3063 } 3064 } 3065 iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER], flush); 3066 } 3067 } 3068 3069 for (int i = 0; i < 4; i++) { 3070 pipe_so_target_reference(&ice->state.so_target[i], 3071 i < num_targets ? targets[i] : NULL); 3072 } 3073 3074 /* No need to update 3DSTATE_SO_BUFFER unless SOL is active. 
*/ 3075 if (!active) 3076 return; 3077 3078 for (unsigned i = 0; i < 4; i++, 3079 so_buffers += GENX(3DSTATE_SO_BUFFER_length)) { 3080 3081 struct iris_stream_output_target *tgt = (void *) ice->state.so_target[i]; 3082 unsigned offset = offsets[i]; 3083 3084 if (!tgt) { 3085 iris_pack_command(GENX(3DSTATE_SO_BUFFER), so_buffers, sob) 3086 sob.SOBufferIndex = i; 3087 continue; 3088 } 3089 3090 struct iris_resource *res = (void *) tgt->base.buffer; 3091 3092 /* Note that offsets[i] will either be 0, causing us to zero 3093 * the value in the buffer, or 0xFFFFFFFF, which happens to mean 3094 * "continue appending at the existing offset." 3095 */ 3096 assert(offset == 0 || offset == 0xFFFFFFFF); 3097 3098 /* We might be called by Begin (offset = 0), Pause, then Resume 3099 * (offset = 0xFFFFFFFF) before ever drawing (where these commands 3100 * will actually be sent to the GPU). In this case, we don't want 3101 * to append - we still want to do our initial zeroing. 3102 */ 3103 if (!tgt->zeroed) 3104 offset = 0; 3105 3106 iris_pack_command(GENX(3DSTATE_SO_BUFFER), so_buffers, sob) { 3107 sob.SurfaceBaseAddress = 3108 rw_bo(NULL, res->bo->gtt_offset + tgt->base.buffer_offset); 3109 sob.SOBufferEnable = true; 3110 sob.StreamOffsetWriteEnable = true; 3111 sob.StreamOutputBufferOffsetAddressEnable = true; 3112 sob.MOCS = mocs(res->bo); 3113 3114 sob.SurfaceSize = MAX2(tgt->base.buffer_size / 4, 1) - 1; 3115 3116 sob.SOBufferIndex = i; 3117 sob.StreamOffset = offset; 3118 sob.StreamOutputBufferOffsetAddress = 3119 rw_bo(NULL, iris_resource_bo(tgt->offset.res)->gtt_offset + 3120 tgt->offset.offset); 3121 } 3122 } 3123 3124 ice->state.dirty |= IRIS_DIRTY_SO_BUFFERS; 3125} 3126 3127/** 3128 * An iris-vtable helper for encoding the 3DSTATE_SO_DECL_LIST and 3129 * 3DSTATE_STREAMOUT packets. 3130 * 3131 * 3DSTATE_SO_DECL_LIST is a list of shader outputs we want the streamout 3132 * hardware to record. 
We can create it entirely based on the shader, with 3133 * no dynamic state dependencies. 3134 * 3135 * 3DSTATE_STREAMOUT is an annoying mix of shader-based information and 3136 * state-based settings. We capture the shader-related ones here, and merge 3137 * the rest in at draw time. 3138 */ 3139static uint32_t * 3140iris_create_so_decl_list(const struct pipe_stream_output_info *info, 3141 const struct brw_vue_map *vue_map) 3142{ 3143 struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128]; 3144 int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; 3145 int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; 3146 int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; 3147 int max_decls = 0; 3148 STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS); 3149 3150 memset(so_decl, 0, sizeof(so_decl)); 3151 3152 /* Construct the list of SO_DECLs to be emitted. The formatting of the 3153 * command feels strange -- each dword pair contains a SO_DECL per stream. 3154 */ 3155 for (unsigned i = 0; i < info->num_outputs; i++) { 3156 const struct pipe_stream_output *output = &info->output[i]; 3157 const int buffer = output->output_buffer; 3158 const int varying = output->register_index; 3159 const unsigned stream_id = output->stream; 3160 assert(stream_id < MAX_VERTEX_STREAMS); 3161 3162 buffer_mask[stream_id] |= 1 << buffer; 3163 3164 assert(vue_map->varying_to_slot[varying] >= 0); 3165 3166 /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[] 3167 * array. Instead, it simply increments DstOffset for the following 3168 * input by the number of components that should be skipped. 3169 * 3170 * Our hardware is unusual in that it requires us to program SO_DECLs 3171 * for fake "hole" components, rather than simply taking the offset 3172 * for each real varying. Each hole can have size 1, 2, 3, or 4; we 3173 * program as many size = 4 holes as we can, then a final hole to 3174 * accommodate the final 1, 2, or 3 remaining. 
3175 */ 3176 int skip_components = output->dst_offset - next_offset[buffer]; 3177 3178 while (skip_components > 0) { 3179 so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { 3180 .HoleFlag = 1, 3181 .OutputBufferSlot = output->output_buffer, 3182 .ComponentMask = (1 << MIN2(skip_components, 4)) - 1, 3183 }; 3184 skip_components -= 4; 3185 } 3186 3187 next_offset[buffer] = output->dst_offset + output->num_components; 3188 3189 so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { 3190 .OutputBufferSlot = output->output_buffer, 3191 .RegisterIndex = vue_map->varying_to_slot[varying], 3192 .ComponentMask = 3193 ((1 << output->num_components) - 1) << output->start_component, 3194 }; 3195 3196 if (decls[stream_id] > max_decls) 3197 max_decls = decls[stream_id]; 3198 } 3199 3200 unsigned dwords = GENX(3DSTATE_STREAMOUT_length) + (3 + 2 * max_decls); 3201 uint32_t *map = ralloc_size(NULL, sizeof(uint32_t) * dwords); 3202 uint32_t *so_decl_map = map + GENX(3DSTATE_STREAMOUT_length); 3203 3204 iris_pack_command(GENX(3DSTATE_STREAMOUT), map, sol) { 3205 int urb_entry_read_offset = 0; 3206 int urb_entry_read_length = (vue_map->num_slots + 1) / 2 - 3207 urb_entry_read_offset; 3208 3209 /* We always read the whole vertex. This could be reduced at some 3210 * point by reading less and offsetting the register index in the 3211 * SO_DECLs. 3212 */ 3213 sol.Stream0VertexReadOffset = urb_entry_read_offset; 3214 sol.Stream0VertexReadLength = urb_entry_read_length - 1; 3215 sol.Stream1VertexReadOffset = urb_entry_read_offset; 3216 sol.Stream1VertexReadLength = urb_entry_read_length - 1; 3217 sol.Stream2VertexReadOffset = urb_entry_read_offset; 3218 sol.Stream2VertexReadLength = urb_entry_read_length - 1; 3219 sol.Stream3VertexReadOffset = urb_entry_read_offset; 3220 sol.Stream3VertexReadLength = urb_entry_read_length - 1; 3221 3222 /* Set buffer pitches; 0 means unbound. 
*/ 3223 sol.Buffer0SurfacePitch = 4 * info->stride[0]; 3224 sol.Buffer1SurfacePitch = 4 * info->stride[1]; 3225 sol.Buffer2SurfacePitch = 4 * info->stride[2]; 3226 sol.Buffer3SurfacePitch = 4 * info->stride[3]; 3227 } 3228 3229 iris_pack_command(GENX(3DSTATE_SO_DECL_LIST), so_decl_map, list) { 3230 list.DWordLength = 3 + 2 * max_decls - 2; 3231 list.StreamtoBufferSelects0 = buffer_mask[0]; 3232 list.StreamtoBufferSelects1 = buffer_mask[1]; 3233 list.StreamtoBufferSelects2 = buffer_mask[2]; 3234 list.StreamtoBufferSelects3 = buffer_mask[3]; 3235 list.NumEntries0 = decls[0]; 3236 list.NumEntries1 = decls[1]; 3237 list.NumEntries2 = decls[2]; 3238 list.NumEntries3 = decls[3]; 3239 } 3240 3241 for (int i = 0; i < max_decls; i++) { 3242 iris_pack_state(GENX(SO_DECL_ENTRY), so_decl_map + 3 + i * 2, entry) { 3243 entry.Stream0Decl = so_decl[0][i]; 3244 entry.Stream1Decl = so_decl[1][i]; 3245 entry.Stream2Decl = so_decl[2][i]; 3246 entry.Stream3Decl = so_decl[3][i]; 3247 } 3248 } 3249 3250 return map; 3251} 3252 3253static void 3254iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots, 3255 const struct brw_vue_map *last_vue_map, 3256 bool two_sided_color, 3257 unsigned *out_offset, 3258 unsigned *out_length) 3259{ 3260 /* The compiler computes the first URB slot without considering COL/BFC 3261 * swizzling (because it doesn't know whether it's enabled), so we need 3262 * to do that here too. This may result in a smaller offset, which 3263 * should be safe. 3264 */ 3265 const unsigned first_slot = 3266 brw_compute_first_urb_slot_required(fs_input_slots, last_vue_map); 3267 3268 /* This becomes the URB read offset (counted in pairs of slots). */ 3269 assert(first_slot % 2 == 0); 3270 *out_offset = first_slot / 2; 3271 3272 /* We need to adjust the inputs read to account for front/back color 3273 * swizzling, as it can make the URB length longer. 
3274 */ 3275 for (int c = 0; c <= 1; c++) { 3276 if (fs_input_slots & (VARYING_BIT_COL0 << c)) { 3277 /* If two sided color is enabled, the fragment shader's gl_Color 3278 * (COL0) input comes from either the gl_FrontColor (COL0) or 3279 * gl_BackColor (BFC0) input varyings. Mark BFC as used, too. 3280 */ 3281 if (two_sided_color) 3282 fs_input_slots |= (VARYING_BIT_BFC0 << c); 3283 3284 /* If front color isn't written, we opt to give them back color 3285 * instead of an undefined value. Switch from COL to BFC. 3286 */ 3287 if (last_vue_map->varying_to_slot[VARYING_SLOT_COL0 + c] == -1) { 3288 fs_input_slots &= ~(VARYING_BIT_COL0 << c); 3289 fs_input_slots |= (VARYING_BIT_BFC0 << c); 3290 } 3291 } 3292 } 3293 3294 /* Compute the minimum URB Read Length necessary for the FS inputs. 3295 * 3296 * From the Sandy Bridge PRM, Volume 2, Part 1, documentation for 3297 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": 3298 * 3299 * "This field should be set to the minimum length required to read the 3300 * maximum source attribute. The maximum source attribute is indicated 3301 * by the maximum value of the enabled Attribute # Source Attribute if 3302 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if 3303 * enable is not set. 3304 * read_length = ceiling((max_source_attr + 1) / 2) 3305 * 3306 * [errata] Corruption/Hang possible if length programmed larger than 3307 * recommended" 3308 * 3309 * Similar text exists for Ivy Bridge. 3310 * 3311 * We find the last URB slot that's actually read by the FS. 3312 */ 3313 unsigned last_read_slot = last_vue_map->num_slots - 1; 3314 while (last_read_slot > first_slot && !(fs_input_slots & 3315 (1ull << last_vue_map->slot_to_varying[last_read_slot]))) 3316 --last_read_slot; 3317 3318 /* The URB read length is the difference of the two, counted in pairs. 
*/ 3319 *out_length = DIV_ROUND_UP(last_read_slot - first_slot + 1, 2); 3320} 3321 3322static void 3323iris_emit_sbe_swiz(struct iris_batch *batch, 3324 const struct iris_context *ice, 3325 unsigned urb_read_offset, 3326 unsigned sprite_coord_enables) 3327{ 3328 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = {}; 3329 const struct brw_wm_prog_data *wm_prog_data = (void *) 3330 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; 3331 const struct brw_vue_map *vue_map = ice->shaders.last_vue_map; 3332 const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 3333 3334 /* XXX: this should be generated when putting programs in place */ 3335 3336 for (int fs_attr = 0; fs_attr < VARYING_SLOT_MAX; fs_attr++) { 3337 const int input_index = wm_prog_data->urb_setup[fs_attr]; 3338 if (input_index < 0 || input_index >= 16) 3339 continue; 3340 3341 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr = 3342 &attr_overrides[input_index]; 3343 int slot = vue_map->varying_to_slot[fs_attr]; 3344 3345 /* Viewport and Layer are stored in the VUE header. We need to override 3346 * them to zero if earlier stages didn't write them, as GL requires that 3347 * they read back as zero when not explicitly set. 3348 */ 3349 switch (fs_attr) { 3350 case VARYING_SLOT_VIEWPORT: 3351 case VARYING_SLOT_LAYER: 3352 attr->ComponentOverrideX = true; 3353 attr->ComponentOverrideW = true; 3354 attr->ConstantSource = CONST_0000; 3355 3356 if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) 3357 attr->ComponentOverrideY = true; 3358 if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) 3359 attr->ComponentOverrideZ = true; 3360 continue; 3361 3362 case VARYING_SLOT_PRIMITIVE_ID: 3363 /* Override if the previous shader stage didn't write gl_PrimitiveID. 
*/ 3364 if (slot == -1) { 3365 attr->ComponentOverrideX = true; 3366 attr->ComponentOverrideY = true; 3367 attr->ComponentOverrideZ = true; 3368 attr->ComponentOverrideW = true; 3369 attr->ConstantSource = PRIM_ID; 3370 continue; 3371 } 3372 3373 default: 3374 break; 3375 } 3376 3377 if (sprite_coord_enables & (1 << input_index)) 3378 continue; 3379 3380 /* If there was only a back color written but not front, use back 3381 * as the color instead of undefined. 3382 */ 3383 if (slot == -1 && fs_attr == VARYING_SLOT_COL0) 3384 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0]; 3385 if (slot == -1 && fs_attr == VARYING_SLOT_COL1) 3386 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1]; 3387 3388 /* Not written by the previous stage - undefined. */ 3389 if (slot == -1) { 3390 attr->ComponentOverrideX = true; 3391 attr->ComponentOverrideY = true; 3392 attr->ComponentOverrideZ = true; 3393 attr->ComponentOverrideW = true; 3394 attr->ConstantSource = CONST_0001_FLOAT; 3395 continue; 3396 } 3397 3398 /* Compute the location of the attribute relative to the read offset, 3399 * which is counted in 256-bit increments (two 128-bit VUE slots). 3400 */ 3401 const int source_attr = slot - 2 * urb_read_offset; 3402 assert(source_attr >= 0 && source_attr <= 32); 3403 attr->SourceAttribute = source_attr; 3404 3405 /* If we are doing two-sided color, and the VUE slot following this one 3406 * represents a back-facing color, then we need to instruct the SF unit 3407 * to do back-facing swizzling. 
3408 */ 3409 if (cso_rast->light_twoside && 3410 ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 && 3411 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) || 3412 (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 && 3413 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1))) 3414 attr->SwizzleSelect = INPUTATTR_FACING; 3415 } 3416 3417 iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) { 3418 for (int i = 0; i < 16; i++) 3419 sbes.Attribute[i] = attr_overrides[i]; 3420 } 3421} 3422 3423static unsigned 3424iris_calculate_point_sprite_overrides(const struct brw_wm_prog_data *prog_data, 3425 const struct iris_rasterizer_state *cso) 3426{ 3427 unsigned overrides = 0; 3428 3429 if (prog_data->urb_setup[VARYING_SLOT_PNTC] != -1) 3430 overrides |= 1 << prog_data->urb_setup[VARYING_SLOT_PNTC]; 3431 3432 for (int i = 0; i < 8; i++) { 3433 if ((cso->sprite_coord_enable & (1 << i)) && 3434 prog_data->urb_setup[VARYING_SLOT_TEX0 + i] != -1) 3435 overrides |= 1 << prog_data->urb_setup[VARYING_SLOT_TEX0 + i]; 3436 } 3437 3438 return overrides; 3439} 3440 3441static void 3442iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice) 3443{ 3444 const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 3445 const struct brw_wm_prog_data *wm_prog_data = (void *) 3446 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; 3447 const struct shader_info *fs_info = 3448 iris_get_shader_info(ice, MESA_SHADER_FRAGMENT); 3449 3450 unsigned urb_read_offset, urb_read_length; 3451 iris_compute_sbe_urb_read_interval(fs_info->inputs_read, 3452 ice->shaders.last_vue_map, 3453 cso_rast->light_twoside, 3454 &urb_read_offset, &urb_read_length); 3455 3456 unsigned sprite_coord_overrides = 3457 iris_calculate_point_sprite_overrides(wm_prog_data, cso_rast); 3458 3459 iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) { 3460 sbe.AttributeSwizzleEnable = true; 3461 sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; 3462 sbe.PointSpriteTextureCoordinateOrigin 
= cso_rast->sprite_coord_mode; 3463 sbe.VertexURBEntryReadOffset = urb_read_offset; 3464 sbe.VertexURBEntryReadLength = urb_read_length; 3465 sbe.ForceVertexURBEntryReadOffset = true; 3466 sbe.ForceVertexURBEntryReadLength = true; 3467 sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs; 3468 sbe.PointSpriteTextureCoordinateEnable = sprite_coord_overrides; 3469#if GEN_GEN >= 9 3470 for (int i = 0; i < 32; i++) { 3471 sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW; 3472 } 3473#endif 3474 } 3475 3476 iris_emit_sbe_swiz(batch, ice, urb_read_offset, sprite_coord_overrides); 3477} 3478 3479/* ------------------------------------------------------------------- */ 3480 3481/** 3482 * Populate VS program key fields based on the current state. 3483 */ 3484static void 3485iris_populate_vs_key(const struct iris_context *ice, 3486 const struct shader_info *info, 3487 struct brw_vs_prog_key *key) 3488{ 3489 const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 3490 3491 if (info->clip_distance_array_size == 0 && 3492 (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX))) 3493 key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts; 3494} 3495 3496/** 3497 * Populate TCS program key fields based on the current state. 3498 */ 3499static void 3500iris_populate_tcs_key(const struct iris_context *ice, 3501 struct brw_tcs_prog_key *key) 3502{ 3503} 3504 3505/** 3506 * Populate TES program key fields based on the current state. 3507 */ 3508static void 3509iris_populate_tes_key(const struct iris_context *ice, 3510 struct brw_tes_prog_key *key) 3511{ 3512} 3513 3514/** 3515 * Populate GS program key fields based on the current state. 3516 */ 3517static void 3518iris_populate_gs_key(const struct iris_context *ice, 3519 struct brw_gs_prog_key *key) 3520{ 3521} 3522 3523/** 3524 * Populate FS program key fields based on the current state. 
3525 */ 3526static void 3527iris_populate_fs_key(const struct iris_context *ice, 3528 struct brw_wm_prog_key *key) 3529{ 3530 struct iris_screen *screen = (void *) ice->ctx.screen; 3531 const struct pipe_framebuffer_state *fb = &ice->state.framebuffer; 3532 const struct iris_depth_stencil_alpha_state *zsa = ice->state.cso_zsa; 3533 const struct iris_rasterizer_state *rast = ice->state.cso_rast; 3534 const struct iris_blend_state *blend = ice->state.cso_blend; 3535 3536 key->nr_color_regions = fb->nr_cbufs; 3537 3538 key->clamp_fragment_color = rast->clamp_fragment_color; 3539 3540 key->alpha_to_coverage = blend->alpha_to_coverage; 3541 3542 key->alpha_test_replicate_alpha = fb->nr_cbufs > 1 && zsa->alpha.enabled; 3543 3544 /* XXX: only bother if COL0/1 are read */ 3545 key->flat_shade = rast->flatshade; 3546 3547 key->persample_interp = rast->force_persample_interp; 3548 key->multisample_fbo = rast->multisample && fb->samples > 1; 3549 3550 key->coherent_fb_fetch = true; 3551 3552 key->force_dual_color_blend = 3553 screen->driconf.dual_color_blend_by_location && 3554 (blend->blend_enables & 1) && blend->dual_color_blending; 3555 3556 /* TODO: support key->force_dual_color_blend for Unigine */ 3557 /* TODO: Respect glHint for key->high_quality_derivatives */ 3558} 3559 3560static void 3561iris_populate_cs_key(const struct iris_context *ice, 3562 struct brw_cs_prog_key *key) 3563{ 3564} 3565 3566static uint64_t 3567KSP(const struct iris_compiled_shader *shader) 3568{ 3569 struct iris_resource *res = (void *) shader->assembly.res; 3570 return iris_bo_offset_from_base_address(res->bo) + shader->assembly.offset; 3571} 3572 3573/* Gen11 workaround table #2056 WABTPPrefetchDisable suggests to disable 3574 * prefetching of binding tables in A0 and B0 steppings. XXX: Revisit 3575 * this WA on C0 stepping. 3576 * 3577 * TODO: Fill out SamplerCount for prefetching? 
 */

/**
 * Fill in the thread-dispatch fields shared by the 3DSTATE_{VS,HS,DS,GS}
 * packets.
 *
 * The expansion refers to `shader`, `prog_data`, `vue_prog_data` and `ice`
 * by name, so all four must be in scope at the use site.  `prefix` is
 * pasted onto the URBEntryReadLength / URBEntryReadOffset field names, and
 * `stage` is forwarded to iris_get_scratch_space() when the program uses
 * scratch space.
 *
 * When GEN_GEN is 11, BindingTableEntryCount is forced to zero (see the
 * workaround note above); otherwise it is the binding table size in dwords.
 *
 * NOTE(review): scratch_addr is a uint32_t, so if bo->gtt_offset is wider
 * than 32 bits it is narrowed here -- presumably scratch BOs are placed
 * where that is safe; confirm against iris_get_scratch_space().
 */
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix, stage)                   \
   pkt.KernelStartPointer = KSP(shader);                                  \
   pkt.BindingTableEntryCount = GEN_GEN == 11 ? 0 :                       \
      prog_data->binding_table.size_bytes / 4;                            \
   pkt.FloatingPointMode = prog_data->use_alt_mode;                       \
                                                                          \
   pkt.DispatchGRFStartRegisterForURBData =                               \
      prog_data->dispatch_grf_start_reg;                                  \
   pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length;       \
   pkt.prefix##URBEntryReadOffset = 0;                                    \
                                                                          \
   pkt.StatisticsEnable = true;                                           \
   pkt.Enable           = true;                                           \
                                                                          \
   if (prog_data->total_scratch) {                                        \
      struct iris_bo *bo =                                                \
         iris_get_scratch_space(ice, prog_data->total_scratch, stage);    \
      uint32_t scratch_addr = bo->gtt_offset;                             \
      pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;     \
      pkt.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr);            \
   }

/**
 * Encode most of 3DSTATE_VS based on the compiled shader.
 *
 * The packed command is stored in shader->derived_data.
 */
static void
iris_store_vs_state(struct iris_context *ice,
                    const struct gen_device_info *devinfo,
                    struct iris_compiled_shader *shader)
{
   /* Both names are referenced by INIT_THREAD_DISPATCH_FIELDS below. */
   struct brw_stage_prog_data *prog_data = shader->prog_data;
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;

   iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) {
      INIT_THREAD_DISPATCH_FIELDS(vs, Vertex, MESA_SHADER_VERTEX);
      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
      vs.SIMD8DispatchEnable = true;
      vs.UserClipDistanceCullTestEnableBitmask =
         vue_prog_data->cull_distance_mask;
   }
}

/**
 * Encode most of 3DSTATE_HS based on the compiled shader.
3624 */ 3625static void 3626iris_store_tcs_state(struct iris_context *ice, 3627 const struct gen_device_info *devinfo, 3628 struct iris_compiled_shader *shader) 3629{ 3630 struct brw_stage_prog_data *prog_data = shader->prog_data; 3631 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; 3632 struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data; 3633 3634 iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) { 3635 INIT_THREAD_DISPATCH_FIELDS(hs, Vertex, MESA_SHADER_TESS_CTRL); 3636 3637 hs.InstanceCount = tcs_prog_data->instances - 1; 3638 hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; 3639 hs.IncludeVertexHandles = true; 3640 } 3641} 3642 3643/** 3644 * Encode 3DSTATE_TE and most of 3DSTATE_DS based on the compiled shader. 3645 */ 3646static void 3647iris_store_tes_state(struct iris_context *ice, 3648 const struct gen_device_info *devinfo, 3649 struct iris_compiled_shader *shader) 3650{ 3651 struct brw_stage_prog_data *prog_data = shader->prog_data; 3652 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; 3653 struct brw_tes_prog_data *tes_prog_data = (void *) prog_data; 3654 3655 uint32_t *te_state = (void *) shader->derived_data; 3656 uint32_t *ds_state = te_state + GENX(3DSTATE_TE_length); 3657 3658 iris_pack_command(GENX(3DSTATE_TE), te_state, te) { 3659 te.Partitioning = tes_prog_data->partitioning; 3660 te.OutputTopology = tes_prog_data->output_topology; 3661 te.TEDomain = tes_prog_data->domain; 3662 te.TEEnable = true; 3663 te.MaximumTessellationFactorOdd = 63.0; 3664 te.MaximumTessellationFactorNotOdd = 64.0; 3665 } 3666 3667 iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) { 3668 INIT_THREAD_DISPATCH_FIELDS(ds, Patch, MESA_SHADER_TESS_EVAL); 3669 3670 ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; 3671 ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; 3672 ds.ComputeWCoordinateEnable = 3673 tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; 3674 3675 ds.UserClipDistanceCullTestEnableBitmask = 3676 
vue_prog_data->cull_distance_mask; 3677 } 3678 3679} 3680 3681/** 3682 * Encode most of 3DSTATE_GS based on the compiled shader. 3683 */ 3684static void 3685iris_store_gs_state(struct iris_context *ice, 3686 const struct gen_device_info *devinfo, 3687 struct iris_compiled_shader *shader) 3688{ 3689 struct brw_stage_prog_data *prog_data = shader->prog_data; 3690 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; 3691 struct brw_gs_prog_data *gs_prog_data = (void *) prog_data; 3692 3693 iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) { 3694 INIT_THREAD_DISPATCH_FIELDS(gs, Vertex, MESA_SHADER_GEOMETRY); 3695 3696 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; 3697 gs.OutputTopology = gs_prog_data->output_topology; 3698 gs.ControlDataHeaderSize = 3699 gs_prog_data->control_data_header_size_hwords; 3700 gs.InstanceControl = gs_prog_data->invocations - 1; 3701 gs.DispatchMode = DISPATCH_MODE_SIMD8; 3702 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; 3703 gs.ControlDataFormat = gs_prog_data->control_data_format; 3704 gs.ReorderMode = TRAILING; 3705 gs.ExpectedVertexCount = gs_prog_data->vertices_in; 3706 gs.MaximumNumberofThreads = 3707 GEN_GEN == 8 ? 
(devinfo->max_gs_threads / 2 - 1) 3708 : (devinfo->max_gs_threads - 1); 3709 3710 if (gs_prog_data->static_vertex_count != -1) { 3711 gs.StaticOutput = true; 3712 gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count; 3713 } 3714 gs.IncludeVertexHandles = vue_prog_data->include_vue_handles; 3715 3716 gs.UserClipDistanceCullTestEnableBitmask = 3717 vue_prog_data->cull_distance_mask; 3718 3719 const int urb_entry_write_offset = 1; 3720 const uint32_t urb_entry_output_length = 3721 DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) - 3722 urb_entry_write_offset; 3723 3724 gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset; 3725 gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1); 3726 } 3727} 3728 3729/** 3730 * Encode most of 3DSTATE_PS and 3DSTATE_PS_EXTRA based on the shader. 3731 */ 3732static void 3733iris_store_fs_state(struct iris_context *ice, 3734 const struct gen_device_info *devinfo, 3735 struct iris_compiled_shader *shader) 3736{ 3737 struct brw_stage_prog_data *prog_data = shader->prog_data; 3738 struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; 3739 3740 uint32_t *ps_state = (void *) shader->derived_data; 3741 uint32_t *psx_state = ps_state + GENX(3DSTATE_PS_length); 3742 3743 iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) { 3744 ps.VectorMaskEnable = true; 3745 // XXX: WABTPPrefetchDisable, see above, drop at C0 3746 ps.BindingTableEntryCount = GEN_GEN == 11 ? 0 : 3747 prog_data->binding_table.size_bytes / 4; 3748 ps.FloatingPointMode = prog_data->use_alt_mode; 3749 ps.MaximumNumberofThreadsPerPSD = 64 - (GEN_GEN == 8 ? 2 : 1); 3750 3751 ps.PushConstantEnable = prog_data->ubo_ranges[0].length > 0; 3752 3753 /* From the documentation for this packet: 3754 * "If the PS kernel does not need the Position XY Offsets to 3755 * compute a Position Value, then this field should be programmed 3756 * to POSOFFSET_NONE." 
3757 * 3758 * "SW Recommendation: If the PS kernel needs the Position Offsets 3759 * to compute a Position XY value, this field should match Position 3760 * ZW Interpolation Mode to ensure a consistent position.xyzw 3761 * computation." 3762 * 3763 * We only require XY sample offsets. So, this recommendation doesn't 3764 * look useful at the moment. We might need this in future. 3765 */ 3766 ps.PositionXYOffsetSelect = 3767 wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; 3768 ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; 3769 ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; 3770 ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; 3771 3772 // XXX: Disable SIMD32 with 16x MSAA 3773 3774 ps.DispatchGRFStartRegisterForConstantSetupData0 = 3775 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); 3776 ps.DispatchGRFStartRegisterForConstantSetupData1 = 3777 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1); 3778 ps.DispatchGRFStartRegisterForConstantSetupData2 = 3779 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2); 3780 3781 ps.KernelStartPointer0 = 3782 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); 3783 ps.KernelStartPointer1 = 3784 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); 3785 ps.KernelStartPointer2 = 3786 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); 3787 3788 if (prog_data->total_scratch) { 3789 struct iris_bo *bo = 3790 iris_get_scratch_space(ice, prog_data->total_scratch, 3791 MESA_SHADER_FRAGMENT); 3792 uint32_t scratch_addr = bo->gtt_offset; 3793 ps.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; 3794 ps.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr); 3795 } 3796 } 3797 3798 iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) { 3799 psx.PixelShaderValid = true; 3800 psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; 3801 psx.PixelShaderKillsPixel = wm_prog_data->uses_kill; 3802 
psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0; 3803 psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; 3804 psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; 3805 psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch; 3806 psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 3807 3808#if GEN_GEN >= 9 3809 psx.PixelShaderPullsBary = wm_prog_data->pulls_bary; 3810 psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil; 3811#else 3812 psx.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; 3813#endif 3814 // XXX: UAV bit 3815 } 3816} 3817 3818/** 3819 * Compute the size of the derived data (shader command packets). 3820 * 3821 * This must match the data written by the iris_store_xs_state() functions. 3822 */ 3823static void 3824iris_store_cs_state(struct iris_context *ice, 3825 const struct gen_device_info *devinfo, 3826 struct iris_compiled_shader *shader) 3827{ 3828 struct brw_stage_prog_data *prog_data = shader->prog_data; 3829 struct brw_cs_prog_data *cs_prog_data = (void *) shader->prog_data; 3830 void *map = shader->derived_data; 3831 3832 iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) { 3833 desc.KernelStartPointer = KSP(shader); 3834 desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs; 3835 desc.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads; 3836 desc.SharedLocalMemorySize = 3837 encode_slm_size(GEN_GEN, prog_data->total_shared); 3838 desc.BarrierEnable = cs_prog_data->uses_barrier; 3839 desc.CrossThreadConstantDataReadLength = 3840 cs_prog_data->push.cross_thread.regs; 3841 } 3842} 3843 3844static unsigned 3845iris_derived_program_state_size(enum iris_program_cache_id cache_id) 3846{ 3847 assert(cache_id <= IRIS_CACHE_BLORP); 3848 3849 static const unsigned dwords[] = { 3850 [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length), 3851 [IRIS_CACHE_TCS] = GENX(3DSTATE_HS_length), 3852 [IRIS_CACHE_TES] = GENX(3DSTATE_TE_length) + GENX(3DSTATE_DS_length), 3853 
[IRIS_CACHE_GS] = GENX(3DSTATE_GS_length), 3854 [IRIS_CACHE_FS] = 3855 GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length), 3856 [IRIS_CACHE_CS] = GENX(INTERFACE_DESCRIPTOR_DATA_length), 3857 [IRIS_CACHE_BLORP] = 0, 3858 }; 3859 3860 return sizeof(uint32_t) * dwords[cache_id]; 3861} 3862 3863/** 3864 * Create any state packets corresponding to the given shader stage 3865 * (i.e. 3DSTATE_VS) and save them as "derived data" in the shader variant. 3866 * This means that we can look up a program in the in-memory cache and 3867 * get most of the state packet without having to reconstruct it. 3868 */ 3869static void 3870iris_store_derived_program_state(struct iris_context *ice, 3871 enum iris_program_cache_id cache_id, 3872 struct iris_compiled_shader *shader) 3873{ 3874 struct iris_screen *screen = (void *) ice->ctx.screen; 3875 const struct gen_device_info *devinfo = &screen->devinfo; 3876 3877 switch (cache_id) { 3878 case IRIS_CACHE_VS: 3879 iris_store_vs_state(ice, devinfo, shader); 3880 break; 3881 case IRIS_CACHE_TCS: 3882 iris_store_tcs_state(ice, devinfo, shader); 3883 break; 3884 case IRIS_CACHE_TES: 3885 iris_store_tes_state(ice, devinfo, shader); 3886 break; 3887 case IRIS_CACHE_GS: 3888 iris_store_gs_state(ice, devinfo, shader); 3889 break; 3890 case IRIS_CACHE_FS: 3891 iris_store_fs_state(ice, devinfo, shader); 3892 break; 3893 case IRIS_CACHE_CS: 3894 iris_store_cs_state(ice, devinfo, shader); 3895 case IRIS_CACHE_BLORP: 3896 break; 3897 default: 3898 break; 3899 } 3900} 3901 3902/* ------------------------------------------------------------------- */ 3903 3904static const uint32_t push_constant_opcodes[] = { 3905 [MESA_SHADER_VERTEX] = 21, 3906 [MESA_SHADER_TESS_CTRL] = 25, /* HS */ 3907 [MESA_SHADER_TESS_EVAL] = 26, /* DS */ 3908 [MESA_SHADER_GEOMETRY] = 22, 3909 [MESA_SHADER_FRAGMENT] = 23, 3910 [MESA_SHADER_COMPUTE] = 0, 3911}; 3912 3913static uint32_t 3914use_null_surface(struct iris_batch *batch, struct iris_context *ice) 3915{ 3916 struct 
iris_bo *state_bo = iris_resource_bo(ice->state.unbound_tex.res); 3917 3918 iris_use_pinned_bo(batch, state_bo, false); 3919 3920 return ice->state.unbound_tex.offset; 3921} 3922 3923static uint32_t 3924use_null_fb_surface(struct iris_batch *batch, struct iris_context *ice) 3925{ 3926 /* If set_framebuffer_state() was never called, fall back to 1x1x1 */ 3927 if (!ice->state.null_fb.res) 3928 return use_null_surface(batch, ice); 3929 3930 struct iris_bo *state_bo = iris_resource_bo(ice->state.null_fb.res); 3931 3932 iris_use_pinned_bo(batch, state_bo, false); 3933 3934 return ice->state.null_fb.offset; 3935} 3936 3937static uint32_t 3938surf_state_offset_for_aux(struct iris_resource *res, 3939 unsigned aux_modes, 3940 enum isl_aux_usage aux_usage) 3941{ 3942 return SURFACE_STATE_ALIGNMENT * 3943 util_bitcount(aux_modes & ((1 << aux_usage) - 1)); 3944} 3945 3946static void 3947surf_state_update_clear_value(struct iris_batch *batch, 3948 struct iris_resource *res, 3949 struct iris_state_ref *state, 3950 unsigned aux_modes, 3951 enum isl_aux_usage aux_usage) 3952{ 3953 struct isl_device *isl_dev = &batch->screen->isl_dev; 3954 struct iris_bo *state_bo = iris_resource_bo(state->res); 3955 uint64_t real_offset = state->offset + 3956 IRIS_MEMZONE_BINDER_START; 3957 uint32_t offset_into_bo = real_offset - state_bo->gtt_offset; 3958 uint32_t clear_offset = offset_into_bo + 3959 isl_dev->ss.clear_value_offset + 3960 surf_state_offset_for_aux(res, aux_modes, aux_usage); 3961 3962 batch->vtbl->copy_mem_mem(batch, state_bo, clear_offset, 3963 res->aux.clear_color_bo, 3964 res->aux.clear_color_offset, 3965 isl_dev->ss.clear_value_size); 3966} 3967 3968static void 3969update_clear_value(struct iris_context *ice, 3970 struct iris_batch *batch, 3971 struct iris_resource *res, 3972 struct iris_state_ref *state, 3973 unsigned all_aux_modes, 3974 struct isl_view *view) 3975{ 3976 struct iris_screen *screen = batch->screen; 3977 const struct gen_device_info *devinfo = &screen->devinfo; 
3978 UNUSED unsigned aux_modes = all_aux_modes; 3979 3980 /* We only need to update the clear color in the surface state for gen8 and 3981 * gen9. Newer gens can read it directly from the clear color state buffer. 3982 */ 3983 if (devinfo->gen > 9) 3984 return; 3985 3986 if (devinfo->gen == 9) { 3987 /* Skip updating the ISL_AUX_USAGE_NONE surface state */ 3988 aux_modes &= ~(1 << ISL_AUX_USAGE_NONE); 3989 3990 while (aux_modes) { 3991 enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); 3992 3993 surf_state_update_clear_value(batch, res, state, all_aux_modes, 3994 aux_usage); 3995 } 3996 } else if (devinfo->gen == 8) { 3997 pipe_resource_reference(&state->res, NULL); 3998 void *map = alloc_surface_states(ice->state.surface_uploader, 3999 state, all_aux_modes); 4000 while (aux_modes) { 4001 enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); 4002 fill_surface_state(&screen->isl_dev, map, res, view, aux_usage); 4003 map += SURFACE_STATE_ALIGNMENT; 4004 } 4005 } 4006} 4007 4008/** 4009 * Add a surface to the validation list, as well as the buffer containing 4010 * the corresponding SURFACE_STATE. 4011 * 4012 * Returns the binding table entry (offset to SURFACE_STATE). 
4013 */ 4014static uint32_t 4015use_surface(struct iris_context *ice, 4016 struct iris_batch *batch, 4017 struct pipe_surface *p_surf, 4018 bool writeable, 4019 enum isl_aux_usage aux_usage) 4020{ 4021 struct iris_surface *surf = (void *) p_surf; 4022 struct iris_resource *res = (void *) p_surf->texture; 4023 4024 iris_use_pinned_bo(batch, iris_resource_bo(p_surf->texture), writeable); 4025 iris_use_pinned_bo(batch, iris_resource_bo(surf->surface_state.res), false); 4026 4027 if (res->aux.bo) { 4028 iris_use_pinned_bo(batch, res->aux.bo, writeable); 4029 if (res->aux.clear_color_bo) 4030 iris_use_pinned_bo(batch, res->aux.clear_color_bo, false); 4031 4032 if (memcmp(&res->aux.clear_color, &surf->clear_color, 4033 sizeof(surf->clear_color)) != 0) { 4034 update_clear_value(ice, batch, res, &surf->surface_state, 4035 res->aux.possible_usages, &surf->view); 4036 surf->clear_color = res->aux.clear_color; 4037 } 4038 } 4039 4040 return surf->surface_state.offset + 4041 surf_state_offset_for_aux(res, res->aux.possible_usages, aux_usage); 4042} 4043 4044static uint32_t 4045use_sampler_view(struct iris_context *ice, 4046 struct iris_batch *batch, 4047 struct iris_sampler_view *isv) 4048{ 4049 // XXX: ASTC hacks 4050 enum isl_aux_usage aux_usage = 4051 iris_resource_texture_aux_usage(ice, isv->res, isv->view.format, 0); 4052 4053 iris_use_pinned_bo(batch, isv->res->bo, false); 4054 iris_use_pinned_bo(batch, iris_resource_bo(isv->surface_state.res), false); 4055 4056 if (isv->res->aux.bo) { 4057 iris_use_pinned_bo(batch, isv->res->aux.bo, false); 4058 if (isv->res->aux.clear_color_bo) 4059 iris_use_pinned_bo(batch, isv->res->aux.clear_color_bo, false); 4060 if (memcmp(&isv->res->aux.clear_color, &isv->clear_color, 4061 sizeof(isv->clear_color)) != 0) { 4062 update_clear_value(ice, batch, isv->res, &isv->surface_state, 4063 isv->res->aux.sampler_usages, &isv->view); 4064 isv->clear_color = isv->res->aux.clear_color; 4065 } 4066 } 4067 4068 return isv->surface_state.offset + 
4069 surf_state_offset_for_aux(isv->res, isv->res->aux.sampler_usages, 4070 aux_usage); 4071} 4072 4073static uint32_t 4074use_ubo_ssbo(struct iris_batch *batch, 4075 struct iris_context *ice, 4076 struct pipe_shader_buffer *buf, 4077 struct iris_state_ref *surf_state, 4078 bool writable) 4079{ 4080 if (!buf->buffer) 4081 return use_null_surface(batch, ice); 4082 4083 iris_use_pinned_bo(batch, iris_resource_bo(buf->buffer), writable); 4084 iris_use_pinned_bo(batch, iris_resource_bo(surf_state->res), false); 4085 4086 return surf_state->offset; 4087} 4088 4089static uint32_t 4090use_image(struct iris_batch *batch, struct iris_context *ice, 4091 struct iris_shader_state *shs, int i) 4092{ 4093 struct iris_image_view *iv = &shs->image[i]; 4094 struct iris_resource *res = (void *) iv->base.resource; 4095 4096 if (!res) 4097 return use_null_surface(batch, ice); 4098 4099 bool write = iv->base.shader_access & PIPE_IMAGE_ACCESS_WRITE; 4100 4101 iris_use_pinned_bo(batch, res->bo, write); 4102 iris_use_pinned_bo(batch, iris_resource_bo(iv->surface_state.res), false); 4103 4104 if (res->aux.bo) 4105 iris_use_pinned_bo(batch, res->aux.bo, write); 4106 4107 return iv->surface_state.offset; 4108} 4109 4110#define push_bt_entry(addr) \ 4111 assert(addr >= binder_addr); \ 4112 assert(s < prog_data->binding_table.size_bytes / sizeof(uint32_t)); \ 4113 if (!pin_only) bt_map[s++] = (addr) - binder_addr; 4114 4115#define bt_assert(section, exists) \ 4116 if (!pin_only) assert(prog_data->binding_table.section == \ 4117 (exists) ? s : 0xd0d0d0d0) 4118 4119/** 4120 * Populate the binding table for a given shader stage. 4121 * 4122 * This fills out the table of pointers to surfaces required by the shader, 4123 * and also adds those buffers to the validation list so the kernel can make 4124 * resident before running our batch. 
4125 */ 4126static void 4127iris_populate_binding_table(struct iris_context *ice, 4128 struct iris_batch *batch, 4129 gl_shader_stage stage, 4130 bool pin_only) 4131{ 4132 const struct iris_binder *binder = &ice->state.binder; 4133 struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 4134 if (!shader) 4135 return; 4136 4137 UNUSED struct brw_stage_prog_data *prog_data = shader->prog_data; 4138 struct iris_shader_state *shs = &ice->state.shaders[stage]; 4139 uint32_t binder_addr = binder->bo->gtt_offset; 4140 4141 //struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; 4142 uint32_t *bt_map = binder->map + binder->bt_offset[stage]; 4143 int s = 0; 4144 4145 const struct shader_info *info = iris_get_shader_info(ice, stage); 4146 if (!info) { 4147 /* TCS passthrough doesn't need a binding table. */ 4148 assert(stage == MESA_SHADER_TESS_CTRL); 4149 return; 4150 } 4151 4152 if (stage == MESA_SHADER_COMPUTE) { 4153 /* surface for gl_NumWorkGroups */ 4154 struct iris_state_ref *grid_data = &ice->state.grid_size; 4155 struct iris_state_ref *grid_state = &ice->state.grid_surf_state; 4156 iris_use_pinned_bo(batch, iris_resource_bo(grid_data->res), false); 4157 iris_use_pinned_bo(batch, iris_resource_bo(grid_state->res), false); 4158 push_bt_entry(grid_state->offset); 4159 } 4160 4161 if (stage == MESA_SHADER_FRAGMENT) { 4162 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 4163 /* Note that cso_fb->nr_cbufs == fs_key->nr_color_regions. 
*/ 4164 if (cso_fb->nr_cbufs) { 4165 for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 4166 uint32_t addr; 4167 if (cso_fb->cbufs[i]) { 4168 addr = use_surface(ice, batch, cso_fb->cbufs[i], true, 4169 ice->state.draw_aux_usage[i]); 4170 } else { 4171 addr = use_null_fb_surface(batch, ice); 4172 } 4173 push_bt_entry(addr); 4174 } 4175 } else { 4176 uint32_t addr = use_null_fb_surface(batch, ice); 4177 push_bt_entry(addr); 4178 } 4179 } 4180 4181 unsigned num_textures = util_last_bit(info->textures_used); 4182 4183 bt_assert(texture_start, num_textures > 0); 4184 4185 for (int i = 0; i < num_textures; i++) { 4186 struct iris_sampler_view *view = shs->textures[i]; 4187 uint32_t addr = view ? use_sampler_view(ice, batch, view) 4188 : use_null_surface(batch, ice); 4189 push_bt_entry(addr); 4190 } 4191 4192 bt_assert(image_start, info->num_images > 0); 4193 4194 for (int i = 0; i < info->num_images; i++) { 4195 uint32_t addr = use_image(batch, ice, shs, i); 4196 push_bt_entry(addr); 4197 } 4198 4199 bt_assert(ubo_start, shader->num_cbufs > 0); 4200 4201 for (int i = 0; i < shader->num_cbufs; i++) { 4202 uint32_t addr = use_ubo_ssbo(batch, ice, &shs->constbuf[i], 4203 &shs->constbuf_surf_state[i], false); 4204 push_bt_entry(addr); 4205 } 4206 4207 bt_assert(ssbo_start, info->num_abos + info->num_ssbos > 0); 4208 4209 /* XXX: st is wasting 16 binding table slots for ABOs. Should add a cap 4210 * for changing nir_lower_atomics_to_ssbos setting and buffer_base offset 4211 * in st_atom_storagebuf.c so it'll compact them into one range, with 4212 * SSBOs starting at info->num_abos. 
Ideally it'd reset num_abos to 0 too 4213 */ 4214 if (info->num_abos + info->num_ssbos > 0) { 4215 for (int i = 0; i < IRIS_MAX_ABOS + info->num_ssbos; i++) { 4216 uint32_t addr = 4217 use_ubo_ssbo(batch, ice, &shs->ssbo[i], &shs->ssbo_surf_state[i], 4218 shs->writable_ssbos & (1u << i)); 4219 push_bt_entry(addr); 4220 } 4221 } 4222 4223#if 0 4224 /* XXX: YUV surfaces not implemented yet */ 4225 bt_assert(plane_start[1], ...); 4226 bt_assert(plane_start[2], ...); 4227#endif 4228} 4229 4230static void 4231iris_use_optional_res(struct iris_batch *batch, 4232 struct pipe_resource *res, 4233 bool writeable) 4234{ 4235 if (res) { 4236 struct iris_bo *bo = iris_resource_bo(res); 4237 iris_use_pinned_bo(batch, bo, writeable); 4238 } 4239} 4240 4241static void 4242pin_depth_and_stencil_buffers(struct iris_batch *batch, 4243 struct pipe_surface *zsbuf, 4244 struct iris_depth_stencil_alpha_state *cso_zsa) 4245{ 4246 if (!zsbuf) 4247 return; 4248 4249 struct iris_resource *zres, *sres; 4250 iris_get_depth_stencil_resources(zsbuf->texture, &zres, &sres); 4251 4252 if (zres) { 4253 iris_use_pinned_bo(batch, zres->bo, cso_zsa->depth_writes_enabled); 4254 if (zres->aux.bo) { 4255 iris_use_pinned_bo(batch, zres->aux.bo, 4256 cso_zsa->depth_writes_enabled); 4257 } 4258 } 4259 4260 if (sres) { 4261 iris_use_pinned_bo(batch, sres->bo, cso_zsa->stencil_writes_enabled); 4262 } 4263} 4264 4265/* ------------------------------------------------------------------- */ 4266 4267/** 4268 * Pin any BOs which were installed by a previous batch, and restored 4269 * via the hardware logical context mechanism. 4270 * 4271 * We don't need to re-emit all state every batch - the hardware context 4272 * mechanism will save and restore it for us. This includes pointers to 4273 * various BOs...which won't exist unless we ask the kernel to pin them 4274 * by adding them to the validation list. 
4275 * 4276 * We can skip buffers if we've re-emitted those packets, as we're 4277 * overwriting those stale pointers with new ones, and don't actually 4278 * refer to the old BOs. 4279 */ 4280static void 4281iris_restore_render_saved_bos(struct iris_context *ice, 4282 struct iris_batch *batch, 4283 const struct pipe_draw_info *draw) 4284{ 4285 struct iris_genx_state *genx = ice->state.genx; 4286 4287 const uint64_t clean = ~ice->state.dirty; 4288 4289 if (clean & IRIS_DIRTY_CC_VIEWPORT) { 4290 iris_use_optional_res(batch, ice->state.last_res.cc_vp, false); 4291 } 4292 4293 if (clean & IRIS_DIRTY_SF_CL_VIEWPORT) { 4294 iris_use_optional_res(batch, ice->state.last_res.sf_cl_vp, false); 4295 } 4296 4297 if (clean & IRIS_DIRTY_BLEND_STATE) { 4298 iris_use_optional_res(batch, ice->state.last_res.blend, false); 4299 } 4300 4301 if (clean & IRIS_DIRTY_COLOR_CALC_STATE) { 4302 iris_use_optional_res(batch, ice->state.last_res.color_calc, false); 4303 } 4304 4305 if (clean & IRIS_DIRTY_SCISSOR_RECT) { 4306 iris_use_optional_res(batch, ice->state.last_res.scissor, false); 4307 } 4308 4309 if (ice->state.streamout_active && (clean & IRIS_DIRTY_SO_BUFFERS)) { 4310 for (int i = 0; i < 4; i++) { 4311 struct iris_stream_output_target *tgt = 4312 (void *) ice->state.so_target[i]; 4313 if (tgt) { 4314 iris_use_pinned_bo(batch, iris_resource_bo(tgt->base.buffer), 4315 true); 4316 iris_use_pinned_bo(batch, iris_resource_bo(tgt->offset.res), 4317 true); 4318 } 4319 } 4320 } 4321 4322 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 4323 if (!(clean & (IRIS_DIRTY_CONSTANTS_VS << stage))) 4324 continue; 4325 4326 struct iris_shader_state *shs = &ice->state.shaders[stage]; 4327 struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 4328 4329 if (!shader) 4330 continue; 4331 4332 struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; 4333 4334 for (int i = 0; i < 4; i++) { 4335 const struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; 4336 4337 if 
(range->length == 0) 4338 continue; 4339 4340 struct pipe_shader_buffer *cbuf = &shs->constbuf[range->block]; 4341 struct iris_resource *res = (void *) cbuf->buffer; 4342 4343 if (res) 4344 iris_use_pinned_bo(batch, res->bo, false); 4345 else 4346 iris_use_pinned_bo(batch, batch->screen->workaround_bo, false); 4347 } 4348 } 4349 4350 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 4351 if (clean & (IRIS_DIRTY_BINDINGS_VS << stage)) { 4352 /* Re-pin any buffers referred to by the binding table. */ 4353 iris_populate_binding_table(ice, batch, stage, true); 4354 } 4355 } 4356 4357 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 4358 struct iris_shader_state *shs = &ice->state.shaders[stage]; 4359 struct pipe_resource *res = shs->sampler_table.res; 4360 if (res) 4361 iris_use_pinned_bo(batch, iris_resource_bo(res), false); 4362 } 4363 4364 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 4365 if (clean & (IRIS_DIRTY_VS << stage)) { 4366 struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 4367 4368 if (shader) { 4369 struct iris_bo *bo = iris_resource_bo(shader->assembly.res); 4370 iris_use_pinned_bo(batch, bo, false); 4371 4372 struct brw_stage_prog_data *prog_data = shader->prog_data; 4373 4374 if (prog_data->total_scratch > 0) { 4375 struct iris_bo *bo = 4376 iris_get_scratch_space(ice, prog_data->total_scratch, stage); 4377 iris_use_pinned_bo(batch, bo, true); 4378 } 4379 } 4380 } 4381 } 4382 4383 if ((clean & IRIS_DIRTY_DEPTH_BUFFER) && 4384 (clean & IRIS_DIRTY_WM_DEPTH_STENCIL)) { 4385 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 4386 pin_depth_and_stencil_buffers(batch, cso_fb->zsbuf, ice->state.cso_zsa); 4387 } 4388 4389 if (draw->index_size == 0 && ice->state.last_res.index_buffer) { 4390 /* This draw didn't emit a new index buffer, so we are inheriting the 4391 * older index buffer. This draw didn't need it, but future ones may. 
4392 */ 4393 struct iris_bo *bo = iris_resource_bo(ice->state.last_res.index_buffer); 4394 iris_use_pinned_bo(batch, bo, false); 4395 } 4396 4397 if (clean & IRIS_DIRTY_VERTEX_BUFFERS) { 4398 uint64_t bound = ice->state.bound_vertex_buffers; 4399 while (bound) { 4400 const int i = u_bit_scan64(&bound); 4401 struct pipe_resource *res = genx->vertex_buffers[i].resource; 4402 iris_use_pinned_bo(batch, iris_resource_bo(res), false); 4403 } 4404 } 4405} 4406 4407static void 4408iris_restore_compute_saved_bos(struct iris_context *ice, 4409 struct iris_batch *batch, 4410 const struct pipe_grid_info *grid) 4411{ 4412 const uint64_t clean = ~ice->state.dirty; 4413 4414 const int stage = MESA_SHADER_COMPUTE; 4415 struct iris_shader_state *shs = &ice->state.shaders[stage]; 4416 4417 if (clean & IRIS_DIRTY_CONSTANTS_CS) { 4418 struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 4419 4420 if (shader) { 4421 struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; 4422 const struct brw_ubo_range *range = &prog_data->ubo_ranges[0]; 4423 4424 if (range->length > 0) { 4425 struct pipe_shader_buffer *cbuf = &shs->constbuf[range->block]; 4426 struct iris_resource *res = (void *) cbuf->buffer; 4427 4428 if (res) 4429 iris_use_pinned_bo(batch, res->bo, false); 4430 else 4431 iris_use_pinned_bo(batch, batch->screen->workaround_bo, false); 4432 } 4433 } 4434 } 4435 4436 if (clean & IRIS_DIRTY_BINDINGS_CS) { 4437 /* Re-pin any buffers referred to by the binding table. 
*/ 4438 iris_populate_binding_table(ice, batch, stage, true); 4439 } 4440 4441 struct pipe_resource *sampler_res = shs->sampler_table.res; 4442 if (sampler_res) 4443 iris_use_pinned_bo(batch, iris_resource_bo(sampler_res), false); 4444 4445 if (clean & IRIS_DIRTY_CS) { 4446 struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 4447 4448 if (shader) { 4449 struct iris_bo *bo = iris_resource_bo(shader->assembly.res); 4450 iris_use_pinned_bo(batch, bo, false); 4451 4452 struct brw_stage_prog_data *prog_data = shader->prog_data; 4453 4454 if (prog_data->total_scratch > 0) { 4455 struct iris_bo *bo = 4456 iris_get_scratch_space(ice, prog_data->total_scratch, stage); 4457 iris_use_pinned_bo(batch, bo, true); 4458 } 4459 } 4460 } 4461} 4462 4463/** 4464 * Possibly emit STATE_BASE_ADDRESS to update Surface State Base Address. 4465 */ 4466static void 4467iris_update_surface_base_address(struct iris_batch *batch, 4468 struct iris_binder *binder) 4469{ 4470 if (batch->last_surface_base_address == binder->bo->gtt_offset) 4471 return; 4472 4473 flush_for_state_base_change(batch); 4474 4475 iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) { 4476 sba.SurfaceStateMOCS = MOCS_WB; 4477 sba.SurfaceStateBaseAddressModifyEnable = true; 4478 sba.SurfaceStateBaseAddress = ro_bo(binder->bo, 0); 4479 } 4480 4481 batch->last_surface_base_address = binder->bo->gtt_offset; 4482} 4483 4484static void 4485iris_upload_dirty_render_state(struct iris_context *ice, 4486 struct iris_batch *batch, 4487 const struct pipe_draw_info *draw) 4488{ 4489 const uint64_t dirty = ice->state.dirty; 4490 4491 if (!(dirty & IRIS_ALL_DIRTY_FOR_RENDER)) 4492 return; 4493 4494 struct iris_genx_state *genx = ice->state.genx; 4495 struct iris_binder *binder = &ice->state.binder; 4496 struct brw_wm_prog_data *wm_prog_data = (void *) 4497 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; 4498 4499 if (dirty & IRIS_DIRTY_CC_VIEWPORT) { 4500 const struct iris_rasterizer_state *cso_rast = 
ice->state.cso_rast; 4501 uint32_t cc_vp_address; 4502 4503 /* XXX: could avoid streaming for depth_clip [0,1] case. */ 4504 uint32_t *cc_vp_map = 4505 stream_state(batch, ice->state.dynamic_uploader, 4506 &ice->state.last_res.cc_vp, 4507 4 * ice->state.num_viewports * 4508 GENX(CC_VIEWPORT_length), 32, &cc_vp_address); 4509 for (int i = 0; i < ice->state.num_viewports; i++) { 4510 float zmin, zmax; 4511 util_viewport_zmin_zmax(&ice->state.viewports[i], 4512 cso_rast->clip_halfz, &zmin, &zmax); 4513 if (cso_rast->depth_clip_near) 4514 zmin = 0.0; 4515 if (cso_rast->depth_clip_far) 4516 zmax = 1.0; 4517 4518 iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) { 4519 ccv.MinimumDepth = zmin; 4520 ccv.MaximumDepth = zmax; 4521 } 4522 4523 cc_vp_map += GENX(CC_VIEWPORT_length); 4524 } 4525 4526 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { 4527 ptr.CCViewportPointer = cc_vp_address; 4528 } 4529 } 4530 4531 if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) { 4532 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 4533 uint32_t sf_cl_vp_address; 4534 uint32_t *vp_map = 4535 stream_state(batch, ice->state.dynamic_uploader, 4536 &ice->state.last_res.sf_cl_vp, 4537 4 * ice->state.num_viewports * 4538 GENX(SF_CLIP_VIEWPORT_length), 64, &sf_cl_vp_address); 4539 4540 for (unsigned i = 0; i < ice->state.num_viewports; i++) { 4541 const struct pipe_viewport_state *state = &ice->state.viewports[i]; 4542 float gb_xmin, gb_xmax, gb_ymin, gb_ymax; 4543 4544 float vp_xmin = viewport_extent(state, 0, -1.0f); 4545 float vp_xmax = viewport_extent(state, 0, 1.0f); 4546 float vp_ymin = viewport_extent(state, 1, -1.0f); 4547 float vp_ymax = viewport_extent(state, 1, 1.0f); 4548 4549 calculate_guardband_size(cso_fb->width, cso_fb->height, 4550 state->scale[0], state->scale[1], 4551 state->translate[0], state->translate[1], 4552 &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax); 4553 4554 iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) { 4555 vp.ViewportMatrixElementm00 = 
state->scale[0]; 4556 vp.ViewportMatrixElementm11 = state->scale[1]; 4557 vp.ViewportMatrixElementm22 = state->scale[2]; 4558 vp.ViewportMatrixElementm30 = state->translate[0]; 4559 vp.ViewportMatrixElementm31 = state->translate[1]; 4560 vp.ViewportMatrixElementm32 = state->translate[2]; 4561 vp.XMinClipGuardband = gb_xmin; 4562 vp.XMaxClipGuardband = gb_xmax; 4563 vp.YMinClipGuardband = gb_ymin; 4564 vp.YMaxClipGuardband = gb_ymax; 4565 vp.XMinViewPort = MAX2(vp_xmin, 0); 4566 vp.XMaxViewPort = MIN2(vp_xmax, cso_fb->width) - 1; 4567 vp.YMinViewPort = MAX2(vp_ymin, 0); 4568 vp.YMaxViewPort = MIN2(vp_ymax, cso_fb->height) - 1; 4569 } 4570 4571 vp_map += GENX(SF_CLIP_VIEWPORT_length); 4572 } 4573 4574 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { 4575 ptr.SFClipViewportPointer = sf_cl_vp_address; 4576 } 4577 } 4578 4579 if (dirty & IRIS_DIRTY_URB) { 4580 unsigned size[4]; 4581 4582 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 4583 if (!ice->shaders.prog[i]) { 4584 size[i] = 1; 4585 } else { 4586 struct brw_vue_prog_data *vue_prog_data = 4587 (void *) ice->shaders.prog[i]->prog_data; 4588 size[i] = vue_prog_data->urb_entry_size; 4589 } 4590 assert(size[i] != 0); 4591 } 4592 4593 genX(emit_urb_setup)(ice, batch, size, 4594 ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL, 4595 ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL); 4596 } 4597 4598 if (dirty & IRIS_DIRTY_BLEND_STATE) { 4599 struct iris_blend_state *cso_blend = ice->state.cso_blend; 4600 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 4601 struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; 4602 const int header_dwords = GENX(BLEND_STATE_length); 4603 4604 /* Always write at least one BLEND_STATE - the final RT message will 4605 * reference BLEND_STATE[0] even if there aren't color writes. There 4606 * may still be alpha testing, computed depth, and so on. 
4607 */ 4608 const int rt_dwords = 4609 MAX2(cso_fb->nr_cbufs, 1) * GENX(BLEND_STATE_ENTRY_length); 4610 4611 uint32_t blend_offset; 4612 uint32_t *blend_map = 4613 stream_state(batch, ice->state.dynamic_uploader, 4614 &ice->state.last_res.blend, 4615 4 * (header_dwords + rt_dwords), 64, &blend_offset); 4616 4617 uint32_t blend_state_header; 4618 iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) { 4619 bs.AlphaTestEnable = cso_zsa->alpha.enabled; 4620 bs.AlphaTestFunction = translate_compare_func(cso_zsa->alpha.func); 4621 } 4622 4623 blend_map[0] = blend_state_header | cso_blend->blend_state[0]; 4624 memcpy(&blend_map[1], &cso_blend->blend_state[1], 4 * rt_dwords); 4625 4626 iris_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) { 4627 ptr.BlendStatePointer = blend_offset; 4628 ptr.BlendStatePointerValid = true; 4629 } 4630 } 4631 4632 if (dirty & IRIS_DIRTY_COLOR_CALC_STATE) { 4633 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; 4634#if GEN_GEN == 8 4635 struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref; 4636#endif 4637 uint32_t cc_offset; 4638 void *cc_map = 4639 stream_state(batch, ice->state.dynamic_uploader, 4640 &ice->state.last_res.color_calc, 4641 sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), 4642 64, &cc_offset); 4643 iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) { 4644 cc.AlphaTestFormat = ALPHATEST_FLOAT32; 4645 cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value; 4646 cc.BlendConstantColorRed = ice->state.blend_color.color[0]; 4647 cc.BlendConstantColorGreen = ice->state.blend_color.color[1]; 4648 cc.BlendConstantColorBlue = ice->state.blend_color.color[2]; 4649 cc.BlendConstantColorAlpha = ice->state.blend_color.color[3]; 4650#if GEN_GEN == 8 4651 cc.StencilReferenceValue = p_stencil_refs->ref_value[0]; 4652 cc.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1]; 4653#endif 4654 } 4655 iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { 4656 ptr.ColorCalcStatePointer = 
cc_offset; 4657 ptr.ColorCalcStatePointerValid = true; 4658 } 4659 } 4660 4661 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 4662 if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage))) 4663 continue; 4664 4665 struct iris_shader_state *shs = &ice->state.shaders[stage]; 4666 struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 4667 4668 if (!shader) 4669 continue; 4670 4671 if (shs->cbuf0_needs_upload) 4672 upload_uniforms(ice, stage); 4673 4674 struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; 4675 4676 iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) { 4677 pkt._3DCommandSubOpcode = push_constant_opcodes[stage]; 4678 if (prog_data) { 4679 /* The Skylake PRM contains the following restriction: 4680 * 4681 * "The driver must ensure The following case does not occur 4682 * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with 4683 * buffer 3 read length equal to zero committed followed by a 4684 * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to 4685 * zero committed." 4686 * 4687 * To avoid this, we program the buffers in the highest slots. 4688 * This way, slot 0 is only used if slot 3 is also used. 4689 */ 4690 int n = 3; 4691 4692 for (int i = 3; i >= 0; i--) { 4693 const struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; 4694 4695 if (range->length == 0) 4696 continue; 4697 4698 struct pipe_shader_buffer *cbuf = &shs->constbuf[range->block]; 4699 struct iris_resource *res = (void *) cbuf->buffer; 4700 4701 assert(cbuf->buffer_offset % 32 == 0); 4702 4703 pkt.ConstantBody.ReadLength[n] = range->length; 4704 pkt.ConstantBody.Buffer[n] = 4705 res ? 
ro_bo(res->bo, range->start * 32 + cbuf->buffer_offset) 4706 : ro_bo(batch->screen->workaround_bo, 0); 4707 n--; 4708 } 4709 } 4710 } 4711 } 4712 4713 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 4714 if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) { 4715 iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) { 4716 ptr._3DCommandSubOpcode = 38 + stage; 4717 ptr.PointertoVSBindingTable = binder->bt_offset[stage]; 4718 } 4719 } 4720 } 4721 4722 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 4723 if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) { 4724 iris_populate_binding_table(ice, batch, stage, false); 4725 } 4726 } 4727 4728 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 4729 if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage)) || 4730 !ice->shaders.prog[stage]) 4731 continue; 4732 4733 iris_upload_sampler_states(ice, stage); 4734 4735 struct iris_shader_state *shs = &ice->state.shaders[stage]; 4736 struct pipe_resource *res = shs->sampler_table.res; 4737 if (res) 4738 iris_use_pinned_bo(batch, iris_resource_bo(res), false); 4739 4740 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { 4741 ptr._3DCommandSubOpcode = 43 + stage; 4742 ptr.PointertoVSSamplerState = shs->sampler_table.offset; 4743 } 4744 } 4745 4746 if (ice->state.need_border_colors) 4747 iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false); 4748 4749 if (dirty & IRIS_DIRTY_MULTISAMPLE) { 4750 iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) { 4751 ms.PixelLocation = 4752 ice->state.cso_rast->half_pixel_center ? 
CENTER : UL_CORNER; 4753 if (ice->state.framebuffer.samples > 0) 4754 ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1; 4755 } 4756 } 4757 4758 if (dirty & IRIS_DIRTY_SAMPLE_MASK) { 4759 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) { 4760 ms.SampleMask = ice->state.sample_mask; 4761 } 4762 } 4763 4764 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 4765 if (!(dirty & (IRIS_DIRTY_VS << stage))) 4766 continue; 4767 4768 struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 4769 4770 if (shader) { 4771 struct brw_stage_prog_data *prog_data = shader->prog_data; 4772 struct iris_resource *cache = (void *) shader->assembly.res; 4773 iris_use_pinned_bo(batch, cache->bo, false); 4774 4775 if (prog_data->total_scratch > 0) { 4776 struct iris_bo *bo = 4777 iris_get_scratch_space(ice, prog_data->total_scratch, stage); 4778 iris_use_pinned_bo(batch, bo, true); 4779 } 4780#if GEN_GEN >= 9 4781 if (stage == MESA_SHADER_FRAGMENT && wm_prog_data->uses_sample_mask) { 4782 uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0}; 4783 uint32_t *shader_psx = ((uint32_t*)shader->derived_data) + 4784 GENX(3DSTATE_PS_length); 4785 struct iris_rasterizer_state *cso = ice->state.cso_rast; 4786 4787 iris_pack_command(GENX(3DSTATE_PS_EXTRA), &psx_state, psx) { 4788 if (wm_prog_data->post_depth_coverage) 4789 psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE; 4790 else if (wm_prog_data->inner_coverage && cso->conservative_rasterization) 4791 psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE; 4792 else 4793 psx.InputCoverageMaskState = ICMS_NORMAL; 4794 } 4795 4796 iris_batch_emit(batch, shader->derived_data, 4797 sizeof(uint32_t) * GENX(3DSTATE_PS_length)); 4798 iris_emit_merge(batch, 4799 shader_psx, 4800 psx_state, 4801 GENX(3DSTATE_PS_EXTRA_length)); 4802 } else 4803#endif 4804 iris_batch_emit(batch, shader->derived_data, 4805 iris_derived_program_state_size(stage)); 4806 } else { 4807 if (stage == MESA_SHADER_TESS_EVAL) { 4808 
iris_emit_cmd(batch, GENX(3DSTATE_HS), hs); 4809 iris_emit_cmd(batch, GENX(3DSTATE_TE), te); 4810 iris_emit_cmd(batch, GENX(3DSTATE_DS), ds); 4811 } else if (stage == MESA_SHADER_GEOMETRY) { 4812 iris_emit_cmd(batch, GENX(3DSTATE_GS), gs); 4813 } 4814 } 4815 } 4816 4817 if (ice->state.streamout_active) { 4818 if (dirty & IRIS_DIRTY_SO_BUFFERS) { 4819 iris_batch_emit(batch, genx->so_buffers, 4820 4 * 4 * GENX(3DSTATE_SO_BUFFER_length)); 4821 for (int i = 0; i < 4; i++) { 4822 struct iris_stream_output_target *tgt = 4823 (void *) ice->state.so_target[i]; 4824 if (tgt) { 4825 tgt->zeroed = true; 4826 iris_use_pinned_bo(batch, iris_resource_bo(tgt->base.buffer), 4827 true); 4828 iris_use_pinned_bo(batch, iris_resource_bo(tgt->offset.res), 4829 true); 4830 } 4831 } 4832 } 4833 4834 if ((dirty & IRIS_DIRTY_SO_DECL_LIST) && ice->state.streamout) { 4835 uint32_t *decl_list = 4836 ice->state.streamout + GENX(3DSTATE_STREAMOUT_length); 4837 iris_batch_emit(batch, decl_list, 4 * ((decl_list[0] & 0xff) + 2)); 4838 } 4839 4840 if (dirty & IRIS_DIRTY_STREAMOUT) { 4841 const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 4842 4843 uint32_t dynamic_sol[GENX(3DSTATE_STREAMOUT_length)]; 4844 iris_pack_command(GENX(3DSTATE_STREAMOUT), dynamic_sol, sol) { 4845 sol.SOFunctionEnable = true; 4846 sol.SOStatisticsEnable = true; 4847 4848 sol.RenderingDisable = cso_rast->rasterizer_discard && 4849 !ice->state.prims_generated_query_active; 4850 sol.ReorderMode = cso_rast->flatshade_first ? 
LEADING : TRAILING; 4851 } 4852 4853 assert(ice->state.streamout); 4854 4855 iris_emit_merge(batch, ice->state.streamout, dynamic_sol, 4856 GENX(3DSTATE_STREAMOUT_length)); 4857 } 4858 } else { 4859 if (dirty & IRIS_DIRTY_STREAMOUT) { 4860 iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), sol); 4861 } 4862 } 4863 4864 if (dirty & IRIS_DIRTY_CLIP) { 4865 struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 4866 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 4867 4868 bool gs_or_tes = ice->shaders.prog[MESA_SHADER_GEOMETRY] || 4869 ice->shaders.prog[MESA_SHADER_TESS_EVAL]; 4870 bool points_or_lines = cso_rast->fill_mode_point_or_line || 4871 (gs_or_tes ? ice->shaders.output_topology_is_points_or_lines 4872 : ice->state.prim_is_points_or_lines); 4873 4874 uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)]; 4875 iris_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) { 4876 cl.StatisticsEnable = ice->state.statistics_counters_enabled; 4877 cl.ClipMode = cso_rast->rasterizer_discard ? 
CLIPMODE_REJECT_ALL 4878 : CLIPMODE_NORMAL; 4879 cl.ViewportXYClipTestEnable = !points_or_lines; 4880 4881 if (wm_prog_data->barycentric_interp_modes & 4882 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) 4883 cl.NonPerspectiveBarycentricEnable = true; 4884 4885 cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0; 4886 cl.MaximumVPIndex = ice->state.num_viewports - 1; 4887 } 4888 iris_emit_merge(batch, cso_rast->clip, dynamic_clip, 4889 ARRAY_SIZE(cso_rast->clip)); 4890 } 4891 4892 if (dirty & IRIS_DIRTY_RASTER) { 4893 struct iris_rasterizer_state *cso = ice->state.cso_rast; 4894 iris_batch_emit(batch, cso->raster, sizeof(cso->raster)); 4895 iris_batch_emit(batch, cso->sf, sizeof(cso->sf)); 4896 4897 } 4898 4899 if (dirty & IRIS_DIRTY_WM) { 4900 struct iris_rasterizer_state *cso = ice->state.cso_rast; 4901 uint32_t dynamic_wm[GENX(3DSTATE_WM_length)]; 4902 4903 iris_pack_command(GENX(3DSTATE_WM), &dynamic_wm, wm) { 4904 wm.StatisticsEnable = ice->state.statistics_counters_enabled; 4905 4906 wm.BarycentricInterpolationMode = 4907 wm_prog_data->barycentric_interp_modes; 4908 4909 if (wm_prog_data->early_fragment_tests) 4910 wm.EarlyDepthStencilControl = EDSC_PREPS; 4911 else if (wm_prog_data->has_side_effects) 4912 wm.EarlyDepthStencilControl = EDSC_PSEXEC; 4913 4914 /* We could skip this bit if color writes are enabled. 
*/ 4915 if (wm_prog_data->has_side_effects || wm_prog_data->uses_kill) 4916 wm.ForceThreadDispatchEnable = ForceON; 4917 } 4918 iris_emit_merge(batch, cso->wm, dynamic_wm, ARRAY_SIZE(cso->wm)); 4919 } 4920 4921 if (dirty & IRIS_DIRTY_SBE) { 4922 iris_emit_sbe(batch, ice); 4923 } 4924 4925 if (dirty & IRIS_DIRTY_PS_BLEND) { 4926 struct iris_blend_state *cso_blend = ice->state.cso_blend; 4927 struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; 4928 const struct shader_info *fs_info = 4929 iris_get_shader_info(ice, MESA_SHADER_FRAGMENT); 4930 4931 uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)]; 4932 iris_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) { 4933 pb.HasWriteableRT = has_writeable_rt(cso_blend, fs_info); 4934 pb.AlphaTestEnable = cso_zsa->alpha.enabled; 4935 4936 /* The dual source blending docs caution against using SRC1 factors 4937 * when the shader doesn't use a dual source render target write. 4938 * Empirically, this can lead to GPU hangs, and the results are 4939 * undefined anyway, so simply disable blending to avoid the hang. 
4940 */ 4941 pb.ColorBufferBlendEnable = (cso_blend->blend_enables & 1) && 4942 (!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend); 4943 } 4944 4945 iris_emit_merge(batch, cso_blend->ps_blend, dynamic_pb, 4946 ARRAY_SIZE(cso_blend->ps_blend)); 4947 } 4948 4949 if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) { 4950 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; 4951#if GEN_GEN >= 9 4952 struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref; 4953 uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; 4954 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) { 4955 wmds.StencilReferenceValue = p_stencil_refs->ref_value[0]; 4956 wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1]; 4957 } 4958 iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds)); 4959#else 4960 iris_batch_emit(batch, cso->wmds, sizeof(cso->wmds)); 4961#endif 4962 } 4963 4964 if (dirty & IRIS_DIRTY_SCISSOR_RECT) { 4965 uint32_t scissor_offset = 4966 emit_state(batch, ice->state.dynamic_uploader, 4967 &ice->state.last_res.scissor, 4968 ice->state.scissors, 4969 sizeof(struct pipe_scissor_state) * 4970 ice->state.num_viewports, 32); 4971 4972 iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { 4973 ptr.ScissorRectPointer = scissor_offset; 4974 } 4975 } 4976 4977 if (dirty & IRIS_DIRTY_DEPTH_BUFFER) { 4978 struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer; 4979 4980 /* Do not emit the clear params yets. We need to update the clear value 4981 * first. 
4982 */ 4983 uint32_t clear_length = GENX(3DSTATE_CLEAR_PARAMS_length) * 4; 4984 uint32_t cso_z_size = sizeof(cso_z->packets) - clear_length; 4985 iris_batch_emit(batch, cso_z->packets, cso_z_size); 4986 4987 union isl_color_value clear_value = { .f32 = { 0, } }; 4988 4989 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 4990 if (cso_fb->zsbuf) { 4991 struct iris_resource *zres, *sres; 4992 iris_get_depth_stencil_resources(cso_fb->zsbuf->texture, 4993 &zres, &sres); 4994 if (zres && zres->aux.bo) 4995 clear_value = iris_resource_get_clear_color(zres, NULL, NULL); 4996 } 4997 4998 uint32_t clear_params[GENX(3DSTATE_CLEAR_PARAMS_length)]; 4999 iris_pack_command(GENX(3DSTATE_CLEAR_PARAMS), clear_params, clear) { 5000 clear.DepthClearValueValid = true; 5001 clear.DepthClearValue = clear_value.f32[0]; 5002 } 5003 iris_batch_emit(batch, clear_params, clear_length); 5004 } 5005 5006 if (dirty & (IRIS_DIRTY_DEPTH_BUFFER | IRIS_DIRTY_WM_DEPTH_STENCIL)) { 5007 /* Listen for buffer changes, and also write enable changes. 
*/ 5008 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 5009 pin_depth_and_stencil_buffers(batch, cso_fb->zsbuf, ice->state.cso_zsa); 5010 } 5011 5012 if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) { 5013 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) { 5014 for (int i = 0; i < 32; i++) { 5015 poly.PatternRow[i] = ice->state.poly_stipple.stipple[i]; 5016 } 5017 } 5018 } 5019 5020 if (dirty & IRIS_DIRTY_LINE_STIPPLE) { 5021 struct iris_rasterizer_state *cso = ice->state.cso_rast; 5022 iris_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple)); 5023 } 5024 5025 if (dirty & IRIS_DIRTY_VF_TOPOLOGY) { 5026 iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { 5027 topo.PrimitiveTopologyType = 5028 translate_prim_type(draw->mode, draw->vertices_per_patch); 5029 } 5030 } 5031 5032 if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) { 5033 int count = util_bitcount64(ice->state.bound_vertex_buffers); 5034 int dynamic_bound = ice->state.bound_vertex_buffers; 5035 5036 if (ice->state.vs_uses_draw_params) { 5037 if (ice->draw.draw_params_offset == 0) { 5038 u_upload_data(ice->state.dynamic_uploader, 0, sizeof(ice->draw.params), 5039 4, &ice->draw.params, &ice->draw.draw_params_offset, 5040 &ice->draw.draw_params_res); 5041 } 5042 assert(ice->draw.draw_params_res); 5043 5044 struct iris_vertex_buffer_state *state = 5045 &(ice->state.genx->vertex_buffers[count]); 5046 pipe_resource_reference(&state->resource, ice->draw.draw_params_res); 5047 struct iris_resource *res = (void *) state->resource; 5048 5049 iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) { 5050 vb.VertexBufferIndex = count; 5051 vb.AddressModifyEnable = true; 5052 vb.BufferPitch = 0; 5053 vb.BufferSize = res->bo->size - ice->draw.draw_params_offset; 5054 vb.BufferStartingAddress = 5055 ro_bo(NULL, res->bo->gtt_offset + 5056 (int) ice->draw.draw_params_offset); 5057 vb.MOCS = mocs(res->bo); 5058 } 5059 dynamic_bound |= 1ull << count; 5060 count++; 5061 } 5062 5063 if 
(ice->state.vs_uses_derived_draw_params) { 5064 u_upload_data(ice->state.dynamic_uploader, 0, 5065 sizeof(ice->draw.derived_params), 4, 5066 &ice->draw.derived_params, 5067 &ice->draw.derived_draw_params_offset, 5068 &ice->draw.derived_draw_params_res); 5069 5070 struct iris_vertex_buffer_state *state = 5071 &(ice->state.genx->vertex_buffers[count]); 5072 pipe_resource_reference(&state->resource, 5073 ice->draw.derived_draw_params_res); 5074 struct iris_resource *res = (void *) ice->draw.derived_draw_params_res; 5075 5076 iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) { 5077 vb.VertexBufferIndex = count; 5078 vb.AddressModifyEnable = true; 5079 vb.BufferPitch = 0; 5080 vb.BufferSize = 5081 res->bo->size - ice->draw.derived_draw_params_offset; 5082 vb.BufferStartingAddress = 5083 ro_bo(NULL, res->bo->gtt_offset + 5084 (int) ice->draw.derived_draw_params_offset); 5085 vb.MOCS = mocs(res->bo); 5086 } 5087 dynamic_bound |= 1ull << count; 5088 count++; 5089 } 5090 5091 if (count) { 5092 /* The VF cache designers cut corners, and made the cache key's 5093 * <VertexBufferIndex, Memory Address> tuple only consider the bottom 5094 * 32 bits of the address. If you have two vertex buffers which get 5095 * placed exactly 4 GiB apart and use them in back-to-back draw calls, 5096 * you can get collisions (even within a single batch). 5097 * 5098 * So, we need to do a VF cache invalidate if the buffer for a VB 5099 * slot slot changes [48:32] address bits from the previous time. 
5100 */ 5101 unsigned flush_flags = 0; 5102 5103 uint64_t bound = dynamic_bound; 5104 while (bound) { 5105 const int i = u_bit_scan64(&bound); 5106 uint16_t high_bits = 0; 5107 5108 struct iris_resource *res = 5109 (void *) genx->vertex_buffers[i].resource; 5110 if (res) { 5111 iris_use_pinned_bo(batch, res->bo, false); 5112 5113 high_bits = res->bo->gtt_offset >> 32ull; 5114 if (high_bits != ice->state.last_vbo_high_bits[i]) { 5115 flush_flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE | 5116 PIPE_CONTROL_CS_STALL; 5117 ice->state.last_vbo_high_bits[i] = high_bits; 5118 } 5119 } 5120 } 5121 5122 if (flush_flags) 5123 iris_emit_pipe_control_flush(batch, flush_flags); 5124 5125 const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length); 5126 5127 uint32_t *map = 5128 iris_get_command_space(batch, 4 * (1 + vb_dwords * count)); 5129 _iris_pack_command(batch, GENX(3DSTATE_VERTEX_BUFFERS), map, vb) { 5130 vb.DWordLength = (vb_dwords * count + 1) - 2; 5131 } 5132 map += 1; 5133 5134 bound = dynamic_bound; 5135 while (bound) { 5136 const int i = u_bit_scan64(&bound); 5137 memcpy(map, genx->vertex_buffers[i].state, 5138 sizeof(uint32_t) * vb_dwords); 5139 map += vb_dwords; 5140 } 5141 } 5142 } 5143 5144 if (dirty & IRIS_DIRTY_VERTEX_ELEMENTS) { 5145 struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements; 5146 const unsigned entries = MAX2(cso->count, 1); 5147 if (!(ice->state.vs_needs_sgvs_element || 5148 ice->state.vs_uses_derived_draw_params || 5149 ice->state.vs_needs_edge_flag)) { 5150 iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) * 5151 (1 + entries * GENX(VERTEX_ELEMENT_STATE_length))); 5152 } else { 5153 uint32_t dynamic_ves[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)]; 5154 const unsigned dyn_count = cso->count + 5155 ice->state.vs_needs_sgvs_element + 5156 ice->state.vs_uses_derived_draw_params; 5157 5158 iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), 5159 &dynamic_ves, ve) { 5160 ve.DWordLength = 5161 1 + 
GENX(VERTEX_ELEMENT_STATE_length) * dyn_count - 2; 5162 } 5163 memcpy(&dynamic_ves[1], &cso->vertex_elements[1], 5164 (cso->count - ice->state.vs_needs_edge_flag) * 5165 GENX(VERTEX_ELEMENT_STATE_length) * sizeof(uint32_t)); 5166 uint32_t *ve_pack_dest = 5167 &dynamic_ves[1 + (cso->count - ice->state.vs_needs_edge_flag) * 5168 GENX(VERTEX_ELEMENT_STATE_length)]; 5169 5170 if (ice->state.vs_needs_sgvs_element) { 5171 uint32_t base_ctrl = ice->state.vs_uses_draw_params ? 5172 VFCOMP_STORE_SRC : VFCOMP_STORE_0; 5173 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 5174 ve.Valid = true; 5175 ve.VertexBufferIndex = 5176 util_bitcount64(ice->state.bound_vertex_buffers); 5177 ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT; 5178 ve.Component0Control = base_ctrl; 5179 ve.Component1Control = base_ctrl; 5180 ve.Component2Control = VFCOMP_STORE_0; 5181 ve.Component3Control = VFCOMP_STORE_0; 5182 } 5183 ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); 5184 } 5185 if (ice->state.vs_uses_derived_draw_params) { 5186 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 5187 ve.Valid = true; 5188 ve.VertexBufferIndex = 5189 util_bitcount64(ice->state.bound_vertex_buffers) + 5190 ice->state.vs_uses_draw_params; 5191 ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT; 5192 ve.Component0Control = VFCOMP_STORE_SRC; 5193 ve.Component1Control = VFCOMP_STORE_SRC; 5194 ve.Component2Control = VFCOMP_STORE_0; 5195 ve.Component3Control = VFCOMP_STORE_0; 5196 } 5197 ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); 5198 } 5199 if (ice->state.vs_needs_edge_flag) { 5200 for (int i = 0; i < GENX(VERTEX_ELEMENT_STATE_length); i++) 5201 ve_pack_dest[i] = cso->edgeflag_ve[i]; 5202 } 5203 5204 iris_batch_emit(batch, &dynamic_ves, sizeof(uint32_t) * 5205 (1 + dyn_count * GENX(VERTEX_ELEMENT_STATE_length))); 5206 } 5207 5208 if (!ice->state.vs_needs_edge_flag) { 5209 iris_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) * 5210 entries * 
GENX(3DSTATE_VF_INSTANCING_length)); 5211 } else { 5212 assert(cso->count > 0); 5213 const unsigned edgeflag_index = cso->count - 1; 5214 uint32_t dynamic_vfi[33 * GENX(3DSTATE_VF_INSTANCING_length)]; 5215 memcpy(&dynamic_vfi[0], cso->vf_instancing, edgeflag_index * 5216 GENX(3DSTATE_VF_INSTANCING_length) * sizeof(uint32_t)); 5217 5218 uint32_t *vfi_pack_dest = &dynamic_vfi[0] + 5219 edgeflag_index * GENX(3DSTATE_VF_INSTANCING_length); 5220 iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { 5221 vi.VertexElementIndex = edgeflag_index + 5222 ice->state.vs_needs_sgvs_element + 5223 ice->state.vs_uses_derived_draw_params; 5224 } 5225 for (int i = 0; i < GENX(3DSTATE_VF_INSTANCING_length); i++) 5226 vfi_pack_dest[i] |= cso->edgeflag_vfi[i]; 5227 5228 iris_batch_emit(batch, &dynamic_vfi[0], sizeof(uint32_t) * 5229 entries * GENX(3DSTATE_VF_INSTANCING_length)); 5230 } 5231 } 5232 5233 if (dirty & IRIS_DIRTY_VF_SGVS) { 5234 const struct brw_vs_prog_data *vs_prog_data = (void *) 5235 ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data; 5236 struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements; 5237 5238 iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgv) { 5239 if (vs_prog_data->uses_vertexid) { 5240 sgv.VertexIDEnable = true; 5241 sgv.VertexIDComponentNumber = 2; 5242 sgv.VertexIDElementOffset = 5243 cso->count - ice->state.vs_needs_edge_flag; 5244 } 5245 5246 if (vs_prog_data->uses_instanceid) { 5247 sgv.InstanceIDEnable = true; 5248 sgv.InstanceIDComponentNumber = 3; 5249 sgv.InstanceIDElementOffset = 5250 cso->count - ice->state.vs_needs_edge_flag; 5251 } 5252 } 5253 } 5254 5255 if (dirty & IRIS_DIRTY_VF) { 5256 iris_emit_cmd(batch, GENX(3DSTATE_VF), vf) { 5257 if (draw->primitive_restart) { 5258 vf.IndexedDrawCutIndexEnable = true; 5259 vf.CutIndex = draw->restart_index; 5260 } 5261 } 5262 } 5263 5264 if (dirty & IRIS_DIRTY_VF_STATISTICS) { 5265 iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf) { 5266 vf.StatisticsEnable = true; 5267 } 
5268 } 5269 5270 /* TODO: Gen8 PMA fix */ 5271} 5272 5273static void 5274iris_upload_render_state(struct iris_context *ice, 5275 struct iris_batch *batch, 5276 const struct pipe_draw_info *draw) 5277{ 5278 /* Always pin the binder. If we're emitting new binding table pointers, 5279 * we need it. If not, we're probably inheriting old tables via the 5280 * context, and need it anyway. Since true zero-bindings cases are 5281 * practically non-existent, just pin it and avoid last_res tracking. 5282 */ 5283 iris_use_pinned_bo(batch, ice->state.binder.bo, false); 5284 5285 if (!batch->contains_draw) { 5286 iris_restore_render_saved_bos(ice, batch, draw); 5287 batch->contains_draw = true; 5288 } 5289 5290 iris_upload_dirty_render_state(ice, batch, draw); 5291 5292 if (draw->index_size > 0) { 5293 unsigned offset; 5294 5295 if (draw->has_user_indices) { 5296 u_upload_data(ice->ctx.stream_uploader, 0, 5297 draw->count * draw->index_size, 4, draw->index.user, 5298 &offset, &ice->state.last_res.index_buffer); 5299 } else { 5300 struct iris_resource *res = (void *) draw->index.resource; 5301 res->bind_history |= PIPE_BIND_INDEX_BUFFER; 5302 5303 pipe_resource_reference(&ice->state.last_res.index_buffer, 5304 draw->index.resource); 5305 offset = 0; 5306 } 5307 5308 struct iris_bo *bo = iris_resource_bo(ice->state.last_res.index_buffer); 5309 5310 iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) { 5311 ib.IndexFormat = draw->index_size >> 1; 5312 ib.MOCS = mocs(bo); 5313 ib.BufferSize = bo->size - offset; 5314 ib.BufferStartingAddress = ro_bo(bo, offset); 5315 } 5316 5317 /* The VF cache key only uses 32-bits, see vertex buffer comment above */ 5318 uint16_t high_bits = bo->gtt_offset >> 32ull; 5319 if (high_bits != ice->state.last_index_bo_high_bits) { 5320 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_VF_CACHE_INVALIDATE | 5321 PIPE_CONTROL_CS_STALL); 5322 ice->state.last_index_bo_high_bits = high_bits; 5323 } 5324 } 5325 5326#define _3DPRIM_END_OFFSET 0x2420 
5327#define _3DPRIM_START_VERTEX 0x2430 5328#define _3DPRIM_VERTEX_COUNT 0x2434 5329#define _3DPRIM_INSTANCE_COUNT 0x2438 5330#define _3DPRIM_START_INSTANCE 0x243C 5331#define _3DPRIM_BASE_VERTEX 0x2440 5332 5333 if (draw->indirect) { 5334 /* We don't support this MultidrawIndirect. */ 5335 assert(!draw->indirect->indirect_draw_count); 5336 5337 struct iris_bo *bo = iris_resource_bo(draw->indirect->buffer); 5338 assert(bo); 5339 5340 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5341 lrm.RegisterAddress = _3DPRIM_VERTEX_COUNT; 5342 lrm.MemoryAddress = ro_bo(bo, draw->indirect->offset + 0); 5343 } 5344 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5345 lrm.RegisterAddress = _3DPRIM_INSTANCE_COUNT; 5346 lrm.MemoryAddress = ro_bo(bo, draw->indirect->offset + 4); 5347 } 5348 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5349 lrm.RegisterAddress = _3DPRIM_START_VERTEX; 5350 lrm.MemoryAddress = ro_bo(bo, draw->indirect->offset + 8); 5351 } 5352 if (draw->index_size) { 5353 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5354 lrm.RegisterAddress = _3DPRIM_BASE_VERTEX; 5355 lrm.MemoryAddress = ro_bo(bo, draw->indirect->offset + 12); 5356 } 5357 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5358 lrm.RegisterAddress = _3DPRIM_START_INSTANCE; 5359 lrm.MemoryAddress = ro_bo(bo, draw->indirect->offset + 16); 5360 } 5361 } else { 5362 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5363 lrm.RegisterAddress = _3DPRIM_START_INSTANCE; 5364 lrm.MemoryAddress = ro_bo(bo, draw->indirect->offset + 12); 5365 } 5366 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { 5367 lri.RegisterOffset = _3DPRIM_BASE_VERTEX; 5368 lri.DataDWord = 0; 5369 } 5370 } 5371 } else if (draw->count_from_stream_output) { 5372 struct iris_stream_output_target *so = 5373 (void *) draw->count_from_stream_output; 5374 5375 /* XXX: Replace with actual cache tracking */ 5376 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL); 5377 5378 
iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5379 lrm.RegisterAddress = CS_GPR(0); 5380 lrm.MemoryAddress = 5381 ro_bo(iris_resource_bo(so->offset.res), so->offset.offset); 5382 } 5383 if (so->base.buffer_offset) 5384 iris_math_add32_gpr0(ice, batch, -so->base.buffer_offset); 5385 iris_math_div32_gpr0(ice, batch, so->stride); 5386 _iris_emit_lrr(batch, _3DPRIM_VERTEX_COUNT, CS_GPR(0)); 5387 5388 _iris_emit_lri(batch, _3DPRIM_START_VERTEX, 0); 5389 _iris_emit_lri(batch, _3DPRIM_BASE_VERTEX, 0); 5390 _iris_emit_lri(batch, _3DPRIM_START_INSTANCE, 0); 5391 _iris_emit_lri(batch, _3DPRIM_INSTANCE_COUNT, draw->instance_count); 5392 } 5393 5394 iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) { 5395 prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL; 5396 prim.PredicateEnable = 5397 ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT; 5398 5399 if (draw->indirect || draw->count_from_stream_output) { 5400 prim.IndirectParameterEnable = true; 5401 } else { 5402 prim.StartInstanceLocation = draw->start_instance; 5403 prim.InstanceCount = draw->instance_count; 5404 prim.VertexCountPerInstance = draw->count; 5405 5406 // XXX: this is probably bonkers. 
5407 prim.StartVertexLocation = draw->start; 5408 5409 if (draw->index_size) { 5410 prim.BaseVertexLocation += draw->index_bias; 5411 } else { 5412 prim.StartVertexLocation += draw->index_bias; 5413 } 5414 5415 //prim.BaseVertexLocation = ...; 5416 } 5417 } 5418} 5419 5420static void 5421iris_upload_compute_state(struct iris_context *ice, 5422 struct iris_batch *batch, 5423 const struct pipe_grid_info *grid) 5424{ 5425 const uint64_t dirty = ice->state.dirty; 5426 struct iris_screen *screen = batch->screen; 5427 const struct gen_device_info *devinfo = &screen->devinfo; 5428 struct iris_binder *binder = &ice->state.binder; 5429 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE]; 5430 struct iris_compiled_shader *shader = 5431 ice->shaders.prog[MESA_SHADER_COMPUTE]; 5432 struct brw_stage_prog_data *prog_data = shader->prog_data; 5433 struct brw_cs_prog_data *cs_prog_data = (void *) prog_data; 5434 5435 /* Always pin the binder. If we're emitting new binding table pointers, 5436 * we need it. If not, we're probably inheriting old tables via the 5437 * context, and need it anyway. Since true zero-bindings cases are 5438 * practically non-existent, just pin it and avoid last_res tracking. 
5439 */ 5440 iris_use_pinned_bo(batch, ice->state.binder.bo, false); 5441 5442 if ((dirty & IRIS_DIRTY_CONSTANTS_CS) && shs->cbuf0_needs_upload) 5443 upload_uniforms(ice, MESA_SHADER_COMPUTE); 5444 5445 if (dirty & IRIS_DIRTY_BINDINGS_CS) 5446 iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false); 5447 5448 if (dirty & IRIS_DIRTY_SAMPLER_STATES_CS) 5449 iris_upload_sampler_states(ice, MESA_SHADER_COMPUTE); 5450 5451 iris_use_optional_res(batch, shs->sampler_table.res, false); 5452 iris_use_pinned_bo(batch, iris_resource_bo(shader->assembly.res), false); 5453 5454 if (ice->state.need_border_colors) 5455 iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false); 5456 5457 if (dirty & IRIS_DIRTY_CS) { 5458 /* The MEDIA_VFE_STATE documentation for Gen8+ says: 5459 * 5460 * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless 5461 * the only bits that are changed are scoreboard related: Scoreboard 5462 * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard Delta. For 5463 * these scoreboard related states, a MEDIA_STATE_FLUSH is 5464 * sufficient." 
5465 */ 5466 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL); 5467 5468 iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) { 5469 if (prog_data->total_scratch) { 5470 struct iris_bo *bo = 5471 iris_get_scratch_space(ice, prog_data->total_scratch, 5472 MESA_SHADER_COMPUTE); 5473 vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; 5474 vfe.ScratchSpaceBasePointer = rw_bo(bo, 0); 5475 } 5476 5477 vfe.MaximumNumberofThreads = 5478 devinfo->max_cs_threads * screen->subslice_total - 1; 5479#if GEN_GEN < 11 5480 vfe.ResetGatewayTimer = 5481 Resettingrelativetimerandlatchingtheglobaltimestamp; 5482#endif 5483#if GEN_GEN == 8 5484 vfe.BypassGatewayControl = true; 5485#endif 5486 vfe.NumberofURBEntries = 2; 5487 vfe.URBEntryAllocationSize = 2; 5488 5489 vfe.CURBEAllocationSize = 5490 ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads + 5491 cs_prog_data->push.cross_thread.regs, 2); 5492 } 5493 } 5494 5495 /* TODO: Combine subgroup-id with cbuf0 so we can push regular uniforms */ 5496 uint32_t curbe_data_offset = 0; 5497 assert(cs_prog_data->push.cross_thread.dwords == 0 && 5498 cs_prog_data->push.per_thread.dwords == 1 && 5499 cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); 5500 struct pipe_resource *curbe_data_res = NULL; 5501 uint32_t *curbe_data_map = 5502 stream_state(batch, ice->state.dynamic_uploader, &curbe_data_res, 5503 ALIGN(cs_prog_data->push.total.size, 64), 64, 5504 &curbe_data_offset); 5505 assert(curbe_data_map); 5506 memset(curbe_data_map, 0x5a, ALIGN(cs_prog_data->push.total.size, 64)); 5507 iris_fill_cs_push_const_buffer(cs_prog_data, curbe_data_map); 5508 5509 if (dirty & IRIS_DIRTY_CONSTANTS_CS) { 5510 iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) { 5511 curbe.CURBETotalDataLength = 5512 ALIGN(cs_prog_data->push.total.size, 64); 5513 curbe.CURBEDataStartAddress = curbe_data_offset; 5514 } 5515 } 5516 5517 if (dirty & (IRIS_DIRTY_SAMPLER_STATES_CS | 5518 IRIS_DIRTY_BINDINGS_CS | 5519 
IRIS_DIRTY_CONSTANTS_CS | 5520 IRIS_DIRTY_CS)) { 5521 struct pipe_resource *desc_res = NULL; 5522 uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)]; 5523 5524 iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) { 5525 idd.SamplerStatePointer = shs->sampler_table.offset; 5526 idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE]; 5527 } 5528 5529 for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++) 5530 desc[i] |= ((uint32_t *) shader->derived_data)[i]; 5531 5532 iris_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) { 5533 load.InterfaceDescriptorTotalLength = 5534 GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); 5535 load.InterfaceDescriptorDataStartAddress = 5536 emit_state(batch, ice->state.dynamic_uploader, 5537 &desc_res, desc, sizeof(desc), 32); 5538 } 5539 5540 pipe_resource_reference(&desc_res, NULL); 5541 } 5542 5543 uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2]; 5544 uint32_t remainder = group_size & (cs_prog_data->simd_size - 1); 5545 uint32_t right_mask; 5546 5547 if (remainder > 0) 5548 right_mask = ~0u >> (32 - remainder); 5549 else 5550 right_mask = ~0u >> (32 - cs_prog_data->simd_size); 5551 5552#define GPGPU_DISPATCHDIMX 0x2500 5553#define GPGPU_DISPATCHDIMY 0x2504 5554#define GPGPU_DISPATCHDIMZ 0x2508 5555 5556 if (grid->indirect) { 5557 struct iris_state_ref *grid_size = &ice->state.grid_size; 5558 struct iris_bo *bo = iris_resource_bo(grid_size->res); 5559 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5560 lrm.RegisterAddress = GPGPU_DISPATCHDIMX; 5561 lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0); 5562 } 5563 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5564 lrm.RegisterAddress = GPGPU_DISPATCHDIMY; 5565 lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4); 5566 } 5567 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5568 lrm.RegisterAddress = GPGPU_DISPATCHDIMZ; 5569 lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8); 5570 } 
5571 } 5572 5573 iris_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) { 5574 ggw.IndirectParameterEnable = grid->indirect != NULL; 5575 ggw.SIMDSize = cs_prog_data->simd_size / 16; 5576 ggw.ThreadDepthCounterMaximum = 0; 5577 ggw.ThreadHeightCounterMaximum = 0; 5578 ggw.ThreadWidthCounterMaximum = cs_prog_data->threads - 1; 5579 ggw.ThreadGroupIDXDimension = grid->grid[0]; 5580 ggw.ThreadGroupIDYDimension = grid->grid[1]; 5581 ggw.ThreadGroupIDZDimension = grid->grid[2]; 5582 ggw.RightExecutionMask = right_mask; 5583 ggw.BottomExecutionMask = 0xffffffff; 5584 } 5585 5586 iris_emit_cmd(batch, GENX(MEDIA_STATE_FLUSH), msf); 5587 5588 if (!batch->contains_draw) { 5589 iris_restore_compute_saved_bos(ice, batch, grid); 5590 batch->contains_draw = true; 5591 } 5592} 5593 5594/** 5595 * State module teardown. 5596 */ 5597static void 5598iris_destroy_state(struct iris_context *ice) 5599{ 5600 struct iris_genx_state *genx = ice->state.genx; 5601 5602 uint64_t bound_vbs = ice->state.bound_vertex_buffers; 5603 while (bound_vbs) { 5604 const int i = u_bit_scan64(&bound_vbs); 5605 pipe_resource_reference(&genx->vertex_buffers[i].resource, NULL); 5606 } 5607 free(ice->state.genx); 5608 5609 for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) { 5610 pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL); 5611 } 5612 pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL); 5613 5614 for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) { 5615 struct iris_shader_state *shs = &ice->state.shaders[stage]; 5616 pipe_resource_reference(&shs->sampler_table.res, NULL); 5617 for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 5618 pipe_resource_reference(&shs->constbuf[i].buffer, NULL); 5619 pipe_resource_reference(&shs->constbuf_surf_state[i].res, NULL); 5620 } 5621 for (int i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) { 5622 pipe_resource_reference(&shs->image[i].base.resource, NULL); 5623 pipe_resource_reference(&shs->image[i].surface_state.res, NULL); 5624 } 5625 for 
(int i = 0; i < PIPE_MAX_SHADER_BUFFERS; i++) { 5626 pipe_resource_reference(&shs->ssbo[i].buffer, NULL); 5627 pipe_resource_reference(&shs->ssbo_surf_state[i].res, NULL); 5628 } 5629 for (int i = 0; i < IRIS_MAX_TEXTURE_SAMPLERS; i++) { 5630 pipe_sampler_view_reference((struct pipe_sampler_view **) 5631 &shs->textures[i], NULL); 5632 } 5633 } 5634 5635 pipe_resource_reference(&ice->state.grid_size.res, NULL); 5636 pipe_resource_reference(&ice->state.grid_surf_state.res, NULL); 5637 5638 pipe_resource_reference(&ice->state.null_fb.res, NULL); 5639 pipe_resource_reference(&ice->state.unbound_tex.res, NULL); 5640 5641 pipe_resource_reference(&ice->state.last_res.cc_vp, NULL); 5642 pipe_resource_reference(&ice->state.last_res.sf_cl_vp, NULL); 5643 pipe_resource_reference(&ice->state.last_res.color_calc, NULL); 5644 pipe_resource_reference(&ice->state.last_res.scissor, NULL); 5645 pipe_resource_reference(&ice->state.last_res.blend, NULL); 5646 pipe_resource_reference(&ice->state.last_res.index_buffer, NULL); 5647} 5648 5649/* ------------------------------------------------------------------- */ 5650 5651static void 5652iris_rebind_buffer(struct iris_context *ice, 5653 struct iris_resource *res, 5654 uint64_t old_address) 5655{ 5656 struct pipe_context *ctx = &ice->ctx; 5657 struct iris_screen *screen = (void *) ctx->screen; 5658 struct iris_genx_state *genx = ice->state.genx; 5659 5660 assert(res->base.target == PIPE_BUFFER); 5661 5662 /* Buffers can't be framebuffer attachments, nor display related, 5663 * and we don't have upstream Clover support. 
5664 */ 5665 assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL | 5666 PIPE_BIND_RENDER_TARGET | 5667 PIPE_BIND_BLENDABLE | 5668 PIPE_BIND_DISPLAY_TARGET | 5669 PIPE_BIND_CURSOR | 5670 PIPE_BIND_COMPUTE_RESOURCE | 5671 PIPE_BIND_GLOBAL))); 5672 5673 if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) { 5674 uint64_t bound_vbs = ice->state.bound_vertex_buffers; 5675 while (bound_vbs) { 5676 const int i = u_bit_scan64(&bound_vbs); 5677 struct iris_vertex_buffer_state *state = &genx->vertex_buffers[i]; 5678 5679 /* Update the CPU struct */ 5680 STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_start) == 32); 5681 STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) == 64); 5682 uint64_t *addr = (uint64_t *) &state->state[1]; 5683 5684 if (*addr == old_address) { 5685 *addr = res->bo->gtt_offset; 5686 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS; 5687 } 5688 } 5689 } 5690 5691 /* No need to handle these: 5692 * - PIPE_BIND_INDEX_BUFFER (emitted for every indexed draw) 5693 * - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw) 5694 * - PIPE_BIND_QUERY_BUFFER (no persistent state references) 5695 */ 5696 5697 if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) { 5698 /* XXX: be careful about resetting vs appending... 
*/ 5699 assert(false); 5700 } 5701 5702 for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) { 5703 struct iris_shader_state *shs = &ice->state.shaders[s]; 5704 enum pipe_shader_type p_stage = stage_to_pipe(s); 5705 5706 if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) { 5707 /* Skip constant buffer 0, it's for regular uniforms, not UBOs */ 5708 uint32_t bound_cbufs = shs->bound_cbufs & ~1u; 5709 while (bound_cbufs) { 5710 const int i = u_bit_scan(&bound_cbufs); 5711 struct pipe_shader_buffer *cbuf = &shs->constbuf[i]; 5712 struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i]; 5713 5714 if (res->bo == iris_resource_bo(cbuf->buffer)) { 5715 upload_ubo_ssbo_surf_state(ice, cbuf, surf_state, false); 5716 ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << s; 5717 } 5718 } 5719 } 5720 5721 if (res->bind_history & PIPE_BIND_SHADER_BUFFER) { 5722 uint32_t bound_ssbos = shs->bound_ssbos; 5723 while (bound_ssbos) { 5724 const int i = u_bit_scan(&bound_ssbos); 5725 struct pipe_shader_buffer *ssbo = &shs->ssbo[i]; 5726 5727 if (res->bo == iris_resource_bo(ssbo->buffer)) { 5728 struct pipe_shader_buffer buf = { 5729 .buffer = &res->base, 5730 .buffer_offset = ssbo->buffer_offset, 5731 .buffer_size = ssbo->buffer_size, 5732 }; 5733 iris_set_shader_buffers(ctx, p_stage, i, 1, &buf, 5734 (shs->writable_ssbos >> i) & 1); 5735 } 5736 } 5737 } 5738 5739 if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) { 5740 uint32_t bound_sampler_views = shs->bound_sampler_views; 5741 while (bound_sampler_views) { 5742 const int i = u_bit_scan(&bound_sampler_views); 5743 struct iris_sampler_view *isv = shs->textures[i]; 5744 5745 if (res->bo == iris_resource_bo(isv->base.texture)) { 5746 void *map = alloc_surface_states(ice->state.surface_uploader, 5747 &isv->surface_state, 5748 isv->res->aux.sampler_usages); 5749 assert(map); 5750 fill_buffer_surface_state(&screen->isl_dev, isv->res->bo, map, 5751 isv->view.format, isv->view.swizzle, 5752 isv->base.u.buf.offset, 5753 
isv->base.u.buf.size); 5754 ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << s; 5755 } 5756 } 5757 } 5758 5759 if (res->bind_history & PIPE_BIND_SHADER_IMAGE) { 5760 uint32_t bound_image_views = shs->bound_image_views; 5761 while (bound_image_views) { 5762 const int i = u_bit_scan(&bound_image_views); 5763 struct iris_image_view *iv = &shs->image[i]; 5764 5765 if (res->bo == iris_resource_bo(iv->base.resource)) { 5766 iris_set_shader_images(ctx, p_stage, i, 1, &iv->base); 5767 } 5768 } 5769 } 5770 } 5771} 5772 5773/* ------------------------------------------------------------------- */ 5774 5775static void 5776iris_load_register_reg32(struct iris_batch *batch, uint32_t dst, 5777 uint32_t src) 5778{ 5779 _iris_emit_lrr(batch, dst, src); 5780} 5781 5782static void 5783iris_load_register_reg64(struct iris_batch *batch, uint32_t dst, 5784 uint32_t src) 5785{ 5786 _iris_emit_lrr(batch, dst, src); 5787 _iris_emit_lrr(batch, dst + 4, src + 4); 5788} 5789 5790static void 5791iris_load_register_imm32(struct iris_batch *batch, uint32_t reg, 5792 uint32_t val) 5793{ 5794 _iris_emit_lri(batch, reg, val); 5795} 5796 5797static void 5798iris_load_register_imm64(struct iris_batch *batch, uint32_t reg, 5799 uint64_t val) 5800{ 5801 _iris_emit_lri(batch, reg + 0, val & 0xffffffff); 5802 _iris_emit_lri(batch, reg + 4, val >> 32); 5803} 5804 5805/** 5806 * Emit MI_LOAD_REGISTER_MEM to load a 32-bit MMIO register from a buffer. 5807 */ 5808static void 5809iris_load_register_mem32(struct iris_batch *batch, uint32_t reg, 5810 struct iris_bo *bo, uint32_t offset) 5811{ 5812 iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5813 lrm.RegisterAddress = reg; 5814 lrm.MemoryAddress = ro_bo(bo, offset); 5815 } 5816} 5817 5818/** 5819 * Load a 64-bit value from a buffer into a MMIO register via 5820 * two MI_LOAD_REGISTER_MEM commands. 
5821 */ 5822static void 5823iris_load_register_mem64(struct iris_batch *batch, uint32_t reg, 5824 struct iris_bo *bo, uint32_t offset) 5825{ 5826 iris_load_register_mem32(batch, reg + 0, bo, offset + 0); 5827 iris_load_register_mem32(batch, reg + 4, bo, offset + 4); 5828} 5829 5830static void 5831iris_store_register_mem32(struct iris_batch *batch, uint32_t reg, 5832 struct iris_bo *bo, uint32_t offset, 5833 bool predicated) 5834{ 5835 iris_emit_cmd(batch, GENX(MI_STORE_REGISTER_MEM), srm) { 5836 srm.RegisterAddress = reg; 5837 srm.MemoryAddress = rw_bo(bo, offset); 5838 srm.PredicateEnable = predicated; 5839 } 5840} 5841 5842static void 5843iris_store_register_mem64(struct iris_batch *batch, uint32_t reg, 5844 struct iris_bo *bo, uint32_t offset, 5845 bool predicated) 5846{ 5847 iris_store_register_mem32(batch, reg + 0, bo, offset + 0, predicated); 5848 iris_store_register_mem32(batch, reg + 4, bo, offset + 4, predicated); 5849} 5850 5851static void 5852iris_store_data_imm32(struct iris_batch *batch, 5853 struct iris_bo *bo, uint32_t offset, 5854 uint32_t imm) 5855{ 5856 iris_emit_cmd(batch, GENX(MI_STORE_DATA_IMM), sdi) { 5857 sdi.Address = rw_bo(bo, offset); 5858 sdi.ImmediateData = imm; 5859 } 5860} 5861 5862static void 5863iris_store_data_imm64(struct iris_batch *batch, 5864 struct iris_bo *bo, uint32_t offset, 5865 uint64_t imm) 5866{ 5867 /* Can't use iris_emit_cmd because MI_STORE_DATA_IMM has a length of 5868 * 2 in genxml but it's actually variable length and we need 5 DWords. 5869 */ 5870 void *map = iris_get_command_space(batch, 4 * 5); 5871 _iris_pack_command(batch, GENX(MI_STORE_DATA_IMM), map, sdi) { 5872 sdi.DWordLength = 5 - 2; 5873 sdi.Address = rw_bo(bo, offset); 5874 sdi.ImmediateData = imm; 5875 } 5876} 5877 5878static void 5879iris_copy_mem_mem(struct iris_batch *batch, 5880 struct iris_bo *dst_bo, uint32_t dst_offset, 5881 struct iris_bo *src_bo, uint32_t src_offset, 5882 unsigned bytes) 5883{ 5884 /* MI_COPY_MEM_MEM operates on DWords. 
*/ 5885 assert(bytes % 4 == 0); 5886 assert(dst_offset % 4 == 0); 5887 assert(src_offset % 4 == 0); 5888 5889 for (unsigned i = 0; i < bytes; i += 4) { 5890 iris_emit_cmd(batch, GENX(MI_COPY_MEM_MEM), cp) { 5891 cp.DestinationMemoryAddress = rw_bo(dst_bo, dst_offset + i); 5892 cp.SourceMemoryAddress = ro_bo(src_bo, src_offset + i); 5893 } 5894 } 5895} 5896 5897/* ------------------------------------------------------------------- */ 5898 5899static unsigned 5900flags_to_post_sync_op(uint32_t flags) 5901{ 5902 if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) 5903 return WriteImmediateData; 5904 5905 if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) 5906 return WritePSDepthCount; 5907 5908 if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) 5909 return WriteTimestamp; 5910 5911 return 0; 5912} 5913 5914/** 5915 * Do the given flags have a Post Sync or LRI Post Sync operation? 5916 */ 5917static enum pipe_control_flags 5918get_post_sync_flags(enum pipe_control_flags flags) 5919{ 5920 flags &= PIPE_CONTROL_WRITE_IMMEDIATE | 5921 PIPE_CONTROL_WRITE_DEPTH_COUNT | 5922 PIPE_CONTROL_WRITE_TIMESTAMP | 5923 PIPE_CONTROL_LRI_POST_SYNC_OP; 5924 5925 /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with 5926 * "LRI Post Sync Operation". So more than one bit set would be illegal. 5927 */ 5928 assert(util_bitcount(flags) <= 1); 5929 5930 return flags; 5931} 5932 5933#define IS_COMPUTE_PIPELINE(batch) (batch->name == IRIS_BATCH_COMPUTE) 5934 5935/** 5936 * Emit a series of PIPE_CONTROL commands, taking into account any 5937 * workarounds necessary to actually accomplish the caller's request. 5938 * 5939 * Unless otherwise noted, spec quotations in this function come from: 5940 * 5941 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming 5942 * Restrictions for PIPE_CONTROL. 5943 * 5944 * You should not use this function directly. Use the helpers in 5945 * iris_pipe_control.c instead, which may split the pipe control further. 
*/
static void
iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags,
                           struct iris_bo *bo, uint32_t offset, uint64_t imm)
{
   /* Parameters:
    *   flags      - PIPE_CONTROL_* bits requested by the caller; the
    *                workarounds below may OR further bits into it.
    *   bo, offset - destination for the post-sync write (pc.Address).  If
    *                the VF-invalidate workaround has to add a post-sync
    *                write and bo is NULL, the screen's workaround BO is
    *                substituted.
    *   imm        - value programmed into pc.ImmediateData.
    */
   UNUSED const struct gen_device_info *devinfo = &batch->screen->devinfo;
   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
   enum pipe_control_flags non_lri_post_sync_flags =
      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Recursive PIPE_CONTROL workarounds --------------------------------
    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
    *
    * We do these first because we want to look at the original operation,
    * rather than any workarounds we set.
    */
   if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
       * lists several workarounds:
       *
       *    "Project: SKL, KBL, BXT
       *
       *     If the VF Cache Invalidation Enable is set to a 1 in a
       *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
       *     sets to 0, with the VF Cache Invalidation Enable set to 0
       *     needs to be sent prior to the PIPE_CONTROL with VF Cache
       *     Invalidation Enable set to a 1."
       */
      iris_emit_raw_pipe_control(batch, 0, NULL, 0, 0);
   }

   if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(batch) && post_sync_flags) {
      /* Project: SKL / Argument: LRI Post Sync Operation [23]
       *
       * "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *  programmed prior to programming a PIPECONTROL command with "LRI
       *  Post Sync Operation" in GPGPU mode of operation (i.e when
       *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
       *
       * The same text exists a few rows below for Post Sync Op.
       */
      iris_emit_raw_pipe_control(batch, PIPE_CONTROL_CS_STALL, bo, offset, imm);
   }

   if (GEN_GEN == 10 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
      /* Cannonlake:
       * "Before sending a PIPE_CONTROL command with bit 12 set, SW must issue
       *  another PIPE_CONTROL with Render Target Cache Flush Enable (bit 12)
       *  = 0 and Pipe Control Flush Enable (bit 7) = 1"
       */
      iris_emit_raw_pipe_control(batch, PIPE_CONTROL_FLUSH_ENABLE, bo,
                                 offset, imm);
   }

   /* "Flush Types" workarounds ---------------------------------------------
    * We do these now because they may add post-sync operations or CS stalls.
    */

   if (GEN_GEN < 11 && flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
       *
       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
       *  'Write PS Depth Count' or 'Write Timestamp'."
       */
      if (!bo) {
         /* No destination supplied: add a write-immediate to the
          * workaround BO and keep all three flag sets in step.
          */
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         bo = batch->screen->workaround_bo;
      }
   }

   /* #1130 from Gen10 workarounds page:
    *
    *    "Enable Depth Stall on every Post Sync Op if Render target Cache
    *     Flush is not enabled in same PIPE CONTROL and Enable Pixel score
    *     board stall if Render target cache flush is enabled."
    *
    * Applicable to CNL B0 and C0 steppings only.
    *
    * The wording here is unclear, and this workaround doesn't look anything
    * like the internal bug report recommendations, but leave it be for now...
    */
   if (GEN_GEN == 10) {
      if (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
      } else if (flags & non_lri_post_sync_flags) {
         flags |= PIPE_CONTROL_DEPTH_STALL;
      }
   }

   if (flags & PIPE_CONTROL_DEPTH_STALL) {
      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
       *
       *    "This bit must be DISABLED for operations other than writing
       *     PS_DEPTH_COUNT."
       *
       * This seems like nonsense.  An Ivybridge workaround requires us to
       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
       * operation.  Gen8+ requires us to emit depth stalls and depth cache
       * flushes together.  So, it's hard to imagine this means anything other
       * than "we originally intended this to be used for PS_DEPTH_COUNT".
       *
       * We ignore the supposed restriction and do nothing.
       */
   }

   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
       *
       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
       *     PS_DEPTH_COUNT or TIMESTAMP queries."
       *
       * TODO: Implement end-of-pipe checking.
       */
      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
   }

   if (GEN_GEN < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 1:
       *
       *    "This bit is ignored if Depth Stall Enable is set.
       *     Further, the render cache is not flushed even if Write Cache
       *     Flush Enable bit is set."
       *
       * We assert that the caller doesn't do this combination, to try and
       * prevent mistakes.  It shouldn't hurt the GPU, though.
       *
       * We skip this check on Gen11+ as the "Stall at Pixel Scoreboard"
       * and "Render Target Flush" combo is explicitly required for BTI
       * update workarounds.
       */
      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
   }

   /* PIPE_CONTROL page workarounds ------------------------------------- */

   if (GEN_GEN <= 8 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "IVB, HSW, BDW
       *     Restriction: Pipe_control with CS-stall bit set must be issued
       *     before a pipe-control command that has the State Cache
       *     Invalidate bit set."
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_FLUSH_LLC) {
      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
       *
       *    "Project: ALL
       *     SW must always program Post-Sync Operation to "Write Immediate
       *     Data" when Flush LLC is set."
       *
       * For now, we just require the caller to do it.
       */
      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
   }

   /* "Post-Sync Operation" workarounds -------------------------------- */

   /* Project: All / Argument: Global Snapshot Count Reset [19]
    *
    * "This bit must not be exercised on any product.
    *  Requires stall bit ([20] of DW1) set."
    *
    * We don't use this, so we just assert that it isn't used.  The
    * PIPE_CONTROL instruction page indicates that they intended this
    * as a debug feature and don't think it is useful in production,
    * but it may actually be usable, should we ever want to.
    */
   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);

   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
      /* Project: All / Arguments:
       *
       * - Generic Media State Clear [16]
       * - Indirect State Pointers Disable [16]
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
       * State Clear) says:
       *
       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *     programmed prior to programming a PIPECONTROL command with "Media
       *     State Clear" set in GPGPU mode of operation"
       *
       * This is a subset of the earlier rule, so there's nothing to do
       * beyond setting the CS stall bit.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
      /* Project: All / Argument: Store Data Index
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0'."
       *
       * For now, we just assert that the caller does this.  We might want to
       * automatically add a write to the workaround BO...
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_SYNC_GFDT) {
      /* Project: All / Argument: Sync GFDT
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0' or 0x2520[13] must be set."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_TLB_INVALIDATE) {
      /* Project: IVB+ / Argument: TLB inv
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, from the PIPE_CONTROL instruction table:
       *
       *    "Project: SKL+
       *     Post Sync Operation or CS stall must be set to ensure a TLB
       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
       *     cache to invalidate."
       *
       * Always setting the CS stall bit satisfies both quoted rules, so
       * there's nothing more to do for the second one.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (GEN_GEN == 9 && devinfo->gt == 4) {
      /* TODO: The big Skylake GT4 post sync op workaround */
   }

   /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */

   if (IS_COMPUTE_PIPELINE(batch)) {
      if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
         /* Project: SKL+ / Argument: Tex Invalidate
          * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;
      }

      if (GEN_GEN == 8 && (post_sync_flags ||
                           (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
                                     PIPE_CONTROL_DEPTH_STALL |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
         /* Project: BDW / Arguments:
          *
          * - LRI Post Sync Operation   [23]
          * - Post Sync Op              [15:14]
          * - Notify En                 [8]
          * - Depth Stall               [13]
          * - Render Target Cache Flush [12]
          * - Depth Cache Flush         [0]
          * - DC Flush Enable           [5]
          *
          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
          *     Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;

         /* Also, from the PIPE_CONTROL instruction table, bit 20:
          *
          *    "Project: BDW
          *     This bit must be always set when PIPE_CONTROL command is
          *     programmed by GPGPU and MEDIA workloads, except for the cases
          *     when only Read Only Cache Invalidation bits are set (State
          *     Cache Invalidation Enable, Instruction cache Invalidation
          *     Enable, Texture Cache Invalidation Enable, Constant Cache
          *     Invalidation Enable).  This is to WA FFDOP CG issue, this WA
          *     need not implemented when FF_DOP_CG is disable via "Fixed
          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
          *
          * It sounds like we could avoid CS stalls in some cases, but we
          * don't currently bother.  This list isn't exactly the list above,
          * either...
          */
      }
   }

   /* "Stall" workarounds ----------------------------------------------
    * These have to come after the earlier ones because we may have added
    * some additional CS stalls above.
    */

   if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
      /* Project: PRE-SKL, VLV, CHV
       *
       * "[All Stepping][All SKUs]:
       *
       *  One of the following must also be set:
       *
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)
       *  - Stall at Pixel Scoreboard ([1] of DW1)
       *  - Depth Stall ([13] of DW1)
       *  - Post-Sync Operation ([13] of DW1)
       *  - DC Flush Enable ([5] of DW1)"
       *
       * If we don't already have one of those bits set, we choose to add
       * "Stall at Pixel Scoreboard".  Some of the other bits require a
       * CS stall as a workaround (see above), which would send us into
       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
       * appears to be safe, so we choose that.
       */
      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                               PIPE_CONTROL_WRITE_IMMEDIATE |
                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
                               PIPE_CONTROL_WRITE_TIMESTAMP |
                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
                               PIPE_CONTROL_DEPTH_STALL |
                               PIPE_CONTROL_DATA_CACHE_FLUSH;
      if (!(flags & wa_bits))
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
   }

   /* Emit --------------------------------------------------------------- */

   /* Each field is taken from the final value of flags, i.e. including any
    * bits the workarounds above added.
    */
   iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
      pc.LRIPostSyncOperation = NoLRIOperation;
      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
      pc.StoreDataIndex = 0;
      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
      pc.GlobalSnapshotCountReset =
         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
      pc.RenderTargetCacheFlushEnable =
         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
      pc.StateCacheInvalidationEnable =
         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
      pc.ConstantCacheInvalidationEnable =
         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
      pc.PostSyncOperation = flags_to_post_sync_op(flags);
      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
      pc.InstructionCacheInvalidateEnable =
         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
      pc.IndirectStatePointersDisable =
         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
      pc.TextureCacheInvalidationEnable =
         flags &
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 6305 pc.Address = rw_bo(bo, offset); 6306 pc.ImmediateData = imm; 6307 } 6308} 6309 6310void 6311genX(emit_urb_setup)(struct iris_context *ice, 6312 struct iris_batch *batch, 6313 const unsigned size[4], 6314 bool tess_present, bool gs_present) 6315{ 6316 const struct gen_device_info *devinfo = &batch->screen->devinfo; 6317 const unsigned push_size_kB = 32; 6318 unsigned entries[4]; 6319 unsigned start[4]; 6320 6321 ice->shaders.last_vs_entry_size = size[MESA_SHADER_VERTEX]; 6322 6323 gen_get_urb_config(devinfo, 1024 * push_size_kB, 6324 1024 * ice->shaders.urb_size, 6325 tess_present, gs_present, 6326 size, entries, start); 6327 6328 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 6329 iris_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) { 6330 urb._3DCommandSubOpcode += i; 6331 urb.VSURBStartingAddress = start[i]; 6332 urb.VSURBEntryAllocationSize = size[i] - 1; 6333 urb.VSNumberofURBEntries = entries[i]; 6334 } 6335 } 6336} 6337 6338#if GEN_GEN == 9 6339/** 6340 * Preemption on Gen9 has to be enabled or disabled in various cases. 6341 * 6342 * See these workarounds for preemption: 6343 * - WaDisableMidObjectPreemptionForGSLineStripAdj 6344 * - WaDisableMidObjectPreemptionForTrifanOrPolygon 6345 * - WaDisableMidObjectPreemptionForLineLoop 6346 * - WA#0798 6347 * 6348 * We don't put this in the vtable because it's only used on Gen9. 6349 */ 6350void 6351gen9_toggle_preemption(struct iris_context *ice, 6352 struct iris_batch *batch, 6353 const struct pipe_draw_info *draw) 6354{ 6355 struct iris_genx_state *genx = ice->state.genx; 6356 bool object_preemption = true; 6357 6358 /* WaDisableMidObjectPreemptionForGSLineStripAdj 6359 * 6360 * "WA: Disable mid-draw preemption when draw-call is a linestrip_adj 6361 * and GS is enabled." 
6362 */ 6363 if (draw->mode == PIPE_PRIM_LINE_STRIP_ADJACENCY && 6364 ice->shaders.prog[MESA_SHADER_GEOMETRY]) 6365 object_preemption = false; 6366 6367 /* WaDisableMidObjectPreemptionForTrifanOrPolygon 6368 * 6369 * "TriFan miscompare in Execlist Preemption test. Cut index that is 6370 * on a previous context. End the previous, the resume another context 6371 * with a tri-fan or polygon, and the vertex count is corrupted. If we 6372 * prempt again we will cause corruption. 6373 * 6374 * WA: Disable mid-draw preemption when draw-call has a tri-fan." 6375 */ 6376 if (draw->mode == PIPE_PRIM_TRIANGLE_FAN) 6377 object_preemption = false; 6378 6379 /* WaDisableMidObjectPreemptionForLineLoop 6380 * 6381 * "VF Stats Counters Missing a vertex when preemption enabled. 6382 * 6383 * WA: Disable mid-draw preemption when the draw uses a lineloop 6384 * topology." 6385 */ 6386 if (draw->mode == PIPE_PRIM_LINE_LOOP) 6387 object_preemption = false; 6388 6389 /* WA#0798 6390 * 6391 * "VF is corrupting GAFS data when preempted on an instance boundary 6392 * and replayed with instancing enabled. 6393 * 6394 * WA: Disable preemption when using instanceing." 
6395 */ 6396 if (draw->instance_count > 1) 6397 object_preemption = false; 6398 6399 if (genx->object_preemption != object_preemption) { 6400 iris_enable_obj_preemption(batch, object_preemption); 6401 genx->object_preemption = object_preemption; 6402 } 6403} 6404#endif 6405 6406void 6407genX(init_state)(struct iris_context *ice) 6408{ 6409 struct pipe_context *ctx = &ice->ctx; 6410 struct iris_screen *screen = (struct iris_screen *)ctx->screen; 6411 6412 ctx->create_blend_state = iris_create_blend_state; 6413 ctx->create_depth_stencil_alpha_state = iris_create_zsa_state; 6414 ctx->create_rasterizer_state = iris_create_rasterizer_state; 6415 ctx->create_sampler_state = iris_create_sampler_state; 6416 ctx->create_sampler_view = iris_create_sampler_view; 6417 ctx->create_surface = iris_create_surface; 6418 ctx->create_vertex_elements_state = iris_create_vertex_elements; 6419 ctx->bind_blend_state = iris_bind_blend_state; 6420 ctx->bind_depth_stencil_alpha_state = iris_bind_zsa_state; 6421 ctx->bind_sampler_states = iris_bind_sampler_states; 6422 ctx->bind_rasterizer_state = iris_bind_rasterizer_state; 6423 ctx->bind_vertex_elements_state = iris_bind_vertex_elements_state; 6424 ctx->delete_blend_state = iris_delete_state; 6425 ctx->delete_depth_stencil_alpha_state = iris_delete_state; 6426 ctx->delete_rasterizer_state = iris_delete_state; 6427 ctx->delete_sampler_state = iris_delete_state; 6428 ctx->delete_vertex_elements_state = iris_delete_state; 6429 ctx->set_blend_color = iris_set_blend_color; 6430 ctx->set_clip_state = iris_set_clip_state; 6431 ctx->set_constant_buffer = iris_set_constant_buffer; 6432 ctx->set_shader_buffers = iris_set_shader_buffers; 6433 ctx->set_shader_images = iris_set_shader_images; 6434 ctx->set_sampler_views = iris_set_sampler_views; 6435 ctx->set_tess_state = iris_set_tess_state; 6436 ctx->set_framebuffer_state = iris_set_framebuffer_state; 6437 ctx->set_polygon_stipple = iris_set_polygon_stipple; 6438 ctx->set_sample_mask = 
iris_set_sample_mask; 6439 ctx->set_scissor_states = iris_set_scissor_states; 6440 ctx->set_stencil_ref = iris_set_stencil_ref; 6441 ctx->set_vertex_buffers = iris_set_vertex_buffers; 6442 ctx->set_viewport_states = iris_set_viewport_states; 6443 ctx->sampler_view_destroy = iris_sampler_view_destroy; 6444 ctx->surface_destroy = iris_surface_destroy; 6445 ctx->draw_vbo = iris_draw_vbo; 6446 ctx->launch_grid = iris_launch_grid; 6447 ctx->create_stream_output_target = iris_create_stream_output_target; 6448 ctx->stream_output_target_destroy = iris_stream_output_target_destroy; 6449 ctx->set_stream_output_targets = iris_set_stream_output_targets; 6450 6451 ice->vtbl.destroy_state = iris_destroy_state; 6452 ice->vtbl.init_render_context = iris_init_render_context; 6453 ice->vtbl.init_compute_context = iris_init_compute_context; 6454 ice->vtbl.upload_render_state = iris_upload_render_state; 6455 ice->vtbl.update_surface_base_address = iris_update_surface_base_address; 6456 ice->vtbl.upload_compute_state = iris_upload_compute_state; 6457 ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control; 6458 ice->vtbl.rebind_buffer = iris_rebind_buffer; 6459 ice->vtbl.load_register_reg32 = iris_load_register_reg32; 6460 ice->vtbl.load_register_reg64 = iris_load_register_reg64; 6461 ice->vtbl.load_register_imm32 = iris_load_register_imm32; 6462 ice->vtbl.load_register_imm64 = iris_load_register_imm64; 6463 ice->vtbl.load_register_mem32 = iris_load_register_mem32; 6464 ice->vtbl.load_register_mem64 = iris_load_register_mem64; 6465 ice->vtbl.store_register_mem32 = iris_store_register_mem32; 6466 ice->vtbl.store_register_mem64 = iris_store_register_mem64; 6467 ice->vtbl.store_data_imm32 = iris_store_data_imm32; 6468 ice->vtbl.store_data_imm64 = iris_store_data_imm64; 6469 ice->vtbl.copy_mem_mem = iris_copy_mem_mem; 6470 ice->vtbl.derived_program_state_size = iris_derived_program_state_size; 6471 ice->vtbl.store_derived_program_state = iris_store_derived_program_state; 6472 
ice->vtbl.create_so_decl_list = iris_create_so_decl_list; 6473 ice->vtbl.populate_vs_key = iris_populate_vs_key; 6474 ice->vtbl.populate_tcs_key = iris_populate_tcs_key; 6475 ice->vtbl.populate_tes_key = iris_populate_tes_key; 6476 ice->vtbl.populate_gs_key = iris_populate_gs_key; 6477 ice->vtbl.populate_fs_key = iris_populate_fs_key; 6478 ice->vtbl.populate_cs_key = iris_populate_cs_key; 6479 ice->vtbl.mocs = mocs; 6480 6481 ice->state.dirty = ~0ull; 6482 6483 ice->state.statistics_counters_enabled = true; 6484 6485 ice->state.sample_mask = 0xffff; 6486 ice->state.num_viewports = 1; 6487 ice->state.genx = calloc(1, sizeof(struct iris_genx_state)); 6488 6489 /* Make a 1x1x1 null surface for unbound textures */ 6490 void *null_surf_map = 6491 upload_state(ice->state.surface_uploader, &ice->state.unbound_tex, 6492 4 * GENX(RENDER_SURFACE_STATE_length), 64); 6493 isl_null_fill_state(&screen->isl_dev, null_surf_map, isl_extent3d(1, 1, 1)); 6494 ice->state.unbound_tex.offset += 6495 iris_bo_offset_from_base_address(iris_resource_bo(ice->state.unbound_tex.res)); 6496 6497 /* Default all scissor rectangles to be empty regions. */ 6498 for (int i = 0; i < IRIS_MAX_VIEWPORTS; i++) { 6499 ice->state.scissors[i] = (struct pipe_scissor_state) { 6500 .minx = 1, .maxx = 0, .miny = 1, .maxy = 0, 6501 }; 6502 } 6503} 6504