19f464c52Smaya/* 29f464c52Smaya * Copyright © 2017 Intel Corporation 39f464c52Smaya * 49f464c52Smaya * Permission is hereby granted, free of charge, to any person obtaining a 59f464c52Smaya * copy of this software and associated documentation files (the "Software"), 69f464c52Smaya * to deal in the Software without restriction, including without limitation 79f464c52Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 89f464c52Smaya * and/or sell copies of the Software, and to permit persons to whom the 99f464c52Smaya * Software is furnished to do so, subject to the following conditions: 109f464c52Smaya * 119f464c52Smaya * The above copyright notice and this permission notice shall be included 129f464c52Smaya * in all copies or substantial portions of the Software. 139f464c52Smaya * 149f464c52Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 159f464c52Smaya * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 169f464c52Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 179f464c52Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 189f464c52Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 199f464c52Smaya * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 209f464c52Smaya * DEALINGS IN THE SOFTWARE. 219f464c52Smaya */ 229f464c52Smaya 239f464c52Smaya/** 249f464c52Smaya * @file iris_state.c 259f464c52Smaya * 269f464c52Smaya * ============================= GENXML CODE ============================= 279f464c52Smaya * [This file is compiled once per generation.] 289f464c52Smaya * ======================================================================= 299f464c52Smaya * 309f464c52Smaya * This is the main state upload code. 319f464c52Smaya * 329f464c52Smaya * Gallium uses Constant State Objects, or CSOs, for most state. 
Large, 339f464c52Smaya * complex, or highly reusable state can be created once, and bound and 349f464c52Smaya * rebound multiple times. This is modeled with the pipe->create_*_state() 359f464c52Smaya * and pipe->bind_*_state() hooks. Highly dynamic or inexpensive state is 369f464c52Smaya * streamed out on the fly, via pipe->set_*_state() hooks. 379f464c52Smaya * 389f464c52Smaya * OpenGL involves frequently mutating context state, which is mirrored in 399f464c52Smaya * core Mesa by highly mutable data structures. However, most applications 409f464c52Smaya * typically draw the same things over and over - from frame to frame, most 419f464c52Smaya * of the same objects are still visible and need to be redrawn. So, rather 429f464c52Smaya * than inventing new state all the time, applications usually mutate to swap 439f464c52Smaya * between known states that we've seen before. 449f464c52Smaya * 459f464c52Smaya * Gallium isolates us from this mutation by tracking API state, and 469f464c52Smaya * distilling it into a set of Constant State Objects, or CSOs. Large, 479f464c52Smaya * complex, or typically reusable state can be created once, then reused 489f464c52Smaya * multiple times. Drivers can create and store their own associated data. 499f464c52Smaya * This create/bind model corresponds to the pipe->create_*_state() and 509f464c52Smaya * pipe->bind_*_state() driver hooks. 519f464c52Smaya * 529f464c52Smaya * Some state is cheap to create, or expected to be highly dynamic. Rather 539f464c52Smaya * than creating and caching piles of CSOs for these, Gallium simply streams 549f464c52Smaya * them out, via the pipe->set_*_state() driver hooks. 559f464c52Smaya * 569f464c52Smaya * To reduce draw time overhead, we try to compute as much state at create 579f464c52Smaya * time as possible. Wherever possible, we translate the Gallium pipe state 589f464c52Smaya * to 3DSTATE commands, and store those commands in the CSO. 
At draw time, 599f464c52Smaya * we can simply memcpy them into a batch buffer. 609f464c52Smaya * 619f464c52Smaya * No hardware matches the abstraction perfectly, so some commands require 629f464c52Smaya * information from multiple CSOs. In this case, we can store two copies 639f464c52Smaya * of the packet (one in each CSO), and simply | together their DWords at 649f464c52Smaya * draw time. Sometimes the second set is trivial (one or two fields), so 659f464c52Smaya * we simply pack it at draw time. 669f464c52Smaya * 679f464c52Smaya * There are two main components in the file below. First, the CSO hooks 689f464c52Smaya * create/bind/track state. The second are the draw-time upload functions, 699f464c52Smaya * iris_upload_render_state() and iris_upload_compute_state(), which read 709f464c52Smaya * the context state and emit the commands into the actual batch. 719f464c52Smaya */ 729f464c52Smaya 739f464c52Smaya#include <stdio.h> 749f464c52Smaya#include <errno.h> 759f464c52Smaya 769f464c52Smaya#if HAVE_VALGRIND 779f464c52Smaya#include <valgrind.h> 789f464c52Smaya#include <memcheck.h> 799f464c52Smaya#define VG(x) x 809f464c52Smaya#ifdef DEBUG 819f464c52Smaya#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) 829f464c52Smaya#endif 839f464c52Smaya#else 849f464c52Smaya#define VG(x) 859f464c52Smaya#endif 869f464c52Smaya 879f464c52Smaya#include "pipe/p_defines.h" 889f464c52Smaya#include "pipe/p_state.h" 899f464c52Smaya#include "pipe/p_context.h" 909f464c52Smaya#include "pipe/p_screen.h" 919f464c52Smaya#include "util/u_dual_blend.h" 929f464c52Smaya#include "util/u_inlines.h" 937ec681f3Smrg#include "util/format/u_format.h" 949f464c52Smaya#include "util/u_framebuffer.h" 959f464c52Smaya#include "util/u_transfer.h" 969f464c52Smaya#include "util/u_upload_mgr.h" 979f464c52Smaya#include "util/u_viewport.h" 987ec681f3Smrg#include "util/u_memory.h" 999f464c52Smaya#include "drm-uapi/i915_drm.h" 1009f464c52Smaya#include "nir.h" 1019f464c52Smaya#include 
"intel/compiler/brw_compiler.h" 1027ec681f3Smrg#include "intel/common/intel_aux_map.h" 1037ec681f3Smrg#include "intel/common/intel_l3_config.h" 1047ec681f3Smrg#include "intel/common/intel_sample_positions.h" 1059f464c52Smaya#include "iris_batch.h" 1069f464c52Smaya#include "iris_context.h" 1079f464c52Smaya#include "iris_defines.h" 1089f464c52Smaya#include "iris_pipe.h" 1099f464c52Smaya#include "iris_resource.h" 1109f464c52Smaya 1117ec681f3Smrg#include "iris_genx_macros.h" 1127ec681f3Smrg#include "intel/common/intel_guardband.h" 1139f464c52Smaya 1149f464c52Smaya/** 1159f464c52Smaya * Statically assert that PIPE_* enums match the hardware packets. 1169f464c52Smaya * (As long as they match, we don't need to translate them.) 1179f464c52Smaya */ 1189f464c52SmayaUNUSED static void pipe_asserts() 1199f464c52Smaya{ 1209f464c52Smaya#define PIPE_ASSERT(x) STATIC_ASSERT((int)x) 1219f464c52Smaya 1229f464c52Smaya /* pipe_logicop happens to match the hardware. */ 1239f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR); 1249f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR); 1259f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED); 1269f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED); 1279f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE); 1289f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT); 1299f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR); 1309f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND); 1319f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND); 1329f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV); 1339f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP); 1349f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED); 1359f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY); 1369f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE); 1379f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_OR == 
LOGICOP_OR); 1389f464c52Smaya PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET); 1399f464c52Smaya 1409f464c52Smaya /* pipe_blend_func happens to match the hardware. */ 1419f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE); 1429f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR); 1439f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA); 1449f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA); 1459f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR); 1469f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE); 1479f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR); 1489f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA); 1499f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR); 1509f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA); 1519f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO); 1529f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR); 1539f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA); 1549f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA); 1559f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR); 1569f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR); 1579f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA); 1589f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR); 1599f464c52Smaya PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA); 1609f464c52Smaya 1619f464c52Smaya /* pipe_blend_func happens to match the hardware. 
*/ 1629f464c52Smaya PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD); 1639f464c52Smaya PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT); 1649f464c52Smaya PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT); 1659f464c52Smaya PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN); 1669f464c52Smaya PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX); 1679f464c52Smaya 1689f464c52Smaya /* pipe_stencil_op happens to match the hardware. */ 1699f464c52Smaya PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP); 1709f464c52Smaya PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO); 1719f464c52Smaya PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE); 1729f464c52Smaya PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT); 1739f464c52Smaya PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT); 1749f464c52Smaya PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR); 1759f464c52Smaya PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR); 1769f464c52Smaya PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT); 1779f464c52Smaya 1789f464c52Smaya /* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */ 1799f464c52Smaya PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT); 1809f464c52Smaya PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT); 1819f464c52Smaya#undef PIPE_ASSERT 1829f464c52Smaya} 1839f464c52Smaya 1849f464c52Smayastatic unsigned 1859f464c52Smayatranslate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch) 1869f464c52Smaya{ 1879f464c52Smaya static const unsigned map[] = { 1889f464c52Smaya [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST, 1899f464c52Smaya [PIPE_PRIM_LINES] = _3DPRIM_LINELIST, 1909f464c52Smaya [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP, 1919f464c52Smaya [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP, 1929f464c52Smaya [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST, 1939f464c52Smaya [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, 1949f464c52Smaya [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN, 1959f464c52Smaya [PIPE_PRIM_QUADS] = 
_3DPRIM_QUADLIST, 1969f464c52Smaya [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP, 1979f464c52Smaya [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON, 1989f464c52Smaya [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, 1999f464c52Smaya [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, 2009f464c52Smaya [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, 2019f464c52Smaya [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, 2029f464c52Smaya [PIPE_PRIM_PATCHES] = _3DPRIM_PATCHLIST_1 - 1, 2039f464c52Smaya }; 2049f464c52Smaya 2059f464c52Smaya return map[prim] + (prim == PIPE_PRIM_PATCHES ? verts_per_patch : 0); 2069f464c52Smaya} 2079f464c52Smaya 2089f464c52Smayastatic unsigned 2099f464c52Smayatranslate_compare_func(enum pipe_compare_func pipe_func) 2109f464c52Smaya{ 2119f464c52Smaya static const unsigned map[] = { 2129f464c52Smaya [PIPE_FUNC_NEVER] = COMPAREFUNCTION_NEVER, 2139f464c52Smaya [PIPE_FUNC_LESS] = COMPAREFUNCTION_LESS, 2149f464c52Smaya [PIPE_FUNC_EQUAL] = COMPAREFUNCTION_EQUAL, 2159f464c52Smaya [PIPE_FUNC_LEQUAL] = COMPAREFUNCTION_LEQUAL, 2169f464c52Smaya [PIPE_FUNC_GREATER] = COMPAREFUNCTION_GREATER, 2179f464c52Smaya [PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL, 2189f464c52Smaya [PIPE_FUNC_GEQUAL] = COMPAREFUNCTION_GEQUAL, 2199f464c52Smaya [PIPE_FUNC_ALWAYS] = COMPAREFUNCTION_ALWAYS, 2209f464c52Smaya }; 2219f464c52Smaya return map[pipe_func]; 2229f464c52Smaya} 2239f464c52Smaya 2249f464c52Smayastatic unsigned 2259f464c52Smayatranslate_shadow_func(enum pipe_compare_func pipe_func) 2269f464c52Smaya{ 2279f464c52Smaya /* Gallium specifies the result of shadow comparisons as: 2289f464c52Smaya * 2299f464c52Smaya * 1 if ref <op> texel, 2309f464c52Smaya * 0 otherwise. 2319f464c52Smaya * 2329f464c52Smaya * The hardware does: 2339f464c52Smaya * 2349f464c52Smaya * 0 if texel <op> ref, 2359f464c52Smaya * 1 otherwise. 2369f464c52Smaya * 2379f464c52Smaya * So we need to flip the operator and also negate. 
2389f464c52Smaya */ 2399f464c52Smaya static const unsigned map[] = { 2407ec681f3Smrg [PIPE_FUNC_NEVER] = PREFILTEROP_ALWAYS, 2417ec681f3Smrg [PIPE_FUNC_LESS] = PREFILTEROP_LEQUAL, 2427ec681f3Smrg [PIPE_FUNC_EQUAL] = PREFILTEROP_NOTEQUAL, 2437ec681f3Smrg [PIPE_FUNC_LEQUAL] = PREFILTEROP_LESS, 2447ec681f3Smrg [PIPE_FUNC_GREATER] = PREFILTEROP_GEQUAL, 2457ec681f3Smrg [PIPE_FUNC_NOTEQUAL] = PREFILTEROP_EQUAL, 2467ec681f3Smrg [PIPE_FUNC_GEQUAL] = PREFILTEROP_GREATER, 2477ec681f3Smrg [PIPE_FUNC_ALWAYS] = PREFILTEROP_NEVER, 2489f464c52Smaya }; 2499f464c52Smaya return map[pipe_func]; 2509f464c52Smaya} 2519f464c52Smaya 2529f464c52Smayastatic unsigned 2539f464c52Smayatranslate_cull_mode(unsigned pipe_face) 2549f464c52Smaya{ 2559f464c52Smaya static const unsigned map[4] = { 2569f464c52Smaya [PIPE_FACE_NONE] = CULLMODE_NONE, 2579f464c52Smaya [PIPE_FACE_FRONT] = CULLMODE_FRONT, 2589f464c52Smaya [PIPE_FACE_BACK] = CULLMODE_BACK, 2599f464c52Smaya [PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH, 2609f464c52Smaya }; 2619f464c52Smaya return map[pipe_face]; 2629f464c52Smaya} 2639f464c52Smaya 2649f464c52Smayastatic unsigned 2659f464c52Smayatranslate_fill_mode(unsigned pipe_polymode) 2669f464c52Smaya{ 2679f464c52Smaya static const unsigned map[4] = { 2689f464c52Smaya [PIPE_POLYGON_MODE_FILL] = FILL_MODE_SOLID, 2699f464c52Smaya [PIPE_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, 2709f464c52Smaya [PIPE_POLYGON_MODE_POINT] = FILL_MODE_POINT, 2719f464c52Smaya [PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID, 2729f464c52Smaya }; 2739f464c52Smaya return map[pipe_polymode]; 2749f464c52Smaya} 2759f464c52Smaya 2769f464c52Smayastatic unsigned 2779f464c52Smayatranslate_mip_filter(enum pipe_tex_mipfilter pipe_mip) 2789f464c52Smaya{ 2799f464c52Smaya static const unsigned map[] = { 2809f464c52Smaya [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST, 2819f464c52Smaya [PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR, 2829f464c52Smaya [PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE, 2839f464c52Smaya }; 
2849f464c52Smaya return map[pipe_mip]; 2859f464c52Smaya} 2869f464c52Smaya 2879f464c52Smayastatic uint32_t 2889f464c52Smayatranslate_wrap(unsigned pipe_wrap) 2899f464c52Smaya{ 2909f464c52Smaya static const unsigned map[] = { 2919f464c52Smaya [PIPE_TEX_WRAP_REPEAT] = TCM_WRAP, 2929f464c52Smaya [PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER, 2939f464c52Smaya [PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP, 2949f464c52Smaya [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, 2959f464c52Smaya [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR, 2969f464c52Smaya [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, 2979f464c52Smaya 2989f464c52Smaya /* These are unsupported. */ 2999f464c52Smaya [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, 3009f464c52Smaya [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, 3019f464c52Smaya }; 3029f464c52Smaya return map[pipe_wrap]; 3039f464c52Smaya} 3049f464c52Smaya 3059f464c52Smaya/** 3069f464c52Smaya * Allocate space for some indirect state. 3079f464c52Smaya * 3089f464c52Smaya * Return a pointer to the map (to fill it out) and a state ref (for 3099f464c52Smaya * referring to the state in GPU commands). 3109f464c52Smaya */ 3119f464c52Smayastatic void * 3129f464c52Smayaupload_state(struct u_upload_mgr *uploader, 3139f464c52Smaya struct iris_state_ref *ref, 3149f464c52Smaya unsigned size, 3159f464c52Smaya unsigned alignment) 3169f464c52Smaya{ 3179f464c52Smaya void *p = NULL; 3189f464c52Smaya u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p); 3199f464c52Smaya return p; 3209f464c52Smaya} 3219f464c52Smaya 3229f464c52Smaya/** 3239f464c52Smaya * Stream out temporary/short-lived state. 3249f464c52Smaya * 3259f464c52Smaya * This allocates space, pins the BO, and includes the BO address in the 3269f464c52Smaya * returned offset (which works because all state lives in 32-bit memory 3279f464c52Smaya * zones). 
3289f464c52Smaya */ 3299f464c52Smayastatic uint32_t * 3309f464c52Smayastream_state(struct iris_batch *batch, 3319f464c52Smaya struct u_upload_mgr *uploader, 3329f464c52Smaya struct pipe_resource **out_res, 3339f464c52Smaya unsigned size, 3349f464c52Smaya unsigned alignment, 3359f464c52Smaya uint32_t *out_offset) 3369f464c52Smaya{ 3379f464c52Smaya void *ptr = NULL; 3389f464c52Smaya 3399f464c52Smaya u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr); 3409f464c52Smaya 3419f464c52Smaya struct iris_bo *bo = iris_resource_bo(*out_res); 3427ec681f3Smrg iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE); 3437ec681f3Smrg 3447ec681f3Smrg iris_record_state_size(batch->state_sizes, 3457ec681f3Smrg bo->address + *out_offset, size); 3469f464c52Smaya 3479f464c52Smaya *out_offset += iris_bo_offset_from_base_address(bo); 3489f464c52Smaya 3499f464c52Smaya return ptr; 3509f464c52Smaya} 3519f464c52Smaya 3529f464c52Smaya/** 3539f464c52Smaya * stream_state() + memcpy. 3549f464c52Smaya */ 3559f464c52Smayastatic uint32_t 3569f464c52Smayaemit_state(struct iris_batch *batch, 3579f464c52Smaya struct u_upload_mgr *uploader, 3589f464c52Smaya struct pipe_resource **out_res, 3599f464c52Smaya const void *data, 3609f464c52Smaya unsigned size, 3619f464c52Smaya unsigned alignment) 3629f464c52Smaya{ 3639f464c52Smaya unsigned offset = 0; 3649f464c52Smaya uint32_t *map = 3659f464c52Smaya stream_state(batch, uploader, out_res, size, alignment, &offset); 3669f464c52Smaya 3679f464c52Smaya if (map) 3689f464c52Smaya memcpy(map, data, size); 3699f464c52Smaya 3709f464c52Smaya return offset; 3719f464c52Smaya} 3729f464c52Smaya 3739f464c52Smaya/** 3749f464c52Smaya * Did field 'x' change between 'old_cso' and 'new_cso'? 3759f464c52Smaya * 3769f464c52Smaya * (If so, we may want to set some dirty flags.) 
3779f464c52Smaya */ 3789f464c52Smaya#define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x)) 3799f464c52Smaya#define cso_changed_memcmp(x) \ 3809f464c52Smaya (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0) 3819f464c52Smaya 3829f464c52Smayastatic void 3837ec681f3Smrgflush_before_state_base_change(struct iris_batch *batch) 3849f464c52Smaya{ 3857ec681f3Smrg const struct intel_device_info *devinfo = &batch->screen->devinfo; 3867ec681f3Smrg 3879f464c52Smaya /* Flush before emitting STATE_BASE_ADDRESS. 3889f464c52Smaya * 3899f464c52Smaya * This isn't documented anywhere in the PRM. However, it seems to be 3907ec681f3Smrg * necessary prior to changing the surface state base address. We've 3919f464c52Smaya * seen issues in Vulkan where we get GPU hangs when using multi-level 3929f464c52Smaya * command buffers which clear depth, reset state base address, and then 3939f464c52Smaya * go render stuff. 3949f464c52Smaya * 3959f464c52Smaya * Normally, in GL, we would trust the kernel to do sufficient stalls 3969f464c52Smaya * and flushes prior to executing our batch. However, it doesn't seem 3979f464c52Smaya * as if the kernel's flushing is always sufficient and we don't want to 3989f464c52Smaya * rely on it. 3999f464c52Smaya * 4009f464c52Smaya * We make this an end-of-pipe sync instead of a normal flush because we 4019f464c52Smaya * do not know the current status of the GPU. On Haswell at least, 4029f464c52Smaya * having a fast-clear operation in flight at the same time as a normal 4039f464c52Smaya * rendering operation can cause hangs. Since the kernel's flushing is 4049f464c52Smaya * insufficient, we need to ensure that any rendering operations from 4059f464c52Smaya * other processes are definitely complete before we try to do our own 4069f464c52Smaya * rendering. It's a bit of a big hammer but it appears to work. 
 4079f464c52Smaya */ 4089f464c52Smaya iris_emit_end_of_pipe_sync(batch, 4097ec681f3Smrg "change STATE_BASE_ADDRESS (flushes)", 4109f464c52Smaya PIPE_CONTROL_RENDER_TARGET_FLUSH | 4119f464c52Smaya PIPE_CONTROL_DEPTH_CACHE_FLUSH | 4127ec681f3Smrg PIPE_CONTROL_DATA_CACHE_FLUSH | 4137ec681f3Smrg /* Wa_1606662791: 4147ec681f3Smrg * 4157ec681f3Smrg * Software must program PIPE_CONTROL command 4167ec681f3Smrg * with "HDC Pipeline Flush" prior to 4177ec681f3Smrg * programming of the below two non-pipeline 4187ec681f3Smrg * state : 4197ec681f3Smrg * * STATE_BASE_ADDRESS 4207ec681f3Smrg * * 3DSTATE_BINDING_TABLE_POOL_ALLOC 4217ec681f3Smrg */ 4227ec681f3Smrg ((GFX_VER == 12 && devinfo->revision == 0 /* A0 */ ? 4237ec681f3Smrg PIPE_CONTROL_FLUSH_HDC : 0))); 4247ec681f3Smrg} 4257ec681f3Smrg 4267ec681f3Smrgstatic void 4277ec681f3Smrgflush_after_state_base_change(struct iris_batch *batch) 4287ec681f3Smrg{ 4297ec681f3Smrg /* After re-setting the surface state base address, we have to do some 4307ec681f3Smrg * cache flushing so that the sampler engine will pick up the new 4317ec681f3Smrg * SURFACE_STATE objects and binding tables. From the Broadwell PRM, 4327ec681f3Smrg * Shared Function > 3D Sampler > State > State Caching (page 96): 4337ec681f3Smrg * 4347ec681f3Smrg * Coherency with system memory in the state cache, like the texture 4357ec681f3Smrg * cache is handled partially by software. It is expected that the 4367ec681f3Smrg * command stream or shader will issue Cache Flush operation or 4377ec681f3Smrg * Cache_Flush sampler message to ensure that the L1 cache remains 4387ec681f3Smrg * coherent with system memory. 4397ec681f3Smrg * 4407ec681f3Smrg * [...] 4417ec681f3Smrg * 4427ec681f3Smrg * Whenever the value of the Dynamic_State_Base_Addr, 4437ec681f3Smrg * Surface_State_Base_Addr are altered, the L1 state cache must be 4447ec681f3Smrg * invalidated to ensure the new surface or sampler state is fetched 4457ec681f3Smrg * from system memory.
 4467ec681f3Smrg * 4477ec681f3Smrg * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit 4487ec681f3Smrg * which, according to the PIPE_CONTROL instruction documentation in the 4497ec681f3Smrg * Broadwell PRM: 4507ec681f3Smrg * 4517ec681f3Smrg * Setting this bit is independent of any other bit in this packet. 4527ec681f3Smrg * This bit controls the invalidation of the L1 and L2 state caches 4537ec681f3Smrg * at the top of the pipe i.e. at the parsing time. 4547ec681f3Smrg * 4557ec681f3Smrg * Unfortunately, experimentation seems to indicate that state cache 4567ec681f3Smrg * invalidation through a PIPE_CONTROL does nothing whatsoever in 4577ec681f3Smrg * regards to surface state and binding tables. Instead, it seems that 4587ec681f3Smrg * invalidating the texture cache is what is actually needed. 4597ec681f3Smrg * 4607ec681f3Smrg * XXX: As far as we have been able to determine through 4617ec681f3Smrg * experimentation, flushing the texture cache appears to be 4627ec681f3Smrg * sufficient. The theory here is that all of the sampling/rendering 4637ec681f3Smrg * units cache the binding table in the texture cache. However, we have 4647ec681f3Smrg * yet to be able to actually confirm this.
4657ec681f3Smrg */ 4667ec681f3Smrg iris_emit_end_of_pipe_sync(batch, 4677ec681f3Smrg "change STATE_BASE_ADDRESS (invalidates)", 4687ec681f3Smrg PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 4697ec681f3Smrg PIPE_CONTROL_CONST_CACHE_INVALIDATE | 4707ec681f3Smrg PIPE_CONTROL_STATE_CACHE_INVALIDATE); 4719f464c52Smaya} 4729f464c52Smaya 4739f464c52Smayastatic void 4749f464c52Smaya_iris_emit_lri(struct iris_batch *batch, uint32_t reg, uint32_t val) 4759f464c52Smaya{ 4769f464c52Smaya iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { 4779f464c52Smaya lri.RegisterOffset = reg; 4789f464c52Smaya lri.DataDWord = val; 4799f464c52Smaya } 4809f464c52Smaya} 4819f464c52Smaya#define iris_emit_lri(b, r, v) _iris_emit_lri(b, GENX(r##_num), v) 4829f464c52Smaya 4839f464c52Smayastatic void 4849f464c52Smaya_iris_emit_lrr(struct iris_batch *batch, uint32_t dst, uint32_t src) 4859f464c52Smaya{ 4869f464c52Smaya iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_REG), lrr) { 4879f464c52Smaya lrr.SourceRegisterAddress = src; 4889f464c52Smaya lrr.DestinationRegisterAddress = dst; 4899f464c52Smaya } 4909f464c52Smaya} 4919f464c52Smaya 4927ec681f3Smrgstatic void 4937ec681f3Smrgiris_load_register_reg32(struct iris_batch *batch, uint32_t dst, 4947ec681f3Smrg uint32_t src) 4957ec681f3Smrg{ 4967ec681f3Smrg _iris_emit_lrr(batch, dst, src); 4977ec681f3Smrg} 4987ec681f3Smrg 4997ec681f3Smrgstatic void 5007ec681f3Smrgiris_load_register_reg64(struct iris_batch *batch, uint32_t dst, 5017ec681f3Smrg uint32_t src) 5027ec681f3Smrg{ 5037ec681f3Smrg _iris_emit_lrr(batch, dst, src); 5047ec681f3Smrg _iris_emit_lrr(batch, dst + 4, src + 4); 5057ec681f3Smrg} 5067ec681f3Smrg 5077ec681f3Smrgstatic void 5087ec681f3Smrgiris_load_register_imm32(struct iris_batch *batch, uint32_t reg, 5097ec681f3Smrg uint32_t val) 5107ec681f3Smrg{ 5117ec681f3Smrg _iris_emit_lri(batch, reg, val); 5127ec681f3Smrg} 5137ec681f3Smrg 5147ec681f3Smrgstatic void 5157ec681f3Smrgiris_load_register_imm64(struct iris_batch *batch, uint32_t reg, 5167ec681f3Smrg 
uint64_t val) 5177ec681f3Smrg{ 5187ec681f3Smrg _iris_emit_lri(batch, reg + 0, val & 0xffffffff); 5197ec681f3Smrg _iris_emit_lri(batch, reg + 4, val >> 32); 5207ec681f3Smrg} 5217ec681f3Smrg 5227ec681f3Smrg/** 5237ec681f3Smrg * Emit MI_LOAD_REGISTER_MEM to load a 32-bit MMIO register from a buffer. 5247ec681f3Smrg */ 5257ec681f3Smrgstatic void 5267ec681f3Smrgiris_load_register_mem32(struct iris_batch *batch, uint32_t reg, 5277ec681f3Smrg struct iris_bo *bo, uint32_t offset) 5287ec681f3Smrg{ 5297ec681f3Smrg iris_batch_sync_region_start(batch); 5307ec681f3Smrg iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5317ec681f3Smrg lrm.RegisterAddress = reg; 5327ec681f3Smrg lrm.MemoryAddress = ro_bo(bo, offset); 5337ec681f3Smrg } 5347ec681f3Smrg iris_batch_sync_region_end(batch); 5357ec681f3Smrg} 5367ec681f3Smrg 5377ec681f3Smrg/** 5387ec681f3Smrg * Load a 64-bit value from a buffer into a MMIO register via 5397ec681f3Smrg * two MI_LOAD_REGISTER_MEM commands. 5407ec681f3Smrg */ 5417ec681f3Smrgstatic void 5427ec681f3Smrgiris_load_register_mem64(struct iris_batch *batch, uint32_t reg, 5437ec681f3Smrg struct iris_bo *bo, uint32_t offset) 5447ec681f3Smrg{ 5457ec681f3Smrg iris_load_register_mem32(batch, reg + 0, bo, offset + 0); 5467ec681f3Smrg iris_load_register_mem32(batch, reg + 4, bo, offset + 4); 5477ec681f3Smrg} 5487ec681f3Smrg 5497ec681f3Smrgstatic void 5507ec681f3Smrgiris_store_register_mem32(struct iris_batch *batch, uint32_t reg, 5517ec681f3Smrg struct iris_bo *bo, uint32_t offset, 5527ec681f3Smrg bool predicated) 5537ec681f3Smrg{ 5547ec681f3Smrg iris_batch_sync_region_start(batch); 5557ec681f3Smrg iris_emit_cmd(batch, GENX(MI_STORE_REGISTER_MEM), srm) { 5567ec681f3Smrg srm.RegisterAddress = reg; 5577ec681f3Smrg srm.MemoryAddress = rw_bo(bo, offset, IRIS_DOMAIN_OTHER_WRITE); 5587ec681f3Smrg srm.PredicateEnable = predicated; 5597ec681f3Smrg } 5607ec681f3Smrg iris_batch_sync_region_end(batch); 5617ec681f3Smrg} 5627ec681f3Smrg 5637ec681f3Smrgstatic void 
5647ec681f3Smrgiris_store_register_mem64(struct iris_batch *batch, uint32_t reg, 5657ec681f3Smrg struct iris_bo *bo, uint32_t offset, 5667ec681f3Smrg bool predicated) 5677ec681f3Smrg{ 5687ec681f3Smrg iris_store_register_mem32(batch, reg + 0, bo, offset + 0, predicated); 5697ec681f3Smrg iris_store_register_mem32(batch, reg + 4, bo, offset + 4, predicated); 5707ec681f3Smrg} 5717ec681f3Smrg 5727ec681f3Smrgstatic void 5737ec681f3Smrgiris_store_data_imm32(struct iris_batch *batch, 5747ec681f3Smrg struct iris_bo *bo, uint32_t offset, 5757ec681f3Smrg uint32_t imm) 5767ec681f3Smrg{ 5777ec681f3Smrg iris_batch_sync_region_start(batch); 5787ec681f3Smrg iris_emit_cmd(batch, GENX(MI_STORE_DATA_IMM), sdi) { 5797ec681f3Smrg sdi.Address = rw_bo(bo, offset, IRIS_DOMAIN_OTHER_WRITE); 5807ec681f3Smrg sdi.ImmediateData = imm; 5817ec681f3Smrg } 5827ec681f3Smrg iris_batch_sync_region_end(batch); 5837ec681f3Smrg} 5847ec681f3Smrg 5857ec681f3Smrgstatic void 5867ec681f3Smrgiris_store_data_imm64(struct iris_batch *batch, 5877ec681f3Smrg struct iris_bo *bo, uint32_t offset, 5887ec681f3Smrg uint64_t imm) 5897ec681f3Smrg{ 5907ec681f3Smrg /* Can't use iris_emit_cmd because MI_STORE_DATA_IMM has a length of 5917ec681f3Smrg * 2 in genxml but it's actually variable length and we need 5 DWords. 
5927ec681f3Smrg */ 5937ec681f3Smrg void *map = iris_get_command_space(batch, 4 * 5); 5947ec681f3Smrg iris_batch_sync_region_start(batch); 5957ec681f3Smrg _iris_pack_command(batch, GENX(MI_STORE_DATA_IMM), map, sdi) { 5967ec681f3Smrg sdi.DWordLength = 5 - 2; 5977ec681f3Smrg sdi.Address = rw_bo(bo, offset, IRIS_DOMAIN_OTHER_WRITE); 5987ec681f3Smrg sdi.ImmediateData = imm; 5997ec681f3Smrg } 6007ec681f3Smrg iris_batch_sync_region_end(batch); 6017ec681f3Smrg} 6027ec681f3Smrg 6037ec681f3Smrgstatic void 6047ec681f3Smrgiris_copy_mem_mem(struct iris_batch *batch, 6057ec681f3Smrg struct iris_bo *dst_bo, uint32_t dst_offset, 6067ec681f3Smrg struct iris_bo *src_bo, uint32_t src_offset, 6077ec681f3Smrg unsigned bytes) 6087ec681f3Smrg{ 6097ec681f3Smrg /* MI_COPY_MEM_MEM operates on DWords. */ 6107ec681f3Smrg assert(bytes % 4 == 0); 6117ec681f3Smrg assert(dst_offset % 4 == 0); 6127ec681f3Smrg assert(src_offset % 4 == 0); 6137ec681f3Smrg iris_batch_sync_region_start(batch); 6147ec681f3Smrg 6157ec681f3Smrg for (unsigned i = 0; i < bytes; i += 4) { 6167ec681f3Smrg iris_emit_cmd(batch, GENX(MI_COPY_MEM_MEM), cp) { 6177ec681f3Smrg cp.DestinationMemoryAddress = rw_bo(dst_bo, dst_offset + i, 6187ec681f3Smrg IRIS_DOMAIN_OTHER_WRITE); 6197ec681f3Smrg cp.SourceMemoryAddress = ro_bo(src_bo, src_offset + i); 6207ec681f3Smrg } 6217ec681f3Smrg } 6227ec681f3Smrg 6237ec681f3Smrg iris_batch_sync_region_end(batch); 6247ec681f3Smrg} 6257ec681f3Smrg 6269f464c52Smayastatic void 6279f464c52Smayaemit_pipeline_select(struct iris_batch *batch, uint32_t pipeline) 6289f464c52Smaya{ 6297ec681f3Smrg#if GFX_VER >= 8 && GFX_VER < 10 6309f464c52Smaya /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: 6319f464c52Smaya * 6329f464c52Smaya * Software must clear the COLOR_CALC_STATE Valid field in 6339f464c52Smaya * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT 6349f464c52Smaya * with Pipeline Select set to GPGPU. 
6359f464c52Smaya * 6367ec681f3Smrg * The internal hardware docs recommend the same workaround for Gfx9 6379f464c52Smaya * hardware too. 6389f464c52Smaya */ 6399f464c52Smaya if (pipeline == GPGPU) 6409f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), t); 6419f464c52Smaya#endif 6429f464c52Smaya 6439f464c52Smaya 6449f464c52Smaya /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] 6459f464c52Smaya * PIPELINE_SELECT [DevBWR+]": 6469f464c52Smaya * 6479f464c52Smaya * "Project: DEVSNB+ 6489f464c52Smaya * 6499f464c52Smaya * Software must ensure all the write caches are flushed through a 6509f464c52Smaya * stalling PIPE_CONTROL command followed by another PIPE_CONTROL 6519f464c52Smaya * command to invalidate read only caches prior to programming 6529f464c52Smaya * MI_PIPELINE_SELECT command to change the Pipeline Select Mode." 6539f464c52Smaya */ 6549f464c52Smaya iris_emit_pipe_control_flush(batch, 6557ec681f3Smrg "workaround: PIPELINE_SELECT flushes (1/2)", 6569f464c52Smaya PIPE_CONTROL_RENDER_TARGET_FLUSH | 6579f464c52Smaya PIPE_CONTROL_DEPTH_CACHE_FLUSH | 6589f464c52Smaya PIPE_CONTROL_DATA_CACHE_FLUSH | 6599f464c52Smaya PIPE_CONTROL_CS_STALL); 6609f464c52Smaya 6619f464c52Smaya iris_emit_pipe_control_flush(batch, 6627ec681f3Smrg "workaround: PIPELINE_SELECT flushes (2/2)", 6639f464c52Smaya PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 6649f464c52Smaya PIPE_CONTROL_CONST_CACHE_INVALIDATE | 6659f464c52Smaya PIPE_CONTROL_STATE_CACHE_INVALIDATE | 6669f464c52Smaya PIPE_CONTROL_INSTRUCTION_INVALIDATE); 6679f464c52Smaya 6689f464c52Smaya iris_emit_cmd(batch, GENX(PIPELINE_SELECT), sel) { 6697ec681f3Smrg#if GFX_VER >= 9 6707ec681f3Smrg sel.MaskBits = GFX_VER >= 12 ? 
0x13 : 3; 6717ec681f3Smrg sel.MediaSamplerDOPClockGateEnable = GFX_VER >= 12; 6729f464c52Smaya#endif 6739f464c52Smaya sel.PipelineSelection = pipeline; 6749f464c52Smaya } 6759f464c52Smaya} 6769f464c52Smaya 6779f464c52SmayaUNUSED static void 6789f464c52Smayainit_glk_barrier_mode(struct iris_batch *batch, uint32_t value) 6799f464c52Smaya{ 6807ec681f3Smrg#if GFX_VER == 9 6819f464c52Smaya /* Project: DevGLK 6829f464c52Smaya * 6839f464c52Smaya * "This chicken bit works around a hardware issue with barrier 6849f464c52Smaya * logic encountered when switching between GPGPU and 3D pipelines. 6859f464c52Smaya * To workaround the issue, this mode bit should be set after a 6869f464c52Smaya * pipeline is selected." 6879f464c52Smaya */ 6887ec681f3Smrg iris_emit_reg(batch, GENX(SLICE_COMMON_ECO_CHICKEN1), reg) { 6899f464c52Smaya reg.GLKBarrierMode = value; 6909f464c52Smaya reg.GLKBarrierModeMask = 1; 6919f464c52Smaya } 6929f464c52Smaya#endif 6939f464c52Smaya} 6949f464c52Smaya 6959f464c52Smayastatic void 6969f464c52Smayainit_state_base_address(struct iris_batch *batch) 6979f464c52Smaya{ 6987ec681f3Smrg struct isl_device *isl_dev = &batch->screen->isl_dev; 6997ec681f3Smrg uint32_t mocs = isl_mocs(isl_dev, 0, false); 7007ec681f3Smrg flush_before_state_base_change(batch); 7019f464c52Smaya 7029f464c52Smaya /* We program most base addresses once at context initialization time. 7039f464c52Smaya * Each base address points at a 4GB memory zone, and never needs to 7049f464c52Smaya * change. See iris_bufmgr.h for a description of the memory zones. 7059f464c52Smaya * 7069f464c52Smaya * The one exception is Surface State Base Address, which needs to be 7079f464c52Smaya * updated occasionally. See iris_binder.c for the details there. 
7089f464c52Smaya */ 7099f464c52Smaya iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) { 7107ec681f3Smrg sba.GeneralStateMOCS = mocs; 7117ec681f3Smrg sba.StatelessDataPortAccessMOCS = mocs; 7127ec681f3Smrg sba.DynamicStateMOCS = mocs; 7137ec681f3Smrg sba.IndirectObjectMOCS = mocs; 7147ec681f3Smrg sba.InstructionMOCS = mocs; 7157ec681f3Smrg sba.SurfaceStateMOCS = mocs; 7169f464c52Smaya 7179f464c52Smaya sba.GeneralStateBaseAddressModifyEnable = true; 7189f464c52Smaya sba.DynamicStateBaseAddressModifyEnable = true; 7199f464c52Smaya sba.IndirectObjectBaseAddressModifyEnable = true; 7209f464c52Smaya sba.InstructionBaseAddressModifyEnable = true; 7219f464c52Smaya sba.GeneralStateBufferSizeModifyEnable = true; 7229f464c52Smaya sba.DynamicStateBufferSizeModifyEnable = true; 7237ec681f3Smrg#if (GFX_VER >= 9) 7247ec681f3Smrg sba.BindlessSurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_BINDLESS_START); 7257ec681f3Smrg sba.BindlessSurfaceStateSize = (IRIS_BINDLESS_SIZE >> 12) - 1; 7269f464c52Smaya sba.BindlessSurfaceStateBaseAddressModifyEnable = true; 7277ec681f3Smrg sba.BindlessSurfaceStateMOCS = mocs; 7289f464c52Smaya#endif 7299f464c52Smaya sba.IndirectObjectBufferSizeModifyEnable = true; 7309f464c52Smaya sba.InstructionBuffersizeModifyEnable = true; 7319f464c52Smaya 7329f464c52Smaya sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START); 7339f464c52Smaya sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START); 7349f464c52Smaya 7359f464c52Smaya sba.GeneralStateBufferSize = 0xfffff; 7369f464c52Smaya sba.IndirectObjectBufferSize = 0xfffff; 7379f464c52Smaya sba.InstructionBufferSize = 0xfffff; 7389f464c52Smaya sba.DynamicStateBufferSize = 0xfffff; 7399f464c52Smaya } 7407ec681f3Smrg 7417ec681f3Smrg flush_after_state_base_change(batch); 7429f464c52Smaya} 7439f464c52Smaya 7449f464c52Smayastatic void 7457ec681f3Smrgiris_emit_l3_config(struct iris_batch *batch, 7467ec681f3Smrg const struct intel_l3_config *cfg) 7477ec681f3Smrg{ 7487ec681f3Smrg 
assert(cfg || GFX_VER >= 12); 7497ec681f3Smrg 7507ec681f3Smrg#if GFX_VER >= 12 7517ec681f3Smrg#define L3_ALLOCATION_REG GENX(L3ALLOC) 7527ec681f3Smrg#define L3_ALLOCATION_REG_num GENX(L3ALLOC_num) 7537ec681f3Smrg#else 7547ec681f3Smrg#define L3_ALLOCATION_REG GENX(L3CNTLREG) 7557ec681f3Smrg#define L3_ALLOCATION_REG_num GENX(L3CNTLREG_num) 7567ec681f3Smrg#endif 7577ec681f3Smrg 7587ec681f3Smrg iris_emit_reg(batch, L3_ALLOCATION_REG, reg) { 7597ec681f3Smrg#if GFX_VER < 11 7607ec681f3Smrg reg.SLMEnable = cfg->n[INTEL_L3P_SLM] > 0; 7617ec681f3Smrg#endif 7627ec681f3Smrg#if GFX_VER == 11 7637ec681f3Smrg /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set 7649f464c52Smaya * in L3CNTLREG register. The default setting of the bit is not the 7659f464c52Smaya * desirable behavior. 7669f464c52Smaya */ 7679f464c52Smaya reg.ErrorDetectionBehaviorControl = true; 7689f464c52Smaya reg.UseFullWays = true; 7699f464c52Smaya#endif 7707ec681f3Smrg if (GFX_VER < 12 || cfg) { 7717ec681f3Smrg reg.URBAllocation = cfg->n[INTEL_L3P_URB]; 7727ec681f3Smrg reg.ROAllocation = cfg->n[INTEL_L3P_RO]; 7737ec681f3Smrg reg.DCAllocation = cfg->n[INTEL_L3P_DC]; 7747ec681f3Smrg reg.AllAllocation = cfg->n[INTEL_L3P_ALL]; 7757ec681f3Smrg } else { 7767ec681f3Smrg#if GFX_VER >= 12 7777ec681f3Smrg reg.L3FullWayAllocationEnable = true; 7787ec681f3Smrg#endif 7797ec681f3Smrg } 7809f464c52Smaya } 7819f464c52Smaya} 7829f464c52Smaya 7837ec681f3Smrg#if GFX_VER == 9 7849f464c52Smayastatic void 7859f464c52Smayairis_enable_obj_preemption(struct iris_batch *batch, bool enable) 7869f464c52Smaya{ 7879f464c52Smaya /* A fixed function pipe flush is required before modifying this field */ 7887ec681f3Smrg iris_emit_end_of_pipe_sync(batch, enable ? 
"enable preemption" 7897ec681f3Smrg : "disable preemption", 7907ec681f3Smrg PIPE_CONTROL_RENDER_TARGET_FLUSH); 7919f464c52Smaya 7929f464c52Smaya /* enable object level preemption */ 7937ec681f3Smrg iris_emit_reg(batch, GENX(CS_CHICKEN1), reg) { 7949f464c52Smaya reg.ReplayMode = enable; 7959f464c52Smaya reg.ReplayModeMask = true; 7969f464c52Smaya } 7979f464c52Smaya} 7989f464c52Smaya#endif 7999f464c52Smaya 8007ec681f3Smrg/** 8017ec681f3Smrg * Compute an \p n x \p m pixel hashing table usable as slice, subslice or 8027ec681f3Smrg * pixel pipe hashing table. The resulting table is the cyclic repetition of 8037ec681f3Smrg * a fixed pattern with periodicity equal to \p period. 8047ec681f3Smrg * 8057ec681f3Smrg * If \p index is specified to be equal to \p period, a 2-way hashing table 8067ec681f3Smrg * will be generated such that indices 0 and 1 are returned for the following 8077ec681f3Smrg * fractions of entries respectively: 8087ec681f3Smrg * 8097ec681f3Smrg * p_0 = ceil(period / 2) / period 8107ec681f3Smrg * p_1 = floor(period / 2) / period 8117ec681f3Smrg * 8127ec681f3Smrg * If \p index is even and less than \p period, a 3-way hashing table will be 8137ec681f3Smrg * generated such that indices 0, 1 and 2 are returned for the following 8147ec681f3Smrg * fractions of entries: 8157ec681f3Smrg * 8167ec681f3Smrg * p_0 = (ceil(period / 2) - 1) / period 8177ec681f3Smrg * p_1 = floor(period / 2) / period 8187ec681f3Smrg * p_2 = 1 / period 8197ec681f3Smrg * 8207ec681f3Smrg * The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0 8217ec681f3Smrg * and p_1 will be swapped for the result. Note that in the context of pixel 8227ec681f3Smrg * pipe hashing this can be always 0 on Gfx12 platforms, since the hardware 8237ec681f3Smrg * transparently remaps logical indices found on the table to physical pixel 8247ec681f3Smrg * pipe indices from the highest to lowest EU count. 
8257ec681f3Smrg */ 8267ec681f3SmrgUNUSED static void 8277ec681f3Smrgcalculate_pixel_hashing_table(unsigned n, unsigned m, 8287ec681f3Smrg unsigned period, unsigned index, bool flip, 8297ec681f3Smrg uint32_t *p) 8307ec681f3Smrg{ 8317ec681f3Smrg for (unsigned i = 0; i < n; i++) { 8327ec681f3Smrg for (unsigned j = 0; j < m; j++) { 8337ec681f3Smrg const unsigned k = (i + j) % period; 8347ec681f3Smrg p[j + m * i] = (k == index ? 2 : (k & 1) ^ flip); 8357ec681f3Smrg } 8367ec681f3Smrg } 8377ec681f3Smrg} 8387ec681f3Smrg 8397ec681f3Smrg#if GFX_VER == 11 8407ec681f3Smrgstatic void 8417ec681f3Smrggfx11_upload_pixel_hashing_tables(struct iris_batch *batch) 8427ec681f3Smrg{ 8437ec681f3Smrg const struct intel_device_info *devinfo = &batch->screen->devinfo; 8447ec681f3Smrg assert(devinfo->ppipe_subslices[2] == 0); 8457ec681f3Smrg 8467ec681f3Smrg if (devinfo->ppipe_subslices[0] == devinfo->ppipe_subslices[1]) 8477ec681f3Smrg return; 8487ec681f3Smrg 8497ec681f3Smrg struct iris_context *ice = batch->ice; 8507ec681f3Smrg assert(&ice->batches[IRIS_BATCH_RENDER] == batch); 8517ec681f3Smrg 8527ec681f3Smrg unsigned size = GENX(SLICE_HASH_TABLE_length) * 4; 8537ec681f3Smrg uint32_t hash_address; 8547ec681f3Smrg struct pipe_resource *tmp = NULL; 8557ec681f3Smrg uint32_t *map = 8567ec681f3Smrg stream_state(batch, ice->state.dynamic_uploader, &tmp, 8577ec681f3Smrg size, 64, &hash_address); 8587ec681f3Smrg pipe_resource_reference(&tmp, NULL); 8597ec681f3Smrg 8607ec681f3Smrg const bool flip = devinfo->ppipe_subslices[0] < devinfo->ppipe_subslices[1]; 8617ec681f3Smrg struct GENX(SLICE_HASH_TABLE) table; 8627ec681f3Smrg calculate_pixel_hashing_table(16, 16, 3, 3, flip, table.Entry[0]); 8637ec681f3Smrg 8647ec681f3Smrg GENX(SLICE_HASH_TABLE_pack)(NULL, map, &table); 8657ec681f3Smrg 8667ec681f3Smrg iris_emit_cmd(batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) { 8677ec681f3Smrg ptr.SliceHashStatePointerValid = true; 8687ec681f3Smrg ptr.SliceHashTableStatePointer = hash_address; 8697ec681f3Smrg 
} 8707ec681f3Smrg 8717ec681f3Smrg iris_emit_cmd(batch, GENX(3DSTATE_3D_MODE), mode) { 8727ec681f3Smrg mode.SliceHashingTableEnable = true; 8737ec681f3Smrg } 8747ec681f3Smrg} 8757ec681f3Smrg#elif GFX_VERx10 == 120 8767ec681f3Smrgstatic void 8777ec681f3Smrggfx12_upload_pixel_hashing_tables(struct iris_batch *batch) 8787ec681f3Smrg{ 8797ec681f3Smrg const struct intel_device_info *devinfo = &batch->screen->devinfo; 8807ec681f3Smrg /* For each n calculate ppipes_of[n], equal to the number of pixel pipes 8817ec681f3Smrg * present with n active dual subslices. 8827ec681f3Smrg */ 8837ec681f3Smrg unsigned ppipes_of[3] = {}; 8847ec681f3Smrg 8857ec681f3Smrg for (unsigned n = 0; n < ARRAY_SIZE(ppipes_of); n++) { 8867ec681f3Smrg for (unsigned p = 0; p < ARRAY_SIZE(devinfo->ppipe_subslices); p++) 8877ec681f3Smrg ppipes_of[n] += (devinfo->ppipe_subslices[p] == n); 8887ec681f3Smrg } 8897ec681f3Smrg 8907ec681f3Smrg /* Gfx12 has three pixel pipes. */ 8917ec681f3Smrg assert(ppipes_of[0] + ppipes_of[1] + ppipes_of[2] == 3); 8927ec681f3Smrg 8937ec681f3Smrg if (ppipes_of[2] == 3 || ppipes_of[0] == 2) { 8947ec681f3Smrg /* All three pixel pipes have the maximum number of active dual 8957ec681f3Smrg * subslices, or there is only one active pixel pipe: Nothing to do. 
8967ec681f3Smrg */ 8977ec681f3Smrg return; 8987ec681f3Smrg } 8997ec681f3Smrg 9007ec681f3Smrg iris_emit_cmd(batch, GENX(3DSTATE_SUBSLICE_HASH_TABLE), p) { 9017ec681f3Smrg p.SliceHashControl[0] = TABLE_0; 9027ec681f3Smrg 9037ec681f3Smrg if (ppipes_of[2] == 2 && ppipes_of[0] == 1) 9047ec681f3Smrg calculate_pixel_hashing_table(8, 16, 2, 2, 0, p.TwoWayTableEntry[0]); 9057ec681f3Smrg else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1) 9067ec681f3Smrg calculate_pixel_hashing_table(8, 16, 3, 3, 0, p.TwoWayTableEntry[0]); 9077ec681f3Smrg 9087ec681f3Smrg if (ppipes_of[2] == 2 && ppipes_of[1] == 1) 9097ec681f3Smrg calculate_pixel_hashing_table(8, 16, 5, 4, 0, p.ThreeWayTableEntry[0]); 9107ec681f3Smrg else if (ppipes_of[2] == 2 && ppipes_of[0] == 1) 9117ec681f3Smrg calculate_pixel_hashing_table(8, 16, 2, 2, 0, p.ThreeWayTableEntry[0]); 9127ec681f3Smrg else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1) 9137ec681f3Smrg calculate_pixel_hashing_table(8, 16, 3, 3, 0, p.ThreeWayTableEntry[0]); 9147ec681f3Smrg else 9157ec681f3Smrg unreachable("Illegal fusing."); 9167ec681f3Smrg } 9177ec681f3Smrg 9187ec681f3Smrg iris_emit_cmd(batch, GENX(3DSTATE_3D_MODE), p) { 9197ec681f3Smrg p.SubsliceHashingTableEnable = true; 9207ec681f3Smrg p.SubsliceHashingTableEnableMask = true; 9217ec681f3Smrg } 9227ec681f3Smrg} 9237ec681f3Smrg#endif 9247ec681f3Smrg 9257ec681f3Smrgstatic void 9267ec681f3Smrgiris_alloc_push_constants(struct iris_batch *batch) 9277ec681f3Smrg{ 9287ec681f3Smrg const struct intel_device_info *devinfo = &batch->screen->devinfo; 9297ec681f3Smrg 9307ec681f3Smrg /* For now, we set a static partitioning of the push constant area, 9317ec681f3Smrg * assuming that all stages could be in use. 9327ec681f3Smrg * 9337ec681f3Smrg * TODO: Try lazily allocating the HS/DS/GS sections as needed, and 9347ec681f3Smrg * see if that improves performance by offering more space to 9357ec681f3Smrg * the VS/FS when those aren't in use. 
Also, try dynamically 9367ec681f3Smrg * enabling/disabling it like i965 does. This would be more 9377ec681f3Smrg * stalls and may not actually help; we don't know yet. 9387ec681f3Smrg */ 9397ec681f3Smrg 9407ec681f3Smrg /* Divide as equally as possible with any remainder given to FRAGMENT. */ 9417ec681f3Smrg const unsigned push_constant_kb = devinfo->max_constant_urb_size_kb; 9427ec681f3Smrg const unsigned stage_size = push_constant_kb / 5; 9437ec681f3Smrg const unsigned frag_size = push_constant_kb - 4 * stage_size; 9447ec681f3Smrg 9457ec681f3Smrg for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) { 9467ec681f3Smrg iris_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) { 9477ec681f3Smrg alloc._3DCommandSubOpcode = 18 + i; 9487ec681f3Smrg alloc.ConstantBufferOffset = stage_size * i; 9497ec681f3Smrg alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? frag_size : stage_size; 9507ec681f3Smrg } 9517ec681f3Smrg } 9527ec681f3Smrg} 9537ec681f3Smrg 9547ec681f3Smrg#if GFX_VER >= 12 9557ec681f3Smrgstatic void 9567ec681f3Smrginit_aux_map_state(struct iris_batch *batch); 9577ec681f3Smrg#endif 9587ec681f3Smrg 9597ec681f3Smrg/** 9607ec681f3Smrg * Upload initial GPU state for any kind of context. 9617ec681f3Smrg * 9627ec681f3Smrg * These need to happen for both render and compute. 9637ec681f3Smrg */ 9647ec681f3Smrgstatic void 9657ec681f3Smrgiris_init_common_context(struct iris_batch *batch) 9667ec681f3Smrg{ 9677ec681f3Smrg#if GFX_VER == 11 9687ec681f3Smrg iris_emit_reg(batch, GENX(SAMPLER_MODE), reg) { 9697ec681f3Smrg reg.HeaderlessMessageforPreemptableContexts = 1; 9707ec681f3Smrg reg.HeaderlessMessageforPreemptableContextsMask = 1; 9717ec681f3Smrg } 9727ec681f3Smrg 9737ec681f3Smrg /* Bit 1 must be set in HALF_SLICE_CHICKEN7. 
*/ 9747ec681f3Smrg iris_emit_reg(batch, GENX(HALF_SLICE_CHICKEN7), reg) { 9757ec681f3Smrg reg.EnabledTexelOffsetPrecisionFix = 1; 9767ec681f3Smrg reg.EnabledTexelOffsetPrecisionFixMask = 1; 9777ec681f3Smrg } 9787ec681f3Smrg#endif 9797ec681f3Smrg} 9807ec681f3Smrg 9819f464c52Smaya/** 9829f464c52Smaya * Upload the initial GPU state for a render context. 9839f464c52Smaya * 9849f464c52Smaya * This sets some invariant state that needs to be programmed a particular 9859f464c52Smaya * way, but we never actually change. 9869f464c52Smaya */ 9879f464c52Smayastatic void 9887ec681f3Smrgiris_init_render_context(struct iris_batch *batch) 9899f464c52Smaya{ 9907ec681f3Smrg UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo; 9917ec681f3Smrg 9927ec681f3Smrg iris_batch_sync_region_start(batch); 9939f464c52Smaya 9949f464c52Smaya emit_pipeline_select(batch, _3D); 9959f464c52Smaya 9967ec681f3Smrg iris_emit_l3_config(batch, batch->screen->l3_config_3d); 9979f464c52Smaya 9989f464c52Smaya init_state_base_address(batch); 9999f464c52Smaya 10007ec681f3Smrg iris_init_common_context(batch); 10017ec681f3Smrg 10027ec681f3Smrg#if GFX_VER >= 9 10037ec681f3Smrg iris_emit_reg(batch, GENX(CS_DEBUG_MODE2), reg) { 10049f464c52Smaya reg.CONSTANT_BUFFERAddressOffsetDisable = true; 10059f464c52Smaya reg.CONSTANT_BUFFERAddressOffsetDisableMask = true; 10069f464c52Smaya } 10079f464c52Smaya#else 10087ec681f3Smrg iris_emit_reg(batch, GENX(INSTPM), reg) { 10099f464c52Smaya reg.CONSTANT_BUFFERAddressOffsetDisable = true; 10109f464c52Smaya reg.CONSTANT_BUFFERAddressOffsetDisableMask = true; 10119f464c52Smaya } 10129f464c52Smaya#endif 10139f464c52Smaya 10147ec681f3Smrg#if GFX_VER == 9 10157ec681f3Smrg iris_emit_reg(batch, GENX(CACHE_MODE_1), reg) { 10169f464c52Smaya reg.FloatBlendOptimizationEnable = true; 10179f464c52Smaya reg.FloatBlendOptimizationEnableMask = true; 10187ec681f3Smrg reg.MSCRAWHazardAvoidanceBit = true; 10197ec681f3Smrg reg.MSCRAWHazardAvoidanceBitMask = true; 
10209f464c52Smaya reg.PartialResolveDisableInVC = true; 10219f464c52Smaya reg.PartialResolveDisableInVCMask = true; 10229f464c52Smaya } 10239f464c52Smaya 10249f464c52Smaya if (devinfo->is_geminilake) 10259f464c52Smaya init_glk_barrier_mode(batch, GLK_BARRIER_MODE_3D_HULL); 10269f464c52Smaya#endif 10279f464c52Smaya 10287ec681f3Smrg#if GFX_VER == 11 10297ec681f3Smrg iris_emit_reg(batch, GENX(TCCNTLREG), reg) { 10307ec681f3Smrg reg.L3DataPartialWriteMergingEnable = true; 10317ec681f3Smrg reg.ColorZPartialWriteMergingEnable = true; 10327ec681f3Smrg reg.URBPartialWriteMergingEnable = true; 10337ec681f3Smrg reg.TCDisable = true; 10347ec681f3Smrg } 10359f464c52Smaya 10367ec681f3Smrg /* Hardware specification recommends disabling repacking for the 10377ec681f3Smrg * compatibility with decompression mechanism in display controller. 10387ec681f3Smrg */ 10397ec681f3Smrg if (devinfo->disable_ccs_repack) { 10407ec681f3Smrg iris_emit_reg(batch, GENX(CACHE_MODE_0), reg) { 10417ec681f3Smrg reg.DisableRepackingforCompression = true; 10427ec681f3Smrg reg.DisableRepackingforCompressionMask = true; 10439f464c52Smaya } 10447ec681f3Smrg } 10459f464c52Smaya 10467ec681f3Smrg gfx11_upload_pixel_hashing_tables(batch); 10477ec681f3Smrg#endif 10489f464c52Smaya 10497ec681f3Smrg#if GFX_VERx10 == 120 10507ec681f3Smrg gfx12_upload_pixel_hashing_tables(batch); 10519f464c52Smaya#endif 10529f464c52Smaya 10539f464c52Smaya /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined, so we want to avoid 10549f464c52Smaya * changing it dynamically. We set it to the maximum size here, and 10559f464c52Smaya * instead include the render target dimensions in the viewport, so 10569f464c52Smaya * viewport extents clipping takes care of pruning stray geometry. 
10579f464c52Smaya */ 10589f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { 10599f464c52Smaya rect.ClippedDrawingRectangleXMax = UINT16_MAX; 10609f464c52Smaya rect.ClippedDrawingRectangleYMax = UINT16_MAX; 10619f464c52Smaya } 10629f464c52Smaya 10639f464c52Smaya /* Set the initial MSAA sample positions. */ 10649f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) { 10657ec681f3Smrg INTEL_SAMPLE_POS_1X(pat._1xSample); 10667ec681f3Smrg INTEL_SAMPLE_POS_2X(pat._2xSample); 10677ec681f3Smrg INTEL_SAMPLE_POS_4X(pat._4xSample); 10687ec681f3Smrg INTEL_SAMPLE_POS_8X(pat._8xSample); 10697ec681f3Smrg#if GFX_VER >= 9 10707ec681f3Smrg INTEL_SAMPLE_POS_16X(pat._16xSample); 10719f464c52Smaya#endif 10729f464c52Smaya } 10739f464c52Smaya 10749f464c52Smaya /* Use the legacy AA line coverage computation. */ 10759f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo); 10769f464c52Smaya 10779f464c52Smaya /* Disable chromakeying (it's for media) */ 10789f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo); 10799f464c52Smaya 10809f464c52Smaya /* We want regular rendering, not special HiZ operations. */ 10819f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo); 10829f464c52Smaya 10839f464c52Smaya /* No polygon stippling offsets are necessary. 
*/ 10849f464c52Smaya /* TODO: may need to set an offset for origin-UL framebuffers */ 10859f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo); 10869f464c52Smaya 10877ec681f3Smrg iris_alloc_push_constants(batch); 10889f464c52Smaya 10897ec681f3Smrg 10907ec681f3Smrg#if GFX_VER >= 12 10917ec681f3Smrg init_aux_map_state(batch); 10929f464c52Smaya#endif 10937ec681f3Smrg 10947ec681f3Smrg iris_batch_sync_region_end(batch); 10959f464c52Smaya} 10969f464c52Smaya 10979f464c52Smayastatic void 10987ec681f3Smrgiris_init_compute_context(struct iris_batch *batch) 10999f464c52Smaya{ 11007ec681f3Smrg UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo; 11019f464c52Smaya 11027ec681f3Smrg iris_batch_sync_region_start(batch); 11037ec681f3Smrg 11047ec681f3Smrg /* Wa_1607854226: 11057ec681f3Smrg * 11067ec681f3Smrg * Start with pipeline in 3D mode to set the STATE_BASE_ADDRESS. 11077ec681f3Smrg */ 11087ec681f3Smrg#if GFX_VER == 12 11097ec681f3Smrg emit_pipeline_select(batch, _3D); 11107ec681f3Smrg#else 11119f464c52Smaya emit_pipeline_select(batch, GPGPU); 11127ec681f3Smrg#endif 11139f464c52Smaya 11147ec681f3Smrg iris_emit_l3_config(batch, batch->screen->l3_config_cs); 11159f464c52Smaya 11169f464c52Smaya init_state_base_address(batch); 11179f464c52Smaya 11187ec681f3Smrg iris_init_common_context(batch); 11197ec681f3Smrg 11207ec681f3Smrg#if GFX_VER == 12 11217ec681f3Smrg emit_pipeline_select(batch, GPGPU); 11227ec681f3Smrg#endif 11237ec681f3Smrg 11247ec681f3Smrg#if GFX_VER == 9 11259f464c52Smaya if (devinfo->is_geminilake) 11269f464c52Smaya init_glk_barrier_mode(batch, GLK_BARRIER_MODE_GPGPU); 11279f464c52Smaya#endif 11287ec681f3Smrg 11297ec681f3Smrg#if GFX_VER >= 12 11307ec681f3Smrg init_aux_map_state(batch); 11317ec681f3Smrg#endif 11327ec681f3Smrg 11337ec681f3Smrg iris_batch_sync_region_end(batch); 11349f464c52Smaya} 11359f464c52Smaya 11369f464c52Smayastruct iris_vertex_buffer_state { 11379f464c52Smaya /** The VERTEX_BUFFER_STATE hardware 
structure. */ 11389f464c52Smaya uint32_t state[GENX(VERTEX_BUFFER_STATE_length)]; 11399f464c52Smaya 11409f464c52Smaya /** The resource to source vertex data from. */ 11419f464c52Smaya struct pipe_resource *resource; 11427ec681f3Smrg 11437ec681f3Smrg int offset; 11449f464c52Smaya}; 11459f464c52Smaya 11469f464c52Smayastruct iris_depth_buffer_state { 11479f464c52Smaya /* Depth/HiZ/Stencil related hardware packets. */ 11489f464c52Smaya uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) + 11499f464c52Smaya GENX(3DSTATE_STENCIL_BUFFER_length) + 11509f464c52Smaya GENX(3DSTATE_HIER_DEPTH_BUFFER_length) + 11519f464c52Smaya GENX(3DSTATE_CLEAR_PARAMS_length)]; 11529f464c52Smaya}; 11539f464c52Smaya 11547ec681f3Smrg#if GFX_VERx10 == 120 11557ec681f3Smrg enum iris_depth_reg_mode { 11567ec681f3Smrg IRIS_DEPTH_REG_MODE_HW_DEFAULT = 0, 11577ec681f3Smrg IRIS_DEPTH_REG_MODE_D16, 11587ec681f3Smrg IRIS_DEPTH_REG_MODE_UNKNOWN, 11597ec681f3Smrg }; 11607ec681f3Smrg#endif 11617ec681f3Smrg 11629f464c52Smaya/** 11639f464c52Smaya * Generation-specific context state (ice->state.genx->...). 11649f464c52Smaya * 11659f464c52Smaya * Most state can go in iris_context directly, but these encode hardware 11669f464c52Smaya * packets which vary by generation. 11679f464c52Smaya */ 11689f464c52Smayastruct iris_genx_state { 11699f464c52Smaya struct iris_vertex_buffer_state vertex_buffers[33]; 11707ec681f3Smrg uint32_t last_index_buffer[GENX(3DSTATE_INDEX_BUFFER_length)]; 11719f464c52Smaya 11729f464c52Smaya struct iris_depth_buffer_state depth_buffer; 11739f464c52Smaya 11749f464c52Smaya uint32_t so_buffers[4 * GENX(3DSTATE_SO_BUFFER_length)]; 11759f464c52Smaya 11767ec681f3Smrg#if GFX_VER == 8 11777ec681f3Smrg bool pma_fix_enabled; 11787ec681f3Smrg#endif 11797ec681f3Smrg 11807ec681f3Smrg#if GFX_VER == 9 11819f464c52Smaya /* Is object level preemption enabled? 
*/ 11829f464c52Smaya bool object_preemption; 11839f464c52Smaya#endif 11849f464c52Smaya 11857ec681f3Smrg#if GFX_VERx10 == 120 11867ec681f3Smrg enum iris_depth_reg_mode depth_reg_mode; 11877ec681f3Smrg#endif 11887ec681f3Smrg 11899f464c52Smaya struct { 11907ec681f3Smrg#if GFX_VER == 8 11919f464c52Smaya struct brw_image_param image_param[PIPE_MAX_SHADER_IMAGES]; 11929f464c52Smaya#endif 11939f464c52Smaya } shaders[MESA_SHADER_STAGES]; 11949f464c52Smaya}; 11959f464c52Smaya 11969f464c52Smaya/** 11979f464c52Smaya * The pipe->set_blend_color() driver hook. 11989f464c52Smaya * 11999f464c52Smaya * This corresponds to our COLOR_CALC_STATE. 12009f464c52Smaya */ 12019f464c52Smayastatic void 12029f464c52Smayairis_set_blend_color(struct pipe_context *ctx, 12039f464c52Smaya const struct pipe_blend_color *state) 12049f464c52Smaya{ 12059f464c52Smaya struct iris_context *ice = (struct iris_context *) ctx; 12069f464c52Smaya 12079f464c52Smaya /* Our COLOR_CALC_STATE is exactly pipe_blend_color, so just memcpy */ 12089f464c52Smaya memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color)); 12099f464c52Smaya ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE; 12109f464c52Smaya} 12119f464c52Smaya 12129f464c52Smaya/** 12139f464c52Smaya * Gallium CSO for blend state (see pipe_blend_state). 
12149f464c52Smaya */ 12159f464c52Smayastruct iris_blend_state { 12169f464c52Smaya /** Partial 3DSTATE_PS_BLEND */ 12179f464c52Smaya uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)]; 12189f464c52Smaya 12199f464c52Smaya /** Partial BLEND_STATE */ 12209f464c52Smaya uint32_t blend_state[GENX(BLEND_STATE_length) + 12219f464c52Smaya BRW_MAX_DRAW_BUFFERS * GENX(BLEND_STATE_ENTRY_length)]; 12229f464c52Smaya 12239f464c52Smaya bool alpha_to_coverage; /* for shader key */ 12249f464c52Smaya 12259f464c52Smaya /** Bitfield of whether blending is enabled for RT[i] - for aux resolves */ 12269f464c52Smaya uint8_t blend_enables; 12279f464c52Smaya 12289f464c52Smaya /** Bitfield of whether color writes are enabled for RT[i] */ 12299f464c52Smaya uint8_t color_write_enables; 12309f464c52Smaya 12319f464c52Smaya /** Does RT[0] use dual color blending? */ 12329f464c52Smaya bool dual_color_blending; 12339f464c52Smaya}; 12349f464c52Smaya 12359f464c52Smayastatic enum pipe_blendfactor 12369f464c52Smayafix_blendfactor(enum pipe_blendfactor f, bool alpha_to_one) 12379f464c52Smaya{ 12389f464c52Smaya if (alpha_to_one) { 12399f464c52Smaya if (f == PIPE_BLENDFACTOR_SRC1_ALPHA) 12409f464c52Smaya return PIPE_BLENDFACTOR_ONE; 12419f464c52Smaya 12429f464c52Smaya if (f == PIPE_BLENDFACTOR_INV_SRC1_ALPHA) 12439f464c52Smaya return PIPE_BLENDFACTOR_ZERO; 12449f464c52Smaya } 12459f464c52Smaya 12469f464c52Smaya return f; 12479f464c52Smaya} 12489f464c52Smaya 12499f464c52Smaya/** 12509f464c52Smaya * The pipe->create_blend_state() driver hook. 12519f464c52Smaya * 12529f464c52Smaya * Translates a pipe_blend_state into iris_blend_state. 
12539f464c52Smaya */ 12549f464c52Smayastatic void * 12559f464c52Smayairis_create_blend_state(struct pipe_context *ctx, 12569f464c52Smaya const struct pipe_blend_state *state) 12579f464c52Smaya{ 12589f464c52Smaya struct iris_blend_state *cso = malloc(sizeof(struct iris_blend_state)); 12599f464c52Smaya uint32_t *blend_entry = cso->blend_state + GENX(BLEND_STATE_length); 12609f464c52Smaya 12619f464c52Smaya cso->blend_enables = 0; 12629f464c52Smaya cso->color_write_enables = 0; 12639f464c52Smaya STATIC_ASSERT(BRW_MAX_DRAW_BUFFERS <= 8); 12649f464c52Smaya 12659f464c52Smaya cso->alpha_to_coverage = state->alpha_to_coverage; 12669f464c52Smaya 12679f464c52Smaya bool indep_alpha_blend = false; 12689f464c52Smaya 12699f464c52Smaya for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) { 12709f464c52Smaya const struct pipe_rt_blend_state *rt = 12719f464c52Smaya &state->rt[state->independent_blend_enable ? i : 0]; 12729f464c52Smaya 12739f464c52Smaya enum pipe_blendfactor src_rgb = 12749f464c52Smaya fix_blendfactor(rt->rgb_src_factor, state->alpha_to_one); 12759f464c52Smaya enum pipe_blendfactor src_alpha = 12769f464c52Smaya fix_blendfactor(rt->alpha_src_factor, state->alpha_to_one); 12779f464c52Smaya enum pipe_blendfactor dst_rgb = 12789f464c52Smaya fix_blendfactor(rt->rgb_dst_factor, state->alpha_to_one); 12799f464c52Smaya enum pipe_blendfactor dst_alpha = 12809f464c52Smaya fix_blendfactor(rt->alpha_dst_factor, state->alpha_to_one); 12819f464c52Smaya 12829f464c52Smaya if (rt->rgb_func != rt->alpha_func || 12839f464c52Smaya src_rgb != src_alpha || dst_rgb != dst_alpha) 12849f464c52Smaya indep_alpha_blend = true; 12859f464c52Smaya 12869f464c52Smaya if (rt->blend_enable) 12879f464c52Smaya cso->blend_enables |= 1u << i; 12889f464c52Smaya 12899f464c52Smaya if (rt->colormask) 12909f464c52Smaya cso->color_write_enables |= 1u << i; 12919f464c52Smaya 12929f464c52Smaya iris_pack_state(GENX(BLEND_STATE_ENTRY), blend_entry, be) { 12939f464c52Smaya be.LogicOpEnable = state->logicop_enable; 
12949f464c52Smaya be.LogicOpFunction = state->logicop_func; 12959f464c52Smaya 12969f464c52Smaya be.PreBlendSourceOnlyClampEnable = false; 12979f464c52Smaya be.ColorClampRange = COLORCLAMP_RTFORMAT; 12989f464c52Smaya be.PreBlendColorClampEnable = true; 12999f464c52Smaya be.PostBlendColorClampEnable = true; 13009f464c52Smaya 13019f464c52Smaya be.ColorBufferBlendEnable = rt->blend_enable; 13029f464c52Smaya 13039f464c52Smaya be.ColorBlendFunction = rt->rgb_func; 13049f464c52Smaya be.AlphaBlendFunction = rt->alpha_func; 13057ec681f3Smrg 13067ec681f3Smrg /* The casts prevent warnings about implicit enum type conversions. */ 13077ec681f3Smrg be.SourceBlendFactor = (int) src_rgb; 13087ec681f3Smrg be.SourceAlphaBlendFactor = (int) src_alpha; 13097ec681f3Smrg be.DestinationBlendFactor = (int) dst_rgb; 13107ec681f3Smrg be.DestinationAlphaBlendFactor = (int) dst_alpha; 13119f464c52Smaya 13129f464c52Smaya be.WriteDisableRed = !(rt->colormask & PIPE_MASK_R); 13139f464c52Smaya be.WriteDisableGreen = !(rt->colormask & PIPE_MASK_G); 13149f464c52Smaya be.WriteDisableBlue = !(rt->colormask & PIPE_MASK_B); 13159f464c52Smaya be.WriteDisableAlpha = !(rt->colormask & PIPE_MASK_A); 13169f464c52Smaya } 13179f464c52Smaya blend_entry += GENX(BLEND_STATE_ENTRY_length); 13189f464c52Smaya } 13199f464c52Smaya 13209f464c52Smaya iris_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) { 13219f464c52Smaya /* pb.HasWriteableRT is filled in at draw time. 13229f464c52Smaya * pb.AlphaTestEnable is filled in at draw time. 13239f464c52Smaya * 13249f464c52Smaya * pb.ColorBufferBlendEnable is filled in at draw time so we can avoid 13259f464c52Smaya * setting it when dual color blending without an appropriate shader. 13269f464c52Smaya */ 13279f464c52Smaya 13289f464c52Smaya pb.AlphaToCoverageEnable = state->alpha_to_coverage; 13299f464c52Smaya pb.IndependentAlphaBlendEnable = indep_alpha_blend; 13309f464c52Smaya 13317ec681f3Smrg /* The casts prevent warnings about implicit enum type conversions. 
*/ 13329f464c52Smaya pb.SourceBlendFactor = 13337ec681f3Smrg (int) fix_blendfactor(state->rt[0].rgb_src_factor, state->alpha_to_one); 13349f464c52Smaya pb.SourceAlphaBlendFactor = 13357ec681f3Smrg (int) fix_blendfactor(state->rt[0].alpha_src_factor, state->alpha_to_one); 13369f464c52Smaya pb.DestinationBlendFactor = 13377ec681f3Smrg (int) fix_blendfactor(state->rt[0].rgb_dst_factor, state->alpha_to_one); 13389f464c52Smaya pb.DestinationAlphaBlendFactor = 13397ec681f3Smrg (int) fix_blendfactor(state->rt[0].alpha_dst_factor, state->alpha_to_one); 13409f464c52Smaya } 13419f464c52Smaya 13429f464c52Smaya iris_pack_state(GENX(BLEND_STATE), cso->blend_state, bs) { 13439f464c52Smaya bs.AlphaToCoverageEnable = state->alpha_to_coverage; 13449f464c52Smaya bs.IndependentAlphaBlendEnable = indep_alpha_blend; 13459f464c52Smaya bs.AlphaToOneEnable = state->alpha_to_one; 13469f464c52Smaya bs.AlphaToCoverageDitherEnable = state->alpha_to_coverage; 13479f464c52Smaya bs.ColorDitherEnable = state->dither; 13489f464c52Smaya /* bl.AlphaTestEnable and bs.AlphaTestFunction are filled in later. */ 13499f464c52Smaya } 13509f464c52Smaya 13519f464c52Smaya cso->dual_color_blending = util_blend_state_is_dual(state, 0); 13529f464c52Smaya 13539f464c52Smaya return cso; 13549f464c52Smaya} 13559f464c52Smaya 13569f464c52Smaya/** 13579f464c52Smaya * The pipe->bind_blend_state() driver hook. 13589f464c52Smaya * 13599f464c52Smaya * Bind a blending CSO and flag related dirty bits. 
 */
static void
iris_bind_blend_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_blend_state *cso = state;

   ice->state.cso_blend = cso;

   ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
   ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
   /* Blend state is non-orthogonal state (NOS): it can feed shader compile
    * keys, so flag whichever stages registered an interest in it.
    */
   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[IRIS_NOS_BLEND];

   /* Blend state (alpha-to-coverage, alpha test) participates in the Gfx8
    * PMA stall workaround equations; re-evaluate them on Gfx8 only.
    */
   if (GFX_VER == 8)
      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
}

/**
 * Return true if the FS writes to any color outputs which are not disabled
 * via color masking.
 */
static bool
has_writeable_rt(const struct iris_blend_state *cso_blend,
                 const struct shader_info *fs_info)
{
   if (!fs_info)
      return false;

   unsigned rt_outputs = fs_info->outputs_written >> FRAG_RESULT_DATA0;

   /* Writing FRAG_RESULT_COLOR (gl_FragColor) fans out to every draw
    * buffer, so treat it as writing all render targets.
    */
   if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
      rt_outputs = (1 << BRW_MAX_DRAW_BUFFERS) - 1;

   return cso_blend->color_write_enables & rt_outputs;
}

/**
 * Gallium CSO for depth, stencil, and alpha testing state.
 */
struct iris_depth_stencil_alpha_state {
   /** Partial 3DSTATE_WM_DEPTH_STENCIL. */
   uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];

#if GFX_VER >= 12
   uint32_t depth_bounds[GENX(3DSTATE_DEPTH_BOUNDS_length)];
#endif

   /** Outbound to BLEND_STATE, 3DSTATE_PS_BLEND, COLOR_CALC_STATE. */
   unsigned alpha_enabled:1;
   unsigned alpha_func:3; /**< PIPE_FUNC_x */
   float alpha_ref_value; /**< reference value */

   /** Outbound to resolve and cache set tracking. */
   bool depth_writes_enabled;
   bool stencil_writes_enabled;

   /** Outbound to Gfx8-9 PMA stall equations */
   bool depth_test_enabled;
};

/**
 * The pipe->create_depth_stencil_alpha_state() driver hook.
 *
 * We encode most of 3DSTATE_WM_DEPTH_STENCIL, and just save off the alpha
 * testing state since we need pieces of it in a variety of places.
 */
static void *
iris_create_zsa_state(struct pipe_context *ctx,
                      const struct pipe_depth_stencil_alpha_state *state)
{
   /* NOTE(review): malloc result is used unchecked, matching the other
    * create_*_state hooks in this file — confirm OOM policy is "trust
    * the allocator" here.
    */
   struct iris_depth_stencil_alpha_state *cso =
      malloc(sizeof(struct iris_depth_stencil_alpha_state));

   bool two_sided_stencil = state->stencil[1].enabled;

   /* Alpha test state is saved raw; it's merged into BLEND_STATE,
    * 3DSTATE_PS_BLEND, and COLOR_CALC_STATE elsewhere.
    */
   cso->alpha_enabled = state->alpha_enabled;
   cso->alpha_func = state->alpha_func;
   cso->alpha_ref_value = state->alpha_ref_value;
   cso->depth_writes_enabled = state->depth_writemask;
   cso->depth_test_enabled = state->depth_enabled;
   cso->stencil_writes_enabled =
      state->stencil[0].writemask != 0 ||
      (two_sided_stencil && state->stencil[1].writemask != 0);

   /* gallium frontends need to optimize away EQUAL writes for us. */
   assert(!(state->depth_func == PIPE_FUNC_EQUAL && state->depth_writemask));

   iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), cso->wmds, wmds) {
      wmds.StencilFailOp = state->stencil[0].fail_op;
      wmds.StencilPassDepthFailOp = state->stencil[0].zfail_op;
      wmds.StencilPassDepthPassOp = state->stencil[0].zpass_op;
      wmds.StencilTestFunction =
         translate_compare_func(state->stencil[0].func);
      wmds.BackfaceStencilFailOp = state->stencil[1].fail_op;
      wmds.BackfaceStencilPassDepthFailOp = state->stencil[1].zfail_op;
      wmds.BackfaceStencilPassDepthPassOp = state->stencil[1].zpass_op;
      wmds.BackfaceStencilTestFunction =
         translate_compare_func(state->stencil[1].func);
      wmds.DepthTestFunction = translate_compare_func(state->depth_func);
      wmds.DoubleSidedStencilEnable = two_sided_stencil;
      wmds.StencilTestEnable = state->stencil[0].enabled;
      wmds.StencilBufferWriteEnable =
         state->stencil[0].writemask != 0 ||
         (two_sided_stencil && state->stencil[1].writemask != 0);
      wmds.DepthTestEnable = state->depth_enabled;
      wmds.DepthBufferWriteEnable = state->depth_writemask;
      wmds.StencilTestMask = state->stencil[0].valuemask;
      wmds.StencilWriteMask = state->stencil[0].writemask;
      wmds.BackfaceStencilTestMask = state->stencil[1].valuemask;
      wmds.BackfaceStencilWriteMask = state->stencil[1].writemask;
      /* wmds.[Backface]StencilReferenceValue are merged later */
#if GFX_VER >= 12
      wmds.StencilReferenceValueModifyDisable = true;
#endif
   }

#if GFX_VER >= 12
   iris_pack_command(GENX(3DSTATE_DEPTH_BOUNDS), cso->depth_bounds, depth_bounds) {
      depth_bounds.DepthBoundsTestValueModifyDisable = false;
      depth_bounds.DepthBoundsTestEnableModifyDisable = false;
      depth_bounds.DepthBoundsTestEnable = state->depth_bounds_test;
      depth_bounds.DepthBoundsTestMinValue = state->depth_bounds_min;
      depth_bounds.DepthBoundsTestMaxValue = state->depth_bounds_max;
   }
#endif

   return cso;
}

/**
 * The pipe->bind_depth_stencil_alpha_state() driver hook.
 *
 * Bind a depth/stencil/alpha CSO and flag related dirty bits.
 */
static void
iris_bind_zsa_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa;
   struct iris_depth_stencil_alpha_state *new_cso = state;

   if (new_cso) {
      /* Only flag the streamed states whose inputs actually changed. */
      if (cso_changed(alpha_ref_value))
         ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;

      if (cso_changed(alpha_enabled))
         ice->state.dirty |= IRIS_DIRTY_PS_BLEND | IRIS_DIRTY_BLEND_STATE;

      if (cso_changed(alpha_func))
         ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;

      if (cso_changed(depth_writes_enabled) || cso_changed(stencil_writes_enabled))
         ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;

      /* Mirror write-enable flags for resolve/cache tracking. */
      ice->state.depth_writes_enabled = new_cso->depth_writes_enabled;
      ice->state.stencil_writes_enabled = new_cso->stencil_writes_enabled;
15167ec681f3Smrg 15177ec681f3Smrg#if GFX_VER >= 12 15187ec681f3Smrg if (cso_changed(depth_bounds)) 15197ec681f3Smrg ice->state.dirty |= IRIS_DIRTY_DEPTH_BOUNDS; 15207ec681f3Smrg#endif 15217ec681f3Smrg } 15227ec681f3Smrg 15237ec681f3Smrg ice->state.cso_zsa = new_cso; 15247ec681f3Smrg ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT; 15257ec681f3Smrg ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL; 15267ec681f3Smrg ice->state.stage_dirty |= 15277ec681f3Smrg ice->state.stage_dirty_for_nos[IRIS_NOS_DEPTH_STENCIL_ALPHA]; 15287ec681f3Smrg 15297ec681f3Smrg if (GFX_VER == 8) 15307ec681f3Smrg ice->state.dirty |= IRIS_DIRTY_PMA_FIX; 15317ec681f3Smrg} 15327ec681f3Smrg 15337ec681f3Smrg#if GFX_VER == 8 15347ec681f3Smrgstatic bool 15357ec681f3Smrgwant_pma_fix(struct iris_context *ice) 15367ec681f3Smrg{ 15377ec681f3Smrg UNUSED struct iris_screen *screen = (void *) ice->ctx.screen; 15387ec681f3Smrg UNUSED const struct intel_device_info *devinfo = &screen->devinfo; 15397ec681f3Smrg const struct brw_wm_prog_data *wm_prog_data = (void *) 15407ec681f3Smrg ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; 15417ec681f3Smrg const struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 15427ec681f3Smrg const struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; 15437ec681f3Smrg const struct iris_blend_state *cso_blend = ice->state.cso_blend; 15447ec681f3Smrg 15457ec681f3Smrg /* In very specific combinations of state, we can instruct Gfx8-9 hardware 15467ec681f3Smrg * to avoid stalling at the pixel mask array. 
The state equations are 15477ec681f3Smrg * documented in these places: 15487ec681f3Smrg * 15497ec681f3Smrg * - Gfx8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE 15507ec681f3Smrg * - Gfx9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable 15517ec681f3Smrg * 15527ec681f3Smrg * Both equations share some common elements: 15537ec681f3Smrg * 15547ec681f3Smrg * no_hiz_op = 15557ec681f3Smrg * !(3DSTATE_WM_HZ_OP::DepthBufferClear || 15567ec681f3Smrg * 3DSTATE_WM_HZ_OP::DepthBufferResolve || 15577ec681f3Smrg * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || 15587ec681f3Smrg * 3DSTATE_WM_HZ_OP::StencilBufferClear) && 15597ec681f3Smrg * 15607ec681f3Smrg * killpixels = 15617ec681f3Smrg * 3DSTATE_WM::ForceKillPix != ForceOff && 15627ec681f3Smrg * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || 15637ec681f3Smrg * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || 15647ec681f3Smrg * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || 15657ec681f3Smrg * 3DSTATE_PS_BLEND::AlphaTestEnable || 15667ec681f3Smrg * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) 15677ec681f3Smrg * 15687ec681f3Smrg * (Technically the stencil PMA treats ForceKillPix differently, 15697ec681f3Smrg * but I think this is a documentation oversight, and we don't 15707ec681f3Smrg * ever use it in this way, so it doesn't matter). 
15717ec681f3Smrg * 15727ec681f3Smrg * common_pma_fix = 15737ec681f3Smrg * 3DSTATE_WM::ForceThreadDispatch != 1 && 15747ec681f3Smrg * 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 && 15757ec681f3Smrg * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && 15767ec681f3Smrg * 3DSTATE_DEPTH_BUFFER::HIZ Enable && 15777ec681f3Smrg * 3DSTATE_WM::EDSC_Mode != EDSC_PREPS && 15787ec681f3Smrg * 3DSTATE_PS_EXTRA::PixelShaderValid && 15797ec681f3Smrg * no_hiz_op 15807ec681f3Smrg * 15817ec681f3Smrg * These are always true: 15827ec681f3Smrg * 15837ec681f3Smrg * 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 15847ec681f3Smrg * 3DSTATE_PS_EXTRA::PixelShaderValid 15857ec681f3Smrg * 15867ec681f3Smrg * Also, we never use the normal drawing path for HiZ ops; these are true: 15877ec681f3Smrg * 15887ec681f3Smrg * !(3DSTATE_WM_HZ_OP::DepthBufferClear || 15897ec681f3Smrg * 3DSTATE_WM_HZ_OP::DepthBufferResolve || 15907ec681f3Smrg * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || 15917ec681f3Smrg * 3DSTATE_WM_HZ_OP::StencilBufferClear) 15927ec681f3Smrg * 15937ec681f3Smrg * This happens sometimes: 15947ec681f3Smrg * 15957ec681f3Smrg * 3DSTATE_WM::ForceThreadDispatch != 1 15967ec681f3Smrg * 15977ec681f3Smrg * However, we choose to ignore it as it either agrees with the signal 15987ec681f3Smrg * (dispatch was already enabled, so nothing out of the ordinary), or 15997ec681f3Smrg * there are no framebuffer attachments (so no depth or HiZ anyway, 16007ec681f3Smrg * meaning the PMA signal will already be disabled). 
16017ec681f3Smrg */ 16027ec681f3Smrg 16037ec681f3Smrg if (!cso_fb->zsbuf) 16047ec681f3Smrg return false; 16057ec681f3Smrg 16067ec681f3Smrg struct iris_resource *zres, *sres; 16077ec681f3Smrg iris_get_depth_stencil_resources(cso_fb->zsbuf->texture, &zres, &sres); 16087ec681f3Smrg 16097ec681f3Smrg /* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && 16107ec681f3Smrg * 3DSTATE_DEPTH_BUFFER::HIZ Enable && 16117ec681f3Smrg */ 16127ec681f3Smrg if (!zres || !iris_resource_level_has_hiz(zres, cso_fb->zsbuf->u.tex.level)) 16137ec681f3Smrg return false; 16147ec681f3Smrg 16157ec681f3Smrg /* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS */ 16167ec681f3Smrg if (wm_prog_data->early_fragment_tests) 16177ec681f3Smrg return false; 16187ec681f3Smrg 16197ec681f3Smrg /* 3DSTATE_WM::ForceKillPix != ForceOff && 16207ec681f3Smrg * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || 16217ec681f3Smrg * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || 16227ec681f3Smrg * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || 16237ec681f3Smrg * 3DSTATE_PS_BLEND::AlphaTestEnable || 16247ec681f3Smrg * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) 16257ec681f3Smrg */ 16267ec681f3Smrg bool killpixels = wm_prog_data->uses_kill || wm_prog_data->uses_omask || 16277ec681f3Smrg cso_blend->alpha_to_coverage || cso_zsa->alpha_enabled; 16287ec681f3Smrg 16297ec681f3Smrg /* The Gfx8 depth PMA equation becomes: 16307ec681f3Smrg * 16317ec681f3Smrg * depth_writes = 16327ec681f3Smrg * 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable && 16337ec681f3Smrg * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE 16347ec681f3Smrg * 16357ec681f3Smrg * stencil_writes = 16367ec681f3Smrg * 3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && 16377ec681f3Smrg * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE && 16387ec681f3Smrg * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE 16397ec681f3Smrg * 16407ec681f3Smrg * Z_PMA_OPT = 16417ec681f3Smrg * common_pma_fix && 16427ec681f3Smrg * 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable && 16437ec681f3Smrg * ((killpixels && (depth_writes || 
stencil_writes)) || 16447ec681f3Smrg * 3DSTATE_PS_EXTRA::PixelShaderComputedDepthMode != PSCDEPTH_OFF) 16457ec681f3Smrg * 16467ec681f3Smrg */ 16477ec681f3Smrg if (!cso_zsa->depth_test_enabled) 16487ec681f3Smrg return false; 16499f464c52Smaya 16507ec681f3Smrg return wm_prog_data->computed_depth_mode != PSCDEPTH_OFF || 16517ec681f3Smrg (killpixels && (cso_zsa->depth_writes_enabled || 16527ec681f3Smrg (sres && cso_zsa->stencil_writes_enabled))); 16539f464c52Smaya} 16547ec681f3Smrg#endif 16559f464c52Smaya 16567ec681f3Smrgvoid 16577ec681f3SmrggenX(update_pma_fix)(struct iris_context *ice, 16587ec681f3Smrg struct iris_batch *batch, 16597ec681f3Smrg bool enable) 16609f464c52Smaya{ 16617ec681f3Smrg#if GFX_VER == 8 16627ec681f3Smrg struct iris_genx_state *genx = ice->state.genx; 16639f464c52Smaya 16647ec681f3Smrg if (genx->pma_fix_enabled == enable) 16657ec681f3Smrg return; 16669f464c52Smaya 16677ec681f3Smrg genx->pma_fix_enabled = enable; 16689f464c52Smaya 16697ec681f3Smrg /* According to the Broadwell PIPE_CONTROL documentation, software should 16707ec681f3Smrg * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set 16717ec681f3Smrg * prior to the LRI. If stencil buffer writes are enabled, then a Render * Cache Flush is also necessary. 16727ec681f3Smrg * 16737ec681f3Smrg * The Gfx9 docs say to use a depth stall rather than a command streamer 16747ec681f3Smrg * stall. However, the hardware seems to violently disagree. A full 16757ec681f3Smrg * command streamer stall seems to be needed in both cases. 
16767ec681f3Smrg */ 16777ec681f3Smrg iris_emit_pipe_control_flush(batch, "PMA fix change (1/2)", 16787ec681f3Smrg PIPE_CONTROL_CS_STALL | 16797ec681f3Smrg PIPE_CONTROL_DEPTH_CACHE_FLUSH | 16807ec681f3Smrg PIPE_CONTROL_RENDER_TARGET_FLUSH); 16819f464c52Smaya 16827ec681f3Smrg iris_emit_reg(batch, GENX(CACHE_MODE_1), reg) { 16837ec681f3Smrg reg.NPPMAFixEnable = enable; 16847ec681f3Smrg reg.NPEarlyZFailsDisable = enable; 16857ec681f3Smrg reg.NPPMAFixEnableMask = true; 16867ec681f3Smrg reg.NPEarlyZFailsDisableMask = true; 16879f464c52Smaya } 16889f464c52Smaya 16897ec681f3Smrg /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache 16907ec681f3Smrg * Flush bits is often necessary. We do it regardless because it's easier. 16917ec681f3Smrg * The render cache flush is also necessary if stencil writes are enabled. 16927ec681f3Smrg * 16937ec681f3Smrg * Again, the Gfx9 docs give a different set of flushes but the Broadwell 16947ec681f3Smrg * flushes seem to work just as well. 16957ec681f3Smrg */ 16967ec681f3Smrg iris_emit_pipe_control_flush(batch, "PMA fix change (1/2)", 16977ec681f3Smrg PIPE_CONTROL_DEPTH_STALL | 16987ec681f3Smrg PIPE_CONTROL_DEPTH_CACHE_FLUSH | 16997ec681f3Smrg PIPE_CONTROL_RENDER_TARGET_FLUSH); 17007ec681f3Smrg#endif 17019f464c52Smaya} 17029f464c52Smaya 17039f464c52Smaya/** 17049f464c52Smaya * Gallium CSO for rasterizer state. 
 */
struct iris_rasterizer_state {
   /* Pre-packed 3DSTATE commands, emitted at bind/draw time. */
   uint32_t sf[GENX(3DSTATE_SF_length)];
   uint32_t clip[GENX(3DSTATE_CLIP_length)];
   uint32_t raster[GENX(3DSTATE_RASTER_length)];
   uint32_t wm[GENX(3DSTATE_WM_length)];
   uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];

   uint8_t num_clip_plane_consts;
   bool clip_halfz; /* for CC_VIEWPORT */
   bool depth_clip_near; /* for CC_VIEWPORT */
   bool depth_clip_far; /* for CC_VIEWPORT */
   bool flatshade; /* for shader state */
   bool flatshade_first; /* for stream output */
   bool clamp_fragment_color; /* for shader state */
   bool light_twoside; /* for shader state */
   bool rasterizer_discard; /* for 3DSTATE_STREAMOUT and 3DSTATE_CLIP */
   bool half_pixel_center; /* for 3DSTATE_MULTISAMPLE */
   bool line_stipple_enable;
   bool poly_stipple_enable;
   bool multisample;
   bool force_persample_interp;
   bool conservative_rasterization;
   bool fill_mode_point;
   bool fill_mode_line;
   bool fill_mode_point_or_line;
   enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */
   uint16_t sprite_coord_enable;
};

/**
 * Compute the effective hardware line width from Gallium rasterizer state,
 * applying the GL rounding rule for non-antialiased lines and the special
 * zero-width case for thin smooth lines.
 */
static float
get_line_width(const struct pipe_rasterizer_state *state)
{
   float line_width = state->line_width;

   /* From the OpenGL 4.4 spec:
    *
    * "The actual width of non-antialiased lines is determined by rounding
    *  the supplied width to the nearest integer, then clamping it to the
    *  implementation-dependent maximum non-antialiased line width."
    */
   if (!state->multisample && !state->line_smooth)
      line_width = roundf(state->line_width);

   if (!state->multisample && state->line_smooth && line_width < 1.5f) {
      /* For 1 pixel line thickness or less, the general anti-aliasing
       * algorithm gives up, and a garbage line is generated.  Setting a
       * Line Width of 0.0 specifies the rasterization of the "thinnest"
       * (one-pixel-wide), non-antialiased lines.
       *
       * Lines rendered with zero Line Width are rasterized using the
       * "Grid Intersection Quantization" rules as specified by the
       * "Zero-Width (Cosmetic) Line Rasterization" section of the docs.
       */
      line_width = 0.0f;
   }

   return line_width;
}

/**
 * The pipe->create_rasterizer_state() driver hook.
 */
static void *
iris_create_rasterizer_state(struct pipe_context *ctx,
                             const struct pipe_rasterizer_state *state)
{
   struct iris_rasterizer_state *cso =
      malloc(sizeof(struct iris_rasterizer_state));

   /* Save off the raw flags other parts of the driver consult at draw or
    * shader-compile time (see the field comments on the struct).
    */
   cso->multisample = state->multisample;
   cso->force_persample_interp = state->force_persample_interp;
   cso->clip_halfz = state->clip_halfz;
   cso->depth_clip_near = state->depth_clip_near;
   cso->depth_clip_far = state->depth_clip_far;
   cso->flatshade = state->flatshade;
   cso->flatshade_first = state->flatshade_first;
   cso->clamp_fragment_color = state->clamp_fragment_color;
   cso->light_twoside = state->light_twoside;
   cso->rasterizer_discard = state->rasterizer_discard;
   cso->half_pixel_center = state->half_pixel_center;
   cso->sprite_coord_mode = state->sprite_coord_mode;
   cso->sprite_coord_enable = state->sprite_coord_enable;
   cso->line_stipple_enable = state->line_stipple_enable;
   cso->poly_stipple_enable = state->poly_stipple_enable;
   cso->conservative_rasterization =
      state->conservative_raster_mode == PIPE_CONSERVATIVE_RASTER_POST_SNAP;

   /* Either face using point/line fill mode is enough to need the flag. */
   cso->fill_mode_point =
      state->fill_front == PIPE_POLYGON_MODE_POINT ||
      state->fill_back == PIPE_POLYGON_MODE_POINT;
   cso->fill_mode_line =
      state->fill_front == PIPE_POLYGON_MODE_LINE ||
      state->fill_back == PIPE_POLYGON_MODE_LINE;
   cso->fill_mode_point_or_line =
      cso->fill_mode_point ||
      cso->fill_mode_line;

   /* Number of clip-plane constants = index of the highest enabled plane. */
   if (state->clip_plane_enable != 0)
      cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1;
   else
      cso->num_clip_plane_consts = 0;

   float line_width = get_line_width(state);

   iris_pack_command(GENX(3DSTATE_SF), cso->sf, sf) {
      sf.StatisticsEnable = true;
      sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
      sf.LineEndCapAntialiasingRegionWidth =
         state->line_smooth ? _10pixels : _05pixels;
      sf.LastPixelEnable = state->line_last_pixel;
      sf.LineWidth = line_width;
      sf.SmoothPointEnable = (state->point_smooth || state->multisample) &&
                             !state->point_quad_rasterization;
      sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State;
      sf.PointWidth = CLAMP(state->point_size, 0.125f, 255.875f);

      if (state->flatshade_first) {
         sf.TriangleFanProvokingVertexSelect = 1;
      } else {
         sf.TriangleStripListProvokingVertexSelect = 2;
         sf.TriangleFanProvokingVertexSelect = 2;
         sf.LineStripListProvokingVertexSelect = 1;
      }
   }

   iris_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) {
      rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise;
      rr.CullMode = translate_cull_mode(state->cull_face);
      rr.FrontFaceFillMode = translate_fill_mode(state->fill_front);
      rr.BackFaceFillMode = translate_fill_mode(state->fill_back);
      rr.DXMultisampleRasterizationEnable = state->multisample;
      rr.GlobalDepthOffsetEnableSolid = state->offset_tri;
      rr.GlobalDepthOffsetEnableWireframe = state->offset_line;
      rr.GlobalDepthOffsetEnablePoint = state->offset_point;
      rr.GlobalDepthOffsetConstant = state->offset_units * 2;
      rr.GlobalDepthOffsetScale = state->offset_scale;
      rr.GlobalDepthOffsetClamp = state->offset_clamp;
      rr.SmoothPointEnable = state->point_smooth;
      rr.AntialiasingEnable = state->line_smooth;
      rr.ScissorRectangleEnable = state->scissor;
#if GFX_VER >= 9
      rr.ViewportZNearClipTestEnable = state->depth_clip_near;
      rr.ViewportZFarClipTestEnable = state->depth_clip_far;
      rr.ConservativeRasterizationEnable =
         cso->conservative_rasterization;
#else
      rr.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far);
#endif
   }

   iris_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) {
      /* cl.NonPerspectiveBarycentricEnable is filled in at draw time from
       * the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB.
       */
      cl.EarlyCullEnable = true;
      cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable;
      cl.ForceUserClipDistanceClipTestEnableBitmask = true;
      cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
      cl.GuardbandClipTestEnable = true;
      cl.ClipEnable = true;
      cl.MinimumPointWidth = 0.125;
      cl.MaximumPointWidth = 255.875;

      if (state->flatshade_first) {
         cl.TriangleFanProvokingVertexSelect = 1;
      } else {
         cl.TriangleStripListProvokingVertexSelect = 2;
         cl.TriangleFanProvokingVertexSelect = 2;
         cl.LineStripListProvokingVertexSelect = 1;
      }
   }

   iris_pack_command(GENX(3DSTATE_WM), cso->wm, wm) {
      /* wm.BarycentricInterpolationMode and wm.EarlyDepthStencilControl are
       * filled in at draw time from the FS program.
       */
      wm.LineAntialiasingRegionWidth = _10pixels;
      wm.LineEndCapAntialiasingRegionWidth = _05pixels;
      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
      wm.LineStippleEnable = state->line_stipple_enable;
      wm.PolygonStippleEnable = state->poly_stipple_enable;
   }

   /* Remap from 0..255 back to 1..256 */
   const unsigned line_stipple_factor = state->line_stipple_factor + 1;

   iris_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) {
      if (state->line_stipple_enable) {
         line.LineStipplePattern = state->line_stipple_pattern;
         line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor;
         line.LineStippleRepeatCount = line_stipple_factor;
      }
   }

   return cso;
}

/**
 * The pipe->bind_rasterizer_state() driver hook.
 *
 * Bind a rasterizer CSO and flag related dirty bits.
 */
static void
iris_bind_rasterizer_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_rasterizer_state *old_cso = ice->state.cso_rast;
   struct iris_rasterizer_state *new_cso = state;

   if (new_cso) {
      /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */
      if (cso_changed_memcmp(line_stipple))
         ice->state.dirty |= IRIS_DIRTY_LINE_STIPPLE;

      if (cso_changed(half_pixel_center))
         ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;

      if (cso_changed(line_stipple_enable) || cso_changed(poly_stipple_enable))
         ice->state.dirty |= IRIS_DIRTY_WM;

      if (cso_changed(rasterizer_discard))
         ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;

      if (cso_changed(flatshade_first))
         ice->state.dirty |= IRIS_DIRTY_STREAMOUT;

      if (cso_changed(depth_clip_near) || cso_changed(depth_clip_far) ||
          cso_changed(clip_halfz))
         ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;

      if (cso_changed(sprite_coord_enable) ||
          cso_changed(sprite_coord_mode) ||
          cso_changed(light_twoside))
         ice->state.dirty |= IRIS_DIRTY_SBE;

      if (cso_changed(conservative_rasterization))
         ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS;
   }

   ice->state.cso_rast = new_cso;
   /* 3DSTATE_RASTER and 3DSTATE_CLIP are always re-emitted on a bind. */
   ice->state.dirty |= IRIS_DIRTY_RASTER;
   ice->state.dirty |= IRIS_DIRTY_CLIP;
   /* Rasterizer state is non-orthogonal state (NOS); flag interested stages. */
   ice->state.stage_dirty |=
      ice->state.stage_dirty_for_nos[IRIS_NOS_RASTERIZER];
}

/**
 * Return true if the given wrap mode requires the border color to exist.
 *
 * (We can skip uploading it if the sampler isn't going to use it.)
 */
static bool
wrap_mode_needs_border_color(unsigned wrap_mode)
{
   return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER;
}

/**
 * Gallium CSO for sampler state.
 */
struct iris_sampler_state {
   union pipe_color_union border_color;
   bool needs_border_color;

   uint32_t sampler_state[GENX(SAMPLER_STATE_length)];
};

/**
 * The pipe->create_sampler_state() driver hook.
 *
 * We fill out SAMPLER_STATE (except for the border color pointer), and
 * store that on the CPU.  It doesn't make sense to upload it to a GPU
 * buffer object yet, because 3DSTATE_SAMPLER_STATE_POINTERS requires
 * all bound sampler states to be in contiguous memory.
 */
static void *
iris_create_sampler_state(struct pipe_context *ctx,
                          const struct pipe_sampler_state *state)
{
   struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state);

   if (!cso)
      return NULL;

   STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);
   STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);

   unsigned wrap_s = translate_wrap(state->wrap_s);
   unsigned wrap_t = translate_wrap(state->wrap_t);
   unsigned wrap_r = translate_wrap(state->wrap_r);

   memcpy(&cso->border_color, &state->border_color, sizeof(cso->border_color));

   /* Only upload a border color if some wrap mode will actually read it. */
   cso->needs_border_color = wrap_mode_needs_border_color(wrap_s) ||
                             wrap_mode_needs_border_color(wrap_t) ||
                             wrap_mode_needs_border_color(wrap_r);

   float min_lod = state->min_lod;
   unsigned mag_img_filter = state->mag_img_filter;

   // XXX: explain this code ported from ilo...I don't get it at all...
   // NOTE(review): with mipmap filtering disabled but min_lod > 0, this
   // zeroes min_lod and makes magnification use the minification filter —
   // presumably to keep min/mag behavior consistent when the LOD clamp
   // would otherwise force minification; confirm against ilo history.
   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
       state->min_lod > 0.0f) {
      min_lod = 0.0f;
      mag_img_filter = state->min_img_filter;
   }

   iris_pack_state(GENX(SAMPLER_STATE), cso->sampler_state, samp) {
      samp.TCXAddressControlMode = wrap_s;
      samp.TCYAddressControlMode = wrap_t;
      samp.TCZAddressControlMode = wrap_r;
      samp.CubeSurfaceControlMode = state->seamless_cube_map;
      samp.NonnormalizedCoordinateEnable = !state->normalized_coords;
      samp.MinModeFilter = state->min_img_filter;
      samp.MagModeFilter = mag_img_filter;
      samp.MipModeFilter = translate_mip_filter(state->min_mip_filter);
      samp.MaximumAnisotropy = RATIO21;

      if (state->max_anisotropy >= 2) {
         if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
            samp.MinModeFilter = MAPFILTER_ANISOTROPIC;
            samp.AnisotropicAlgorithm = EWAApproximation;
         }

         if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)
            samp.MagModeFilter = MAPFILTER_ANISOTROPIC;

         /* Hardware encodes max anisotropy as (ratio - 2) / 2. */
         samp.MaximumAnisotropy =
            MIN2((state->max_anisotropy - 2) / 2, RATIO161);
      }

      /* Set address rounding bits if not using nearest filtering. */
      if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) {
         samp.UAddressMinFilterRoundingEnable = true;
         samp.VAddressMinFilterRoundingEnable = true;
         samp.RAddressMinFilterRoundingEnable = true;
      }

      if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
         samp.UAddressMagFilterRoundingEnable = true;
         samp.VAddressMagFilterRoundingEnable = true;
         samp.RAddressMagFilterRoundingEnable = true;
      }

      if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
         samp.ShadowFunction = translate_shadow_func(state->compare_func);

      const float hw_max_lod = GFX_VER >= 7 ? 14 : 13;

      samp.LODPreClampMode = CLAMP_MODE_OGL;
      samp.MinLOD = CLAMP(min_lod, 0, hw_max_lod);
      samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);
      samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);

      /* .BorderColorPointer is filled in by iris_bind_sampler_states. */
   }

   return cso;
}

/**
 * The pipe->bind_sampler_states() driver hook.
 */
static void
iris_bind_sampler_states(struct pipe_context *ctx,
                         enum pipe_shader_type p_stage,
                         unsigned start, unsigned count,
                         void **states)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   gl_shader_stage stage = stage_from_pipe(p_stage);
   struct iris_shader_state *shs = &ice->state.shaders[stage];

   assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS);

   bool dirty = false;

   /* Only flag the stage dirty if a binding actually changed; rebinding
    * identical sampler CSOs is common and should not trigger re-upload.
    */
   for (int i = 0; i < count; i++) {
      struct iris_sampler_state *state = states ? states[i] : NULL;
      if (shs->samplers[start + i] != state) {
         shs->samplers[start + i] = state;
         dirty = true;
      }
   }

   if (dirty)
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
}

/**
 * Upload the sampler states into a contiguous area of GPU memory, for
 * 3DSTATE_SAMPLER_STATE_POINTERS_*.
 *
 * Also fill out the border color state pointers.
 */
static void
iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage)
{
   struct iris_shader_state *shs = &ice->state.shaders[stage];
   const struct shader_info *info = iris_get_shader_info(ice, stage);

   /* We assume gallium frontends will call pipe->bind_sampler_states()
    * if the program's number of textures changes.
    */
   unsigned count = info ? BITSET_LAST_BIT(info->textures_used) : 0;

   if (!count)
      return;

   /* Assemble the SAMPLER_STATEs into a contiguous table that lives
    * in the dynamic state memory zone, so we can point to it via the
    * 3DSTATE_SAMPLER_STATE_POINTERS_* commands.
    */
   unsigned size = count * 4 * GENX(SAMPLER_STATE_length);
   uint32_t *map =
      upload_state(ice->state.dynamic_uploader, &shs->sampler_table, size, 32);
   if (unlikely(!map))
      return;

   struct pipe_resource *res = shs->sampler_table.res;
   struct iris_bo *bo = iris_resource_bo(res);

   iris_record_state_size(ice->state.sizes,
                          bo->address + shs->sampler_table.offset, size);

   /* Convert the table offset into one relative to Dynamic State Base
    * Address, which is what the 3DSTATE pointer commands expect.
    */
   shs->sampler_table.offset += iris_bo_offset_from_base_address(bo);

   /* Make sure all land in the same BO */
   iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS);

   ice->state.need_border_colors &= ~(1 << stage);

   for (int i = 0; i < count; i++) {
      struct iris_sampler_state *state = shs->samplers[i];
      struct iris_sampler_view *tex = shs->textures[i];

      if (!state) {
         memset(map, 0, 4 * GENX(SAMPLER_STATE_length));
      } else if (!state->needs_border_color) {
         memcpy(map, state->sampler_state, 4 * GENX(SAMPLER_STATE_length));
      } else {
         ice->state.need_border_colors |= 1 << stage;

         /* We may need to swizzle the border color for format faking.
          * A/LA formats are faked as R/RG with 000R or R00G swizzles.
          * This means we need to move the border color's A channel into
          * the R or G channels so that those read swizzles will move it
          * back into A.
          */
         union pipe_color_union *color = &state->border_color;
         union pipe_color_union tmp;
         if (tex) {
            enum pipe_format internal_format = tex->res->internal_format;

            if (util_format_is_alpha(internal_format)) {
               unsigned char swz[4] = {
                  PIPE_SWIZZLE_W, PIPE_SWIZZLE_0,
                  PIPE_SWIZZLE_0, PIPE_SWIZZLE_0
               };
               util_format_apply_color_swizzle(&tmp, color, swz, true);
               color = &tmp;
            } else if (util_format_is_luminance_alpha(internal_format) &&
                       internal_format != PIPE_FORMAT_L8A8_SRGB) {
               unsigned char swz[4] = {
                  PIPE_SWIZZLE_X, PIPE_SWIZZLE_W,
                  PIPE_SWIZZLE_0, PIPE_SWIZZLE_0
               };
               util_format_apply_color_swizzle(&tmp, color, swz, true);
               color = &tmp;
            }
         }

         /* Stream out the border color and merge the pointer. */
         uint32_t offset = iris_upload_border_color(ice, color);

         /* OR the border color pointer into the pre-packed CSO DWords. */
         uint32_t dynamic[GENX(SAMPLER_STATE_length)];
         iris_pack_state(GENX(SAMPLER_STATE), dynamic, dyns) {
            dyns.BorderColorPointer = offset;
         }

         for (uint32_t j = 0; j < GENX(SAMPLER_STATE_length); j++)
            map[j] = state->sampler_state[j] | dynamic[j];
      }

      /* Advance to the next SAMPLER_STATE slot in the table. */
      map += GENX(SAMPLER_STATE_length);
   }
}

/**
 * Translate a gallium swizzle channel to an ISL channel select, resolving
 * X/Y/Z/W through the format's own channel mapping (for format faking).
 */
static enum isl_channel_select
fmt_swizzle(const struct iris_format_info *fmt, enum pipe_swizzle swz)
{
   switch (swz) {
   case PIPE_SWIZZLE_X: return fmt->swizzle.r;
   case PIPE_SWIZZLE_Y: return fmt->swizzle.g;
   case PIPE_SWIZZLE_Z: return fmt->swizzle.b;
   case PIPE_SWIZZLE_W: return fmt->swizzle.a;
   case PIPE_SWIZZLE_1: return ISL_CHANNEL_SELECT_ONE;
   case PIPE_SWIZZLE_0: return ISL_CHANNEL_SELECT_ZERO;
   default: unreachable("invalid swizzle");
   }
}

/**
 * Fill out a SURFACE_STATE for a buffer view of \p res at \p map, clamping
 * the size to the BO's remaining bytes and the texture buffer size limit.
 */
static void
fill_buffer_surface_state(struct isl_device *isl_dev,
                          struct iris_resource *res,
                          void *map,
                          enum isl_format format,
                          struct isl_swizzle swizzle,
                          unsigned offset,
                          unsigned size,
                          isl_surf_usage_flags_t usage)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
   const unsigned cpp = format == ISL_FORMAT_RAW ? 1 : fmtl->bpb / 8;

   /* The ARB_texture_buffer_specification says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
    *
    *     where <buffer_size> is the size of the buffer object, in basic
    *     machine units and <components> and <base_type> are the element count
    *     and base data type for elements, as specified in Table X.1.  The
    *     number of texels in the texel array is then clamped to the
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   unsigned final_size =
      MIN3(size, res->bo->size - res->offset - offset,
           IRIS_MAX_TEXTURE_BUFFER_SIZE * cpp);

   isl_buffer_fill_state(isl_dev, map,
                         .address = res->bo->address + res->offset + offset,
                         .size_B = final_size,
                         .format = format,
                         .swizzle = swizzle,
                         .stride_B = cpp,
                         .mocs = iris_mocs(res->bo, isl_dev, usage));
}

#define SURFACE_STATE_ALIGNMENT 64

/**
 * Allocate several contiguous SURFACE_STATE structures, one for each
 * supported auxiliary surface mode.  This only allocates the CPU-side
 * copy, they will need to be uploaded later after they're filled in.
 */
static void
alloc_surface_states(struct iris_surface_state *surf_state,
                     unsigned aux_usages)
{
   const unsigned surf_size = 4 * GENX(RENDER_SURFACE_STATE_length);

   /* If this changes, update this to explicitly align pointers */
   STATIC_ASSERT(surf_size == SURFACE_STATE_ALIGNMENT);

   assert(aux_usages != 0);

   /* In case we're re-allocating them... */
   free(surf_state->cpu);

   /* One SURFACE_STATE per bit set in the aux usage mask. */
   surf_state->num_states = util_bitcount(aux_usages);
   surf_state->cpu = calloc(surf_state->num_states, surf_size);
   surf_state->ref.offset = 0;
   pipe_resource_reference(&surf_state->ref.res, NULL);

   assert(surf_state->cpu);
}

/**
 * Upload the CPU side SURFACE_STATEs into a GPU buffer.
 */
static void
upload_surface_states(struct u_upload_mgr *mgr,
                      struct iris_surface_state *surf_state)
{
   const unsigned surf_size = 4 * GENX(RENDER_SURFACE_STATE_length);
   const unsigned bytes = surf_state->num_states * surf_size;

   void *map =
      upload_state(mgr, &surf_state->ref, bytes, SURFACE_STATE_ALIGNMENT);

   /* Make the offset relative to Surface State Base Address, as the
    * binding table entries expect.
    */
   surf_state->ref.offset +=
      iris_bo_offset_from_base_address(iris_resource_bo(surf_state->ref.res));

   if (map)
      memcpy(map, surf_state->cpu, bytes);
}

/**
 * Update resource addresses in a set of SURFACE_STATE descriptors,
 * and re-upload them if necessary.
 *
 * Returns true if the descriptors were re-uploaded (i.e. the underlying
 * BO address changed), false if nothing needed to be done.
 */
static bool
update_surface_state_addrs(struct u_upload_mgr *mgr,
                           struct iris_surface_state *surf_state,
                           struct iris_bo *bo)
{
   if (surf_state->bo_address == bo->address)
      return false;

   /* Surface Base Address must be a QWord-aligned, 64-bit field for the
    * in-place patching below to be valid.
    */
   STATIC_ASSERT(GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) % 64 == 0);
   STATIC_ASSERT(GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_bits) == 64);

   uint64_t *ss_addr = (uint64_t *) &surf_state->cpu[GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) / 32];

   /* First, update the CPU copies.  We assume no other fields exist in
    * the QWord containing Surface Base Address.
    */
   for (unsigned i = 0; i < surf_state->num_states; i++) {
      *ss_addr = *ss_addr - surf_state->bo_address + bo->address;
      ss_addr = ((void *) ss_addr) + SURFACE_STATE_ALIGNMENT;
   }

   /* Next, upload the updated copies to a GPU buffer. */
   upload_surface_states(mgr, surf_state);

   surf_state->bo_address = bo->address;

   return true;
}

/**
 * Fill out a SURFACE_STATE at \p map for \p surf (a view of \p res),
 * including auxiliary surface and clear color information when an aux
 * usage is in effect.
 *
 * \param extra_main_offset  additional byte offset into the main surface
 * \param tile_x_sa/tile_y_sa  intratile offsets, in samples
 */
static void
fill_surface_state(struct isl_device *isl_dev,
                   void *map,
                   struct iris_resource *res,
                   struct isl_surf *surf,
                   struct isl_view *view,
                   unsigned aux_usage,
                   uint32_t extra_main_offset,
                   uint32_t tile_x_sa,
                   uint32_t tile_y_sa)
{
   struct isl_surf_fill_state_info f = {
      .surf = surf,
      .view = view,
      .mocs = iris_mocs(res->bo, isl_dev, view->usage),
      .address = res->bo->address + res->offset + extra_main_offset,
      .x_offset_sa = tile_x_sa,
      .y_offset_sa = tile_y_sa,
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      f.aux_surf = &res->aux.surf;
      f.aux_usage = aux_usage;
      f.clear_color = res->aux.clear_color;

      if (res->aux.bo)
         f.aux_address = res->aux.bo->address + res->aux.offset;

      if (res->aux.clear_color_bo) {
         f.clear_address = res->aux.clear_color_bo->address +
                           res->aux.clear_color_offset;
         /* Indirect clear color addresses are only supported on gfx10+. */
         f.use_clear_address = isl_dev->info->ver > 9;
      }
   }

   isl_surf_fill_state_s(isl_dev, map, &f);
}

/**
 * The pipe->create_sampler_view() driver hook.
 */
static struct pipe_sampler_view *
iris_create_sampler_view(struct pipe_context *ctx,
                         struct pipe_resource *tex,
                         const struct pipe_sampler_view *tmpl)
{
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct iris_sampler_view *isv = calloc(1, sizeof(struct iris_sampler_view));

   if (!isv)
      return NULL;

   /* initialize base object */
   isv->base = *tmpl;
   isv->base.context = ctx;
   isv->base.texture = NULL;
   pipe_reference_init(&isv->base.reference, 1);
   pipe_resource_reference(&isv->base.texture, tex);

   /* For combined depth/stencil resources, redirect the view to the
    * single-plane resource matching the aspect the format samples.
    */
   if (util_format_is_depth_or_stencil(tmpl->format)) {
      struct iris_resource *zres, *sres;
      const struct util_format_description *desc =
         util_format_description(tmpl->format);

      iris_get_depth_stencil_resources(tex, &zres, &sres);

      tex = util_format_has_depth(desc) ? &zres->base.b : &sres->base.b;
   }

   isv->res = (struct iris_resource *) tex;

   /* One CPU-side SURFACE_STATE per supported sampler aux usage. */
   alloc_surface_states(&isv->surface_state, isv->res->aux.sampler_usages);

   isv->surface_state.bo_address = isv->res->bo->address;

   isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT;

   if (isv->base.target == PIPE_TEXTURE_CUBE ||
       isv->base.target == PIPE_TEXTURE_CUBE_ARRAY)
      usage |= ISL_SURF_USAGE_CUBE_BIT;

   const struct iris_format_info fmt =
      iris_format_for_usage(devinfo, tmpl->format, usage);

   isv->clear_color = isv->res->aux.clear_color;

   isv->view = (struct isl_view) {
      .format = fmt.fmt,
      .swizzle = (struct isl_swizzle) {
         .r = fmt_swizzle(&fmt, tmpl->swizzle_r),
         .g = fmt_swizzle(&fmt, tmpl->swizzle_g),
         .b = fmt_swizzle(&fmt, tmpl->swizzle_b),
         .a = fmt_swizzle(&fmt, tmpl->swizzle_a),
      },
      .usage = usage,
   };

   void *map = isv->surface_state.cpu;

   /* Fill out SURFACE_STATE for this view. */
   if (tmpl->target != PIPE_BUFFER) {
      isv->view.base_level = tmpl->u.tex.first_level;
      isv->view.levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1;

      /* 3D textures are always viewed as a single full-depth layer here;
       * other targets honor the template's layer range.
       */
      if (tmpl->target == PIPE_TEXTURE_3D) {
         isv->view.base_array_layer = 0;
         isv->view.array_len = 1;
      } else {
         isv->view.base_array_layer = tmpl->u.tex.first_layer;
         isv->view.array_len =
            tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1;
      }

      /* Fill one SURFACE_STATE per supported aux usage, in bit order. */
      unsigned aux_modes = isv->res->aux.sampler_usages;
      while (aux_modes) {
         enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes);

         fill_surface_state(&screen->isl_dev, map, isv->res, &isv->res->surf,
                            &isv->view, aux_usage, 0, 0, 0);

         map += SURFACE_STATE_ALIGNMENT;
      }
   } else {
      fill_buffer_surface_state(&screen->isl_dev, isv->res, map,
                                isv->view.format, isv->view.swizzle,
                                tmpl->u.buf.offset, tmpl->u.buf.size,
                                ISL_SURF_USAGE_TEXTURE_BIT);
   }

   return &isv->base;
}

/** Release the texture reference and CPU/GPU surface state copies. */
static void
iris_sampler_view_destroy(struct pipe_context *ctx,
                          struct pipe_sampler_view *state)
{
   struct iris_sampler_view *isv = (void *) state;
   pipe_resource_reference(&state->texture, NULL);
   pipe_resource_reference(&isv->surface_state.ref.res, NULL);
   free(isv->surface_state.cpu);
   free(isv);
}

/**
 * The pipe->create_surface() driver hook.
24789f464c52Smaya * 24799f464c52Smaya * In Gallium nomenclature, "surfaces" are a view of a resource that 24809f464c52Smaya * can be bound as a render target or depth/stencil buffer. 24819f464c52Smaya */ 24829f464c52Smayastatic struct pipe_surface * 24839f464c52Smayairis_create_surface(struct pipe_context *ctx, 24849f464c52Smaya struct pipe_resource *tex, 24859f464c52Smaya const struct pipe_surface *tmpl) 24869f464c52Smaya{ 24879f464c52Smaya struct iris_screen *screen = (struct iris_screen *)ctx->screen; 24887ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 24899f464c52Smaya 24909f464c52Smaya isl_surf_usage_flags_t usage = 0; 24919f464c52Smaya if (tmpl->writable) 24929f464c52Smaya usage = ISL_SURF_USAGE_STORAGE_BIT; 24939f464c52Smaya else if (util_format_is_depth_or_stencil(tmpl->format)) 24949f464c52Smaya usage = ISL_SURF_USAGE_DEPTH_BIT; 24959f464c52Smaya else 24969f464c52Smaya usage = ISL_SURF_USAGE_RENDER_TARGET_BIT; 24979f464c52Smaya 24989f464c52Smaya const struct iris_format_info fmt = 24997ec681f3Smrg iris_format_for_usage(devinfo, tmpl->format, usage); 25009f464c52Smaya 25019f464c52Smaya if ((usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && 25029f464c52Smaya !isl_format_supports_rendering(devinfo, fmt.fmt)) { 25039f464c52Smaya /* Framebuffer validation will reject this invalid case, but it 25049f464c52Smaya * hasn't had the opportunity yet. In the meantime, we need to 25059f464c52Smaya * avoid hitting ISL asserts about unsupported formats below. 
25069f464c52Smaya */ 25079f464c52Smaya return NULL; 25089f464c52Smaya } 25099f464c52Smaya 25107ec681f3Smrg struct iris_surface *surf = calloc(1, sizeof(struct iris_surface)); 25117ec681f3Smrg struct pipe_surface *psurf = &surf->base; 25127ec681f3Smrg struct iris_resource *res = (struct iris_resource *) tex; 25137ec681f3Smrg 25147ec681f3Smrg if (!surf) 25157ec681f3Smrg return NULL; 25167ec681f3Smrg 25177ec681f3Smrg pipe_reference_init(&psurf->reference, 1); 25187ec681f3Smrg pipe_resource_reference(&psurf->texture, tex); 25197ec681f3Smrg psurf->context = ctx; 25207ec681f3Smrg psurf->format = tmpl->format; 25217ec681f3Smrg psurf->width = tex->width0; 25227ec681f3Smrg psurf->height = tex->height0; 25237ec681f3Smrg psurf->texture = tex; 25247ec681f3Smrg psurf->u.tex.first_layer = tmpl->u.tex.first_layer; 25257ec681f3Smrg psurf->u.tex.last_layer = tmpl->u.tex.last_layer; 25267ec681f3Smrg psurf->u.tex.level = tmpl->u.tex.level; 25277ec681f3Smrg 25287ec681f3Smrg uint32_t array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1; 25297ec681f3Smrg 25309f464c52Smaya struct isl_view *view = &surf->view; 25319f464c52Smaya *view = (struct isl_view) { 25329f464c52Smaya .format = fmt.fmt, 25339f464c52Smaya .base_level = tmpl->u.tex.level, 25349f464c52Smaya .levels = 1, 25359f464c52Smaya .base_array_layer = tmpl->u.tex.first_layer, 25367ec681f3Smrg .array_len = array_len, 25379f464c52Smaya .swizzle = ISL_SWIZZLE_IDENTITY, 25389f464c52Smaya .usage = usage, 25399f464c52Smaya }; 25409f464c52Smaya 25417ec681f3Smrg#if GFX_VER == 8 25427ec681f3Smrg struct isl_view *read_view = &surf->read_view; 25437ec681f3Smrg *read_view = (struct isl_view) { 25447ec681f3Smrg .format = fmt.fmt, 25457ec681f3Smrg .base_level = tmpl->u.tex.level, 25467ec681f3Smrg .levels = 1, 25477ec681f3Smrg .base_array_layer = tmpl->u.tex.first_layer, 25487ec681f3Smrg .array_len = array_len, 25497ec681f3Smrg .swizzle = ISL_SWIZZLE_IDENTITY, 25507ec681f3Smrg .usage = ISL_SURF_USAGE_TEXTURE_BIT, 25517ec681f3Smrg }; 
25527ec681f3Smrg 25537ec681f3Smrg struct isl_surf read_surf = res->surf; 25547ec681f3Smrg uint64_t read_surf_offset_B = 0; 25557ec681f3Smrg uint32_t read_surf_tile_x_sa = 0, read_surf_tile_y_sa = 0; 25567ec681f3Smrg if (tex->target == PIPE_TEXTURE_3D && array_len == 1) { 25577ec681f3Smrg /* The minimum array element field of the surface state structure is 25587ec681f3Smrg * ignored by the sampler unit for 3D textures on some hardware. If the 25597ec681f3Smrg * render buffer is a single slice of a 3D texture, create a 2D texture 25607ec681f3Smrg * covering that slice. 25617ec681f3Smrg * 25627ec681f3Smrg * TODO: This only handles the case where we're rendering to a single 25637ec681f3Smrg * slice of an array texture. If we have layered rendering combined 25647ec681f3Smrg * with non-coherent FB fetch and a non-zero base_array_layer, then 25657ec681f3Smrg * we're going to run into problems. 25667ec681f3Smrg * 25677ec681f3Smrg * See https://gitlab.freedesktop.org/mesa/mesa/-/issues/4904 25687ec681f3Smrg */ 25697ec681f3Smrg isl_surf_get_image_surf(&screen->isl_dev, &res->surf, 25707ec681f3Smrg read_view->base_level, 25717ec681f3Smrg 0, read_view->base_array_layer, 25727ec681f3Smrg &read_surf, &read_surf_offset_B, 25737ec681f3Smrg &read_surf_tile_x_sa, &read_surf_tile_y_sa); 25747ec681f3Smrg read_view->base_level = 0; 25757ec681f3Smrg read_view->base_array_layer = 0; 25767ec681f3Smrg assert(read_view->array_len == 1); 25777ec681f3Smrg } else if (tex->target == PIPE_TEXTURE_1D_ARRAY) { 25787ec681f3Smrg /* Convert 1D array textures to 2D arrays because shaders always provide 25797ec681f3Smrg * the array index coordinate at the Z component to avoid recompiles 25807ec681f3Smrg * when changing the texture target of the framebuffer. 
25817ec681f3Smrg */ 25827ec681f3Smrg assert(read_surf.dim_layout == ISL_DIM_LAYOUT_GFX4_2D); 25837ec681f3Smrg read_surf.dim = ISL_SURF_DIM_2D; 25847ec681f3Smrg } 25857ec681f3Smrg#endif 25867ec681f3Smrg 25879f464c52Smaya surf->clear_color = res->aux.clear_color; 25889f464c52Smaya 25899f464c52Smaya /* Bail early for depth/stencil - we don't want SURFACE_STATE for them. */ 25909f464c52Smaya if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT | 25919f464c52Smaya ISL_SURF_USAGE_STENCIL_BIT)) 25929f464c52Smaya return psurf; 25939f464c52Smaya 25949f464c52Smaya 25957ec681f3Smrg alloc_surface_states(&surf->surface_state, res->aux.possible_usages); 25967ec681f3Smrg surf->surface_state.bo_address = res->bo->address; 25977ec681f3Smrg 25987ec681f3Smrg#if GFX_VER == 8 25997ec681f3Smrg alloc_surface_states(&surf->surface_state_read, res->aux.possible_usages); 26007ec681f3Smrg surf->surface_state_read.bo_address = res->bo->address; 26017ec681f3Smrg#endif 26029f464c52Smaya 26039f464c52Smaya if (!isl_format_is_compressed(res->surf.format)) { 26047ec681f3Smrg void *map = surf->surface_state.cpu; 26057ec681f3Smrg UNUSED void *map_read = surf->surface_state_read.cpu; 26067ec681f3Smrg 26079f464c52Smaya /* This is a normal surface. Fill out a SURFACE_STATE for each possible 26089f464c52Smaya * auxiliary surface mode and return the pipe_surface. 
26099f464c52Smaya */ 26109f464c52Smaya unsigned aux_modes = res->aux.possible_usages; 26119f464c52Smaya while (aux_modes) { 26129f464c52Smaya enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); 26137ec681f3Smrg fill_surface_state(&screen->isl_dev, map, res, &res->surf, 26147ec681f3Smrg view, aux_usage, 0, 0, 0); 26159f464c52Smaya map += SURFACE_STATE_ALIGNMENT; 26167ec681f3Smrg 26177ec681f3Smrg#if GFX_VER == 8 26187ec681f3Smrg fill_surface_state(&screen->isl_dev, map_read, res, 26197ec681f3Smrg &read_surf, read_view, aux_usage, 26207ec681f3Smrg read_surf_offset_B, 26217ec681f3Smrg read_surf_tile_x_sa, read_surf_tile_y_sa); 26227ec681f3Smrg map_read += SURFACE_STATE_ALIGNMENT; 26237ec681f3Smrg#endif 26249f464c52Smaya } 26259f464c52Smaya 26269f464c52Smaya return psurf; 26279f464c52Smaya } 26289f464c52Smaya 26299f464c52Smaya /* The resource has a compressed format, which is not renderable, but we 26309f464c52Smaya * have a renderable view format. We must be attempting to upload blocks 26319f464c52Smaya * of compressed data via an uncompressed view. 26329f464c52Smaya * 26339f464c52Smaya * In this case, we can assume there are no auxiliary buffers, a single 26349f464c52Smaya * miplevel, and that the resource is single-sampled. Gallium may try 26359f464c52Smaya * and create an uncompressed view with multiple layers, however. 
26369f464c52Smaya */ 26379f464c52Smaya assert(!isl_format_is_compressed(fmt.fmt)); 26389f464c52Smaya assert(res->aux.possible_usages == 1 << ISL_AUX_USAGE_NONE); 26399f464c52Smaya assert(res->surf.samples == 1); 26409f464c52Smaya assert(view->levels == 1); 26419f464c52Smaya 26429f464c52Smaya struct isl_surf isl_surf; 26437ec681f3Smrg uint64_t offset_B = 0; 26447ec681f3Smrg uint32_t tile_x_el = 0, tile_y_el = 0; 26457ec681f3Smrg bool ok = isl_surf_get_uncompressed_surf(&screen->isl_dev, &res->surf, 26467ec681f3Smrg view, &isl_surf, view, 26477ec681f3Smrg &offset_B, &tile_x_el, &tile_y_el); 26487ec681f3Smrg if (!ok) { 26497ec681f3Smrg free(surf); 26507ec681f3Smrg return NULL; 26519f464c52Smaya } 26529f464c52Smaya 26539f464c52Smaya psurf->width = isl_surf.logical_level0_px.width; 26549f464c52Smaya psurf->height = isl_surf.logical_level0_px.height; 26559f464c52Smaya 26569f464c52Smaya struct isl_surf_fill_state_info f = { 26579f464c52Smaya .surf = &isl_surf, 26589f464c52Smaya .view = view, 26597ec681f3Smrg .mocs = iris_mocs(res->bo, &screen->isl_dev, 26607ec681f3Smrg ISL_SURF_USAGE_RENDER_TARGET_BIT), 26617ec681f3Smrg .address = res->bo->address + offset_B, 26627ec681f3Smrg .x_offset_sa = tile_x_el, /* Single-sampled, so el == sa */ 26637ec681f3Smrg .y_offset_sa = tile_y_el, /* Single-sampled, so el == sa */ 26649f464c52Smaya }; 26659f464c52Smaya 26667ec681f3Smrg isl_surf_fill_state_s(&screen->isl_dev, surf->surface_state.cpu, &f); 26677ec681f3Smrg 26689f464c52Smaya return psurf; 26699f464c52Smaya} 26709f464c52Smaya 26717ec681f3Smrg#if GFX_VER < 9 26729f464c52Smayastatic void 26739f464c52Smayafill_default_image_param(struct brw_image_param *param) 26749f464c52Smaya{ 26759f464c52Smaya memset(param, 0, sizeof(*param)); 26769f464c52Smaya /* Set the swizzling shifts to all-ones to effectively disable swizzling -- 26779f464c52Smaya * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more 26789f464c52Smaya * detailed explanation of these parameters. 
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

/* Image param for a buffer image: size in texels (bytes / cpp) and a
 * stride of one texel.
 */
static void
fill_buffer_image_param(struct brw_image_param *param,
                        enum pipe_format pfmt,
                        unsigned size)
{
   const unsigned cpp = util_format_get_blocksize(pfmt);

   fill_default_image_param(param);
   param->size[0] = size / cpp;
   param->stride[0] = cpp;
}
#else
/* Gfx9+ doesn't use image params; stub these out. */
#define isl_surf_fill_image_param(x, ...)
#define fill_default_image_param(x, ...)
#define fill_buffer_image_param(x, ...)
#endif

/**
 * The pipe->set_shader_images() driver hook.
 */
static void
iris_set_shader_images(struct pipe_context *ctx,
                       enum pipe_shader_type p_stage,
                       unsigned start_slot, unsigned count,
                       unsigned unbind_num_trailing_slots,
                       const struct pipe_image_view *p_images)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   gl_shader_stage stage = stage_from_pipe(p_stage);
   struct iris_shader_state *shs = &ice->state.shaders[stage];
#if GFX_VER == 8
   struct iris_genx_state *genx = ice->state.genx;
   struct brw_image_param *image_params = genx->shaders[stage].image_param;
#endif

   /* Clear the bound bits for every slot we're (re)binding or unbinding. */
   shs->bound_image_views &=
      ~u_bit_consecutive(start_slot, count + unbind_num_trailing_slots);

   for (unsigned i = 0; i < count; i++) {
      struct iris_image_view *iv = &shs->image[start_slot + i];

      if (p_images && p_images[i].resource) {
         const struct pipe_image_view *img = &p_images[i];
         struct iris_resource *res = (void *) img->resource;

         util_copy_image_view(&iv->base, img);

         shs->bound_image_views |= 1 << (start_slot + i);

         res->bind_history |= PIPE_BIND_SHADER_IMAGE;
         res->bind_stages |= 1 << stage;

         enum isl_format isl_fmt = iris_image_view_get_format(ice, img);

         /* Render compression with images supported on gfx12+ only. */
         unsigned aux_usages = GFX_VER >= 12 ? res->aux.possible_usages :
            1 << ISL_AUX_USAGE_NONE;

         alloc_surface_states(&iv->surface_state, aux_usages);
         iv->surface_state.bo_address = res->bo->address;

         void *map = iv->surface_state.cpu;

         if (res->base.b.target != PIPE_BUFFER) {
            struct isl_view view = {
               .format = isl_fmt,
               .base_level = img->u.tex.level,
               .levels = 1,
               .base_array_layer = img->u.tex.first_layer,
               .array_len = img->u.tex.last_layer - img->u.tex.first_layer + 1,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_STORAGE_BIT,
            };

            /* If using untyped fallback. */
            if (isl_fmt == ISL_FORMAT_RAW) {
               fill_buffer_surface_state(&screen->isl_dev, res, map,
                                         isl_fmt, ISL_SWIZZLE_IDENTITY,
                                         0, res->bo->size,
                                         ISL_SURF_USAGE_STORAGE_BIT);
            } else {
               /* One SURFACE_STATE per supported aux usage. */
               unsigned aux_modes = aux_usages;
               while (aux_modes) {
                  enum isl_aux_usage usage = u_bit_scan(&aux_modes);

                  fill_surface_state(&screen->isl_dev, map, res, &res->surf,
                                     &view, usage, 0, 0, 0);

                  map += SURFACE_STATE_ALIGNMENT;
               }
            }

            isl_surf_fill_image_param(&screen->isl_dev,
                                      &image_params[start_slot + i],
                                      &res->surf, &view);
         } else {
            util_range_add(&res->base.b, &res->valid_buffer_range, img->u.buf.offset,
                           img->u.buf.offset + img->u.buf.size);

            fill_buffer_surface_state(&screen->isl_dev, res, map,
                                      isl_fmt, ISL_SWIZZLE_IDENTITY,
                                      img->u.buf.offset, img->u.buf.size,
                                      ISL_SURF_USAGE_STORAGE_BIT);
            fill_buffer_image_param(&image_params[start_slot + i],
                                    img->format, img->u.buf.size);
         }

         upload_surface_states(ice->state.surface_uploader, &iv->surface_state);
      } else {
         pipe_resource_reference(&iv->base.resource, NULL);
         pipe_resource_reference(&iv->surface_state.ref.res, NULL);
         fill_default_image_param(&image_params[start_slot + i]);
      }
   }

   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
   ice->state.dirty |=
      stage == MESA_SHADER_COMPUTE ?
IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES 28039f464c52Smaya : IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 28049f464c52Smaya 28059f464c52Smaya /* Broadwell also needs brw_image_params re-uploaded */ 28067ec681f3Smrg if (GFX_VER < 9) { 28077ec681f3Smrg ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS << stage; 28087ec681f3Smrg shs->sysvals_need_upload = true; 28097ec681f3Smrg } 28107ec681f3Smrg 28117ec681f3Smrg if (unbind_num_trailing_slots) { 28127ec681f3Smrg iris_set_shader_images(ctx, p_stage, start_slot + count, 28137ec681f3Smrg unbind_num_trailing_slots, 0, NULL); 28149f464c52Smaya } 28159f464c52Smaya} 28169f464c52Smaya 28179f464c52Smaya 28189f464c52Smaya/** 28199f464c52Smaya * The pipe->set_sampler_views() driver hook. 28209f464c52Smaya */ 28219f464c52Smayastatic void 28229f464c52Smayairis_set_sampler_views(struct pipe_context *ctx, 28239f464c52Smaya enum pipe_shader_type p_stage, 28249f464c52Smaya unsigned start, unsigned count, 28257ec681f3Smrg unsigned unbind_num_trailing_slots, 28267ec681f3Smrg bool take_ownership, 28279f464c52Smaya struct pipe_sampler_view **views) 28289f464c52Smaya{ 28299f464c52Smaya struct iris_context *ice = (struct iris_context *) ctx; 28309f464c52Smaya gl_shader_stage stage = stage_from_pipe(p_stage); 28319f464c52Smaya struct iris_shader_state *shs = &ice->state.shaders[stage]; 28327ec681f3Smrg unsigned i; 28339f464c52Smaya 28347ec681f3Smrg shs->bound_sampler_views &= 28357ec681f3Smrg ~u_bit_consecutive(start, count + unbind_num_trailing_slots); 28369f464c52Smaya 28377ec681f3Smrg for (i = 0; i < count; i++) { 28389f464c52Smaya struct pipe_sampler_view *pview = views ? 
views[i] : NULL;

      if (take_ownership) {
         /* Caller hands us its reference -- drop ours and adopt theirs. */
         pipe_sampler_view_reference((struct pipe_sampler_view **)
                                     &shs->textures[start + i], NULL);
         shs->textures[start + i] = (struct iris_sampler_view *)pview;
      } else {
         pipe_sampler_view_reference((struct pipe_sampler_view **)
                                     &shs->textures[start + i], pview);
      }
      struct iris_sampler_view *view = (void *) pview;
      if (view) {
         view->res->bind_history |= PIPE_BIND_SAMPLER_VIEW;
         view->res->bind_stages |= 1 << stage;

         shs->bound_sampler_views |= 1 << (start + i);

         update_surface_state_addrs(ice->state.surface_uploader,
                                    &view->surface_state, view->res->bo);
      }
   }
   /* Unbind any trailing slots the caller asked us to clear. */
   for (; i < count + unbind_num_trailing_slots; i++) {
      pipe_sampler_view_reference((struct pipe_sampler_view **)
                                  &shs->textures[start + i], NULL);
   }

   ice->state.stage_dirty |= (IRIS_STAGE_DIRTY_BINDINGS_VS << stage);
   ice->state.dirty |=
      stage == MESA_SHADER_COMPUTE ? IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES
                                   : IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
}

/**
 * The pipe->set_compute_resources() driver hook.
 *
 * iris doesn't use this binding mechanism; nothing should ever be bound.
 */
static void
iris_set_compute_resources(struct pipe_context *ctx,
                           unsigned start, unsigned count,
                           struct pipe_surface **resources)
{
   assert(count == 0);
}

/**
 * The pipe->set_global_binding() driver hook.
 *
 * Binds global memory resources for compute, writing each bound buffer's
 * GPU address into the caller-provided handles[i] slot.
 */
static void
iris_set_global_binding(struct pipe_context *ctx,
                        unsigned start_slot, unsigned count,
                        struct pipe_resource **resources,
                        uint32_t **handles)
{
   struct iris_context *ice = (struct iris_context *) ctx;

   assert(start_slot + count <= IRIS_MAX_GLOBAL_BINDINGS);
   for (unsigned i = 0; i < count; i++) {
      if (resources && resources[i]) {
         pipe_resource_reference(&ice->state.global_bindings[start_slot + i],
                                 resources[i]);
         struct iris_resource *res = (void *) resources[i];
         uint64_t addr = res->bo->address;
         memcpy(handles[i], &addr, sizeof(addr));
      } else {
         pipe_resource_reference(&ice->state.global_bindings[start_slot + i],
                                 NULL);
      }
   }

   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_CS;
}

/**
 * The pipe->set_tess_state() driver hook.
 *
 * Stores the default tessellation levels, which are fed to shaders as
 * system values (hence the TCS constants re-upload).
 */
static void
iris_set_tess_state(struct pipe_context *ctx,
                    const float default_outer_level[4],
                    const float default_inner_level[2])
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];

   memcpy(&ice->state.default_outer_level[0], &default_outer_level[0], 4 * sizeof(float));
   memcpy(&ice->state.default_inner_level[0], &default_inner_level[0], 2 * sizeof(float));

   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TCS;
   shs->sysvals_need_upload = true;
}

/**
 * The pipe->set_patch_vertices() driver hook.
 */
static void
iris_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices)
{
   struct iris_context *ice = (struct iris_context *) ctx;

   ice->state.patch_vertices = patch_vertices;
}

/**
 * The pipe->surface_destroy() driver hook.
 *
 * Releases the texture reference, both surface-state references, and the
 * CPU-side surface-state copies before freeing the surface itself.
 */
static void
iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf)
{
   struct iris_surface *surf = (void *) p_surf;
   pipe_resource_reference(&p_surf->texture, NULL);
   pipe_resource_reference(&surf->surface_state.ref.res, NULL);
   pipe_resource_reference(&surf->surface_state_read.ref.res, NULL);
   free(surf->surface_state.cpu);
   free(surf->surface_state_read.cpu);
   free(surf);
}

/**
 * The pipe->set_clip_state() driver hook.
 *
 * User clip planes are passed to shaders as system values, so every stage
 * that can consume them (VS/GS/TES) needs its constants re-uploaded.
 */
static void
iris_set_clip_state(struct pipe_context *ctx,
                    const struct pipe_clip_state *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
   struct iris_shader_state *gshs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
   struct iris_shader_state *tshs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];

   memcpy(&ice->state.clip_planes, state, sizeof(*state));

   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS |
                             IRIS_STAGE_DIRTY_CONSTANTS_GS |
                             IRIS_STAGE_DIRTY_CONSTANTS_TES;
   shs->sysvals_need_upload = true;
   gshs->sysvals_need_upload = true;
   tshs->sysvals_need_upload = true;
}

/**
 * The pipe->set_polygon_stipple() driver hook.
 */
static void
iris_set_polygon_stipple(struct pipe_context *ctx,
                         const struct pipe_poly_stipple *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   memcpy(&ice->state.poly_stipple, state, sizeof(*state));
   ice->state.dirty |= IRIS_DIRTY_POLYGON_STIPPLE;
}

/**
 * The pipe->set_sample_mask() driver hook.
 */
static void
iris_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
{
   struct iris_context *ice = (struct iris_context *) ctx;

   /* We only support 16x MSAA, so we have 16 bits of sample masks.
    * st/mesa may pass us 0xffffffff though, meaning "enable all samples".
    */
   ice->state.sample_mask = sample_mask & 0xffff;
   ice->state.dirty |= IRIS_DIRTY_SAMPLE_MASK;
}

/**
 * The pipe->set_scissor_states() driver hook.
 *
 * This corresponds to our SCISSOR_RECT state structures.  It's an
 * exact match, so we just store them, and memcpy them out later.
 */
static void
iris_set_scissor_states(struct pipe_context *ctx,
                        unsigned start_slot,
                        unsigned num_scissors,
                        const struct pipe_scissor_state *rects)
{
   struct iris_context *ice = (struct iris_context *) ctx;

   for (unsigned i = 0; i < num_scissors; i++) {
      if (rects[i].minx == rects[i].maxx || rects[i].miny == rects[i].maxy) {
         /* If the scissor was out of bounds and got clamped to 0 width/height
          * at the bounds, the subtraction of 1 from maximums could produce a
          * negative number and thus not clip anything.  Instead, just provide
          * a min > max scissor inside the bounds, which produces the expected
          * no rendering.
          */
         ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {
            .minx = 1, .maxx = 0, .miny = 1, .maxy = 0,
         };
      } else {
         /* Hardware scissor maxima are inclusive; Gallium's are exclusive. */
         ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {
            .minx = rects[i].minx, .miny = rects[i].miny,
            .maxx = rects[i].maxx - 1, .maxy = rects[i].maxy - 1,
         };
      }
   }

   ice->state.dirty |= IRIS_DIRTY_SCISSOR_RECT;
}

/**
 * The pipe->set_stencil_ref() driver hook.
 *
 * This is added to 3DSTATE_WM_DEPTH_STENCIL dynamically at draw time.
 */
static void
iris_set_stencil_ref(struct pipe_context *ctx,
                     const struct pipe_stencil_ref state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   memcpy(&ice->state.stencil_ref, &state, sizeof(state));
   /* Where the reference value lives differs by generation, so flag the
    * matching packet for re-emission.
    */
   if (GFX_VER >= 12)
      ice->state.dirty |= IRIS_DIRTY_STENCIL_REF;
   else if (GFX_VER >= 9)
      ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
   else
      ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
}

/**
 * Return one edge of the viewport along \p axis: translate plus scale
 * with \p sign's sign forced onto it.
 */
static float
viewport_extent(const struct pipe_viewport_state *state, int axis, float sign)
{
   return copysignf(state->scale[axis], sign) + state->translate[axis];
}

/**
 * The pipe->set_viewport_states() driver hook.
 *
 * This corresponds to our SF_CLIP_VIEWPORT states.
We can't calculate
 * the guardband yet, as we need the framebuffer dimensions, but we can
 * at least fill out the rest.
 */
static void
iris_set_viewport_states(struct pipe_context *ctx,
                         unsigned start_slot,
                         unsigned count,
                         const struct pipe_viewport_state *states)
{
   struct iris_context *ice = (struct iris_context *) ctx;

   memcpy(&ice->state.viewports[start_slot], states, sizeof(*states) * count);

   ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT;

   /* With depth clipping disabled, the CC viewport's near/far clamps
    * depend on viewport Z extents, so it must be re-emitted too.
    */
   if (ice->state.cso_rast && (!ice->state.cso_rast->depth_clip_near ||
                               !ice->state.cso_rast->depth_clip_far))
      ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
}

/**
 * The pipe->set_framebuffer_state() driver hook.
 *
 * Sets the current draw FBO, including color render targets, depth,
 * and stencil buffers.
 */
static void
iris_set_framebuffer_state(struct pipe_context *ctx,
                           const struct pipe_framebuffer_state *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   struct isl_device *isl_dev = &screen->isl_dev;
   struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
   struct iris_resource *zres;
   struct iris_resource *stencil_res;

   unsigned samples = util_framebuffer_get_num_samples(state);
   unsigned layers = util_framebuffer_get_num_layers(state);

   /* Flag only the packets affected by what actually changed. */
   if (cso->samples != samples) {
      ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;

      /* We need to toggle 3DSTATE_PS::32 Pixel Dispatch Enable */
      if (GFX_VER >= 9 && (cso->samples == 16 || samples == 16))
         ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS;
   }

   if (cso->nr_cbufs != state->nr_cbufs) {
      ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
   }

   if ((cso->layers == 0) != (layers == 0)) {
      ice->state.dirty |= IRIS_DIRTY_CLIP;
   }

   if (cso->width != state->width || cso->height != state->height) {
      ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT;
   }

   if (cso->zsbuf || state->zsbuf) {
      ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER;
   }

   util_copy_framebuffer_state(cso, state);
   cso->samples = samples;
   cso->layers = layers;

   struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer;

   struct isl_view view = {
      .base_level = 0,
      .levels = 1,
      .base_array_layer = 0,
      .array_len = 1,
      .swizzle = ISL_SWIZZLE_IDENTITY,
   };

   struct isl_depth_stencil_hiz_emit_info info = { .view = &view };

   if (cso->zsbuf) {
      iris_get_depth_stencil_resources(cso->zsbuf->texture, &zres,
                                       &stencil_res);

      view.base_level = cso->zsbuf->u.tex.level;
      view.base_array_layer = cso->zsbuf->u.tex.first_layer;
      view.array_len =
         cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 1;

      if (zres) {
         view.usage |= ISL_SURF_USAGE_DEPTH_BIT;

         info.depth_surf = &zres->surf;
         info.depth_address = zres->bo->address + zres->offset;
         info.mocs = iris_mocs(zres->bo, isl_dev, view.usage);

         view.format = zres->surf.format;

         if (iris_resource_level_has_hiz(zres, view.base_level)) {
            info.hiz_usage = zres->aux.usage;
            info.hiz_surf = &zres->aux.surf;
            info.hiz_address = zres->aux.bo->address + zres->aux.offset;
         }

         ice->state.hiz_usage = info.hiz_usage;
      }

      if (stencil_res) {
         view.usage |= ISL_SURF_USAGE_STENCIL_BIT;
         info.stencil_aux_usage = stencil_res->aux.usage;
         info.stencil_surf = &stencil_res->surf;
         info.stencil_address = stencil_res->bo->address + stencil_res->offset;

         /* Stencil-only: the view takes its format/MOCS from stencil. */
         if (!zres) {
            view.format = stencil_res->surf.format;
            info.mocs = iris_mocs(stencil_res->bo, isl_dev, view.usage);
         }
      }
   }

   /* Pre-pack the depth/stencil/HiZ packets; they're copied out at draw. */
   isl_emit_depth_stencil_hiz_s(isl_dev, cso_z->packets, &info);

   /* Make a null surface for unbound buffers */
   void *null_surf_map =
      upload_state(ice->state.surface_uploader, &ice->state.null_fb,
                   4 * GENX(RENDER_SURFACE_STATE_length), 64);
   isl_null_fill_state(&screen->isl_dev, null_surf_map,
                       .size = isl_extent3d(MAX2(cso->width, 1),
                                            MAX2(cso->height, 1),
                                            cso->layers ? cso->layers : 1));
   ice->state.null_fb.offset +=
      iris_bo_offset_from_base_address(iris_resource_bo(ice->state.null_fb.res));

   /* Render target change */
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_FS;

   ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;

   ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;

   ice->state.stage_dirty |=
      ice->state.stage_dirty_for_nos[IRIS_NOS_FRAMEBUFFER];

   if (GFX_VER == 8)
      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
}

/**
 * The pipe->set_constant_buffer() driver hook.
 *
 * This uploads any constant data in user buffers, and references
 * any UBO resources containing constant data.
 */
static void
iris_set_constant_buffer(struct pipe_context *ctx,
                         enum pipe_shader_type p_stage, unsigned index,
                         bool take_ownership,
                         const struct pipe_constant_buffer *input)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   gl_shader_stage stage = stage_from_pipe(p_stage);
   struct iris_shader_state *shs = &ice->state.shaders[stage];
   struct pipe_shader_buffer *cbuf = &shs->constbuf[index];

   /* TODO: Only do this if the buffer changes? */
   pipe_resource_reference(&shs->constbuf_surf_state[index].res, NULL);

   if (input && input->buffer_size && (input->buffer || input->user_buffer)) {
      shs->bound_cbufs |= 1u << index;

      if (input->user_buffer) {
         /* User memory: copy it into a fresh upload-buffer allocation. */
         void *map = NULL;
         pipe_resource_reference(&cbuf->buffer, NULL);
         u_upload_alloc(ice->ctx.const_uploader, 0, input->buffer_size, 64,
                        &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);

         if (!cbuf->buffer) {
            /* Allocation was unsuccessful - just unbind */
            iris_set_constant_buffer(ctx, p_stage, index, false, NULL);
            return;
         }

         assert(map);
         memcpy(map, input->user_buffer, input->buffer_size);
      } else if (input->buffer) {
         if (cbuf->buffer != input->buffer) {
            ice->state.dirty |= (IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
                                 IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES);
            shs->dirty_cbufs |= 1u << index;
         }

         if (take_ownership) {
            /* Caller's reference becomes ours -- no extra refcount. */
            pipe_resource_reference(&cbuf->buffer, NULL);
            cbuf->buffer = input->buffer;
         } else {
            pipe_resource_reference(&cbuf->buffer, input->buffer);
         }

         cbuf->buffer_offset = input->buffer_offset;
      }

      /* Clamp the size so reads can't run past the end of the BO. */
      cbuf->buffer_size =
         MIN2(input->buffer_size,
              iris_resource_bo(cbuf->buffer)->size - cbuf->buffer_offset);

      struct iris_resource *res = (void *) cbuf->buffer;
      res->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
      res->bind_stages |= 1 << stage;
   } else {
      shs->bound_cbufs &= ~(1u << index);
      pipe_resource_reference(&cbuf->buffer, NULL);
   }

   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS << stage;
}

/**
 * Upload the shader's system values (driver-generated constants such as
 * clip planes, tess levels, and workgroup info) into the last constant
 * buffer slot, preceded by any compute kernel input data.
 */
static void
upload_sysvals(struct iris_context *ice,
               gl_shader_stage stage,
               const struct pipe_grid_info *grid)
{
   UNUSED struct iris_genx_state *genx = ice->state.genx;
   struct iris_shader_state *shs = &ice->state.shaders[stage];

   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
   if (!shader || (shader->num_system_values == 0 &&
                   shader->kernel_input_size == 0))
      return;

   assert(shader->num_cbufs > 0);

   /* Sysvals always live in the highest-numbered constant buffer. */
   unsigned sysval_cbuf_index = shader->num_cbufs - 1;
   struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index];
   unsigned system_values_start =
      ALIGN(shader->kernel_input_size, sizeof(uint32_t));
   unsigned upload_size = system_values_start +
                          shader->num_system_values * sizeof(uint32_t);
   void *map = NULL;

   assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
   u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
                  &cbuf->buffer_offset, &cbuf->buffer, &map);

   if (shader->kernel_input_size > 0)
      memcpy(map, grid->input, shader->kernel_input_size);

   uint32_t *sysval_map = map + system_values_start;
   for (int i = 0; i < shader->num_system_values; i++) {
      uint32_t sysval = shader->system_values[i];
      uint32_t value = 0;

      if (BRW_PARAM_DOMAIN(sysval) == BRW_PARAM_DOMAIN_IMAGE) {
#if GFX_VER == 8
         /* Gen8: pull the requested dword out of the stored image param. */
         unsigned img = BRW_PARAM_IMAGE_IDX(sysval);
         unsigned offset = BRW_PARAM_IMAGE_OFFSET(sysval);
         struct brw_image_param *param =
            &genx->shaders[stage].image_param[img];

         assert(offset < sizeof(struct brw_image_param));
         value = ((uint32_t *) param)[offset];
#endif
      } else if (sysval == BRW_PARAM_BUILTIN_ZERO) {
         value = 0;
      } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) {
         int plane = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(sysval);
         int comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(sysval);
         value = fui(ice->state.clip_planes.ucp[plane][comp]);
      } else if (sysval == BRW_PARAM_BUILTIN_PATCH_VERTICES_IN) {
         if (stage == MESA_SHADER_TESS_CTRL) {
            value = ice->state.vertices_per_patch;
         } else {
            assert(stage == MESA_SHADER_TESS_EVAL);
            /* TES sees the TCS output vertex count when a TCS is bound. */
            const struct shader_info *tcs_info =
               iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
            if (tcs_info)
               value = tcs_info->tess.tcs_vertices_out;
            else
               value = ice->state.vertices_per_patch;
         }
      } else if (sysval >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X &&
                 sysval <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) {
         unsigned i = sysval - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
         value = fui(ice->state.default_outer_level[i]);
      } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) {
         value = fui(ice->state.default_inner_level[0]);
      } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) {
         value = fui(ice->state.default_inner_level[1]);
      } else if (sysval >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X &&
                 sysval <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) {
         unsigned i = sysval - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X;
         value = ice->state.last_block[i];
      } else if (sysval == BRW_PARAM_BUILTIN_WORK_DIM) {
         value = grid->work_dim;
      } else {
         assert(!"unhandled system value");
      }

      *sysval_map++ = value;
   }

   cbuf->buffer_size = upload_size;
   iris_upload_ubo_ssbo_surf_state(ice, cbuf,
                                   &shs->constbuf_surf_state[sysval_cbuf_index],
                                   ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);

   shs->sysvals_need_upload = false;
}

/**
 * The pipe->set_shader_buffers() driver hook.
 *
 * This binds SSBOs and ABOs.
Unfortunately, we need to stream out
 * SURFACE_STATE here, as the buffer offset may change each time.
 */
static void
iris_set_shader_buffers(struct pipe_context *ctx,
                        enum pipe_shader_type p_stage,
                        unsigned start_slot, unsigned count,
                        const struct pipe_shader_buffer *buffers,
                        unsigned writable_bitmask)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   gl_shader_stage stage = stage_from_pipe(p_stage);
   struct iris_shader_state *shs = &ice->state.shaders[stage];

   unsigned modified_bits = u_bit_consecutive(start_slot, count);

   /* Reset bound/writable tracking for every slot we're touching. */
   shs->bound_ssbos &= ~modified_bits;
   shs->writable_ssbos &= ~modified_bits;
   shs->writable_ssbos |= writable_bitmask << start_slot;

   for (unsigned i = 0; i < count; i++) {
      if (buffers && buffers[i].buffer) {
         struct iris_resource *res = (void *) buffers[i].buffer;
         struct pipe_shader_buffer *ssbo = &shs->ssbo[start_slot + i];
         struct iris_state_ref *surf_state =
            &shs->ssbo_surf_state[start_slot + i];
         pipe_resource_reference(&ssbo->buffer, &res->base.b);
         ssbo->buffer_offset = buffers[i].buffer_offset;
         /* Clamp the size so reads can't run past the end of the BO. */
         ssbo->buffer_size =
            MIN2(buffers[i].buffer_size, res->bo->size - ssbo->buffer_offset);

         shs->bound_ssbos |= 1 << (start_slot + i);

         isl_surf_usage_flags_t usage = ISL_SURF_USAGE_STORAGE_BIT;

         iris_upload_ubo_ssbo_surf_state(ice, ssbo, surf_state, usage);

         res->bind_history |= PIPE_BIND_SHADER_BUFFER;
         res->bind_stages |= 1 << stage;

         util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset,
                        ssbo->buffer_offset + ssbo->buffer_size);
      } else {
         pipe_resource_reference(&shs->ssbo[start_slot + i].buffer, NULL);
         pipe_resource_reference(&shs->ssbo_surf_state[start_slot + i].res,
                                 NULL);
      }
   }

   ice->state.dirty |= (IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
                        IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES);
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
}

/**
 * Generic pipe->delete_*_state() hook for CSOs that own no GPU resources:
 * the state object is simply heap-freed.
 */
static void
iris_delete_state(struct pipe_context *ctx, void *state)
{
   free(state);
}

/**
 * The pipe->set_vertex_buffers() driver hook.
 *
 * This translates pipe_vertex_buffer to our 3DSTATE_VERTEX_BUFFERS packet.
 */
static void
iris_set_vertex_buffers(struct pipe_context *ctx,
                        unsigned start_slot, unsigned count,
                        unsigned unbind_num_trailing_slots,
                        bool take_ownership,
                        const struct pipe_vertex_buffer *buffers)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   struct iris_genx_state *genx = ice->state.genx;

   /* Clear the bound bits for every slot touched by this call, including
    * the trailing slots we were asked to unbind.
    */
   ice->state.bound_vertex_buffers &=
      ~u_bit_consecutive64(start_slot, count + unbind_num_trailing_slots);

   for (unsigned i = 0; i < count; i++) {
      const struct pipe_vertex_buffer *buffer = buffers ? &buffers[i] : NULL;
      struct iris_vertex_buffer_state *state =
         &genx->vertex_buffers[start_slot + i];

      if (!buffer) {
         /* Unbind this slot. */
         pipe_resource_reference(&state->resource, NULL);
         continue;
      }

      /* We may see user buffers that are NULL bindings. */
      assert(!(buffer->is_user_buffer && buffer->buffer.user != NULL));

      /* A genuinely new buffer in this slot may require a VF cache flush. */
      if (buffer->buffer.resource &&
          state->resource != buffer->buffer.resource)
         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFER_FLUSHES;

      if (take_ownership) {
         /* Caller transfers its reference: drop ours, adopt theirs. */
         pipe_resource_reference(&state->resource, NULL);
         state->resource = buffer->buffer.resource;
      } else {
         pipe_resource_reference(&state->resource, buffer->buffer.resource);
      }
      struct iris_resource *res = (void *) state->resource;

      state->offset = (int) buffer->buffer_offset;

      if (res) {
         ice->state.bound_vertex_buffers |= 1ull << (start_slot + i);
         res->bind_history |= PIPE_BIND_VERTEX_BUFFER;
      }

      /* Pre-pack the VERTEX_BUFFER_STATE so draw time just copies it. */
      iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) {
         vb.VertexBufferIndex = start_slot + i;
         vb.AddressModifyEnable = true;
         vb.BufferPitch = buffer->stride;
         if (res) {
            vb.BufferSize = res->base.b.width0 - (int) buffer->buffer_offset;
            vb.BufferStartingAddress =
               ro_bo(NULL, res->bo->address + (int) buffer->buffer_offset);
            vb.MOCS = iris_mocs(res->bo, &screen->isl_dev,
                                ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
            vb.L3BypassDisable = true;
#endif
         } else {
            vb.NullVertexBuffer = true;
         }
      }
   }

   /* Release references held by the trailing slots being unbound. */
   for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
      struct iris_vertex_buffer_state *state =
         &genx->vertex_buffers[start_slot + count + i];

      pipe_resource_reference(&state->resource, NULL);
   }

   ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
}

/**
 * Gallium CSO for vertex elements.
 *
 * Holds pre-packed 3DSTATE_VERTEX_ELEMENTS / 3DSTATE_VF_INSTANCING DWords,
 * plus an alternate last element (edgeflag_ve/edgeflag_vfi) used when the
 * vertex shader consumes the edge flag.  The arrays are sized for 33
 * elements: 32 slots plus one potential SGV element.
 */
struct iris_vertex_element_state {
   uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
   uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)];
   uint32_t edgeflag_ve[GENX(VERTEX_ELEMENT_STATE_length)];
   uint32_t edgeflag_vfi[GENX(3DSTATE_VF_INSTANCING_length)];
   unsigned count;
};

/**
 * The pipe->create_vertex_elements() driver hook.
 *
 * This translates pipe_vertex_element to our 3DSTATE_VERTEX_ELEMENTS
 * and 3DSTATE_VF_INSTANCING commands.  The vertex_elements and vf_instancing
 * arrays are ready to be emitted at draw time if no EdgeFlag or SGVs are
 * needed.  In these cases we will need information available at draw time.
 * We setup edgeflag_ve and edgeflag_vfi as alternatives last
 * 3DSTATE_VERTEX_ELEMENT and 3DSTATE_VF_INSTANCING that can be used at
 * draw time if we detect that EdgeFlag is needed by the Vertex Shader.
 */
static void *
iris_create_vertex_elements(struct pipe_context *ctx,
                            unsigned count,
                            const struct pipe_vertex_element *state)
{
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   /* NOTE(review): malloc result is not checked before use; presumably the
    * caller/environment treats OOM here as unrecoverable — confirm.
    */
   struct iris_vertex_element_state *cso =
      malloc(sizeof(struct iris_vertex_element_state));

   cso->count = count;

   /* Pre-pack the 3DSTATE_VERTEX_ELEMENTS header; count == 0 still emits
    * one placeholder element (see below), hence MAX2(count, 1).
    */
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) {
      ve.DWordLength =
         1 + GENX(VERTEX_ELEMENT_STATE_length) * MAX2(count, 1) - 2;
   }

   uint32_t *ve_pack_dest = &cso->vertex_elements[1];
   uint32_t *vfi_pack_dest = cso->vf_instancing;

   if (count == 0) {
      /* No elements: emit a single constant (0, 0, 0, 1) element. */
      iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
         ve.Valid = true;
         ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
         ve.Component0Control = VFCOMP_STORE_0;
         ve.Component1Control = VFCOMP_STORE_0;
         ve.Component2Control = VFCOMP_STORE_0;
         ve.Component3Control = VFCOMP_STORE_1_FP;
      }

      iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
      }
   }

   for (int i = 0; i < count; i++) {
      const struct iris_format_info fmt =
         iris_format_for_usage(devinfo, state[i].src_format, 0);
      unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC,
                           VFCOMP_STORE_SRC, VFCOMP_STORE_SRC };

      /* Components not present in the source format read as 0, and the
       * W component as 1 (integer or float depending on the format).
       * Deliberate cascade: a format with N channels zeroes comp[N..2].
       */
      switch (isl_format_get_num_channels(fmt.fmt)) {
      case 0: comp[0] = VFCOMP_STORE_0; FALLTHROUGH;
      case 1: comp[1] = VFCOMP_STORE_0; FALLTHROUGH;
      case 2: comp[2] = VFCOMP_STORE_0; FALLTHROUGH;
      case 3:
         comp[3] = isl_format_has_int_channel(fmt.fmt) ? VFCOMP_STORE_1_INT
                                                       : VFCOMP_STORE_1_FP;
         break;
      }
      iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
         ve.EdgeFlagEnable = false;
         ve.VertexBufferIndex = state[i].vertex_buffer_index;
         ve.Valid = true;
         ve.SourceElementOffset = state[i].src_offset;
         ve.SourceElementFormat = fmt.fmt;
         ve.Component0Control = comp[0];
         ve.Component1Control = comp[1];
         ve.Component2Control = comp[2];
         ve.Component3Control = comp[3];
      }

      iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
         vi.VertexElementIndex = i;
         vi.InstancingEnable = state[i].instance_divisor > 0;
         vi.InstanceDataStepRate = state[i].instance_divisor;
      }

      ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
      vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length);
   }

   /* An alternative version of the last VE and VFI is stored so it
    * can be used at draw time in case Vertex Shader uses EdgeFlag
    */
   if (count) {
      const unsigned edgeflag_index = count - 1;
      const struct iris_format_info fmt =
         iris_format_for_usage(devinfo, state[edgeflag_index].src_format, 0);
      iris_pack_state(GENX(VERTEX_ELEMENT_STATE), cso->edgeflag_ve, ve) {
         ve.EdgeFlagEnable = true;
         ve.VertexBufferIndex = state[edgeflag_index].vertex_buffer_index;
         ve.Valid = true;
         ve.SourceElementOffset = state[edgeflag_index].src_offset;
         ve.SourceElementFormat = fmt.fmt;
         ve.Component0Control = VFCOMP_STORE_SRC;
         ve.Component1Control = VFCOMP_STORE_0;
         ve.Component2Control = VFCOMP_STORE_0;
         ve.Component3Control = VFCOMP_STORE_0;
      }
      iris_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->edgeflag_vfi, vi) {
         /* The vi.VertexElementIndex of the EdgeFlag Vertex Element is filled
          * at draw time, as it should change if SGVs are emitted.
          */
         vi.InstancingEnable = state[edgeflag_index].instance_divisor > 0;
         vi.InstanceDataStepRate = state[edgeflag_index].instance_divisor;
      }
   }

   return cso;
}

/**
 * The pipe->bind_vertex_elements_state() driver hook.
 */
static void
iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_vertex_element_state *old_cso = ice->state.cso_vertex_elements;
   struct iris_vertex_element_state *new_cso = state;

   /* 3DSTATE_VF_SGVs overrides the last VE, so if the count is changing,
    * we need to re-emit it to ensure we're overriding the right one.
    */
   if (new_cso && cso_changed(count))
      ice->state.dirty |= IRIS_DIRTY_VF_SGVS;

   ice->state.cso_vertex_elements = state;
   ice->state.dirty |= IRIS_DIRTY_VERTEX_ELEMENTS;
}

/**
 * The pipe->create_stream_output_target() driver hook.
 *
 * "Target" here refers to a destination buffer.  We translate this into
 * a 3DSTATE_SO_BUFFER packet.  We can handle most fields, but don't yet
 * know which buffer this represents, or whether we ought to zero the
 * write-offsets, or append.  Those are handled in the set() hook.
 *
 * Returns NULL on allocation failure; ownership of the returned target
 * passes to the caller (released via the target destroy hook).
 */
static struct pipe_stream_output_target *
iris_create_stream_output_target(struct pipe_context *ctx,
                                 struct pipe_resource *p_res,
                                 unsigned buffer_offset,
                                 unsigned buffer_size)
{
   struct iris_resource *res = (void *) p_res;
   struct iris_stream_output_target *cso = calloc(1, sizeof(*cso));
   if (!cso)
      return NULL;

   res->bind_history |= PIPE_BIND_STREAM_OUTPUT;

   pipe_reference_init(&cso->base.reference, 1);
   pipe_resource_reference(&cso->base.buffer, p_res);
   cso->base.buffer_offset = buffer_offset;
   cso->base.buffer_size = buffer_size;
   cso->base.context = ctx;

   /* Streamout may write anywhere in the bound range. */
   util_range_add(&res->base.b, &res->valid_buffer_range, buffer_offset,
                  buffer_offset + buffer_size);

   return &cso->base;
}

static void
iris_stream_output_target_destroy(struct pipe_context *ctx,
                                  struct pipe_stream_output_target *state)
{
   /* Release the target's buffer and its GPU-side offset scratch, then
    * free the CSO itself.
    */
   struct iris_stream_output_target *cso = (void *) state;

   pipe_resource_reference(&cso->base.buffer, NULL);
   pipe_resource_reference(&cso->offset.res, NULL);

   free(cso);
}

/**
 * The pipe->set_stream_output_targets() driver hook.
 *
 * At this point, we know which targets are bound to a particular index,
 * and also whether we want to append or start over.  We can finish the
 * 3DSTATE_SO_BUFFER packets we started earlier.
 */
static void
iris_set_stream_output_targets(struct pipe_context *ctx,
                               unsigned num_targets,
                               struct pipe_stream_output_target **targets,
                               const unsigned *offsets)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_genx_state *genx = ice->state.genx;
   uint32_t *so_buffers = genx->so_buffers;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;

   const bool active = num_targets > 0;
   if (ice->state.streamout_active != active) {
      ice->state.streamout_active = active;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT;

      /* We only emit 3DSTATE_SO_DECL_LIST when streamout is active, because
       * it's a non-pipelined command.  If we're switching streamout on, we
       * may have missed emitting it earlier, so do so now.  (We're already
       * taking a stall to update 3DSTATE_SO_BUFFERS anyway...)
       */
      if (active) {
         ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST;
      } else {
         /* Streamout is being switched off: flush so the written SO data
          * becomes visible to subsequent reads (e.g. as a vertex buffer).
          */
         uint32_t flush = 0;
         for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
            struct iris_stream_output_target *tgt =
               (void *) ice->state.so_target[i];
            if (tgt) {
               struct iris_resource *res = (void *) tgt->base.buffer;

               flush |= iris_flush_bits_for_history(ice, res);
               iris_dirty_for_history(ice, res);
            }
         }
#if GFX_VER >= 12
         /* SO draws require flushing of const cache to make SO data
          * observable when VB/IB are cached in L3.
          */
         if (flush & PIPE_CONTROL_VF_CACHE_INVALIDATE)
            flush |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
#endif
         iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
                                      "make streamout results visible", flush);
      }
   }

   /* Swap in the new targets, dropping references to any stale ones. */
   for (int i = 0; i < 4; i++) {
      pipe_so_target_reference(&ice->state.so_target[i],
                               i < num_targets ? targets[i] : NULL);
   }

   /* No need to update 3DSTATE_SO_BUFFER unless SOL is active. */
   if (!active)
      return;

   for (unsigned i = 0; i < 4; i++,
        so_buffers += GENX(3DSTATE_SO_BUFFER_length)) {

      struct iris_stream_output_target *tgt = (void *) ice->state.so_target[i];
      unsigned offset = offsets[i];

      if (!tgt) {
         /* Unbound slot: pack a disabled SO_BUFFER for this index. */
         iris_pack_command(GENX(3DSTATE_SO_BUFFER), so_buffers, sob) {
#if GFX_VER < 12
            sob.SOBufferIndex = i;
#else
            sob._3DCommandOpcode = 0;
            sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD + i;
#endif
         }
         continue;
      }

      /* Lazily allocate the little GPU buffer that holds the write offset. */
      if (!tgt->offset.res)
         upload_state(ctx->const_uploader, &tgt->offset, sizeof(uint32_t), 4);

      struct iris_resource *res = (void *) tgt->base.buffer;

      /* Note that offsets[i] will either be 0, causing us to zero
       * the value in the buffer, or 0xFFFFFFFF, which happens to mean
       * "continue appending at the existing offset."
       */
      assert(offset == 0 || offset == 0xFFFFFFFF);

      /* When we're first called with an offset of 0, we want the next
       * 3DSTATE_SO_BUFFER packets to reset the offset to the beginning.
       * Any further times we emit those packets, we want to use 0xFFFFFFFF
       * to continue appending from the current offset.
       *
       * Note that we might be called by Begin (offset = 0), Pause, then
       * Resume (offset = 0xFFFFFFFF) before ever drawing (where these
       * commands will actually be sent to the GPU).  In this case, we
       * don't want to append - we still want to do our initial zeroing.
       */
      if (offset == 0)
         tgt->zero_offset = true;

      iris_pack_command(GENX(3DSTATE_SO_BUFFER), so_buffers, sob) {
#if GFX_VER < 12
         sob.SOBufferIndex = i;
#else
         sob._3DCommandOpcode = 0;
         sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD + i;
#endif
         sob.SurfaceBaseAddress =
            rw_bo(NULL, res->bo->address + tgt->base.buffer_offset,
                  IRIS_DOMAIN_OTHER_WRITE);
         sob.SOBufferEnable = true;
         sob.StreamOffsetWriteEnable = true;
         sob.StreamOutputBufferOffsetAddressEnable = true;
         sob.MOCS = iris_mocs(res->bo, &screen->isl_dev, 0);

         sob.SurfaceSize = MAX2(tgt->base.buffer_size / 4, 1) - 1;
         sob.StreamOutputBufferOffsetAddress =
            rw_bo(NULL, iris_resource_bo(tgt->offset.res)->address +
                  tgt->offset.offset, IRIS_DOMAIN_OTHER_WRITE);
         sob.StreamOffset = 0xFFFFFFFF; /* not offset, see above */
      }
   }

   ice->state.dirty |= IRIS_DIRTY_SO_BUFFERS;
}

/**
 * An iris-vtable helper for encoding the 3DSTATE_SO_DECL_LIST and
 * 3DSTATE_STREAMOUT packets.
 *
 * 3DSTATE_SO_DECL_LIST is a list of shader outputs we want the streamout
 * hardware to record.  We can create it entirely based on the shader, with
 * no dynamic state dependencies.
 *
 * 3DSTATE_STREAMOUT is an annoying mix of shader-based information and
 * state-based settings.
We capture the shader-related ones here, and merge
 * the rest in at draw time.
 *
 * Returns a ralloc'd buffer (caller owns) containing the packed
 * 3DSTATE_STREAMOUT DWords followed by the 3DSTATE_SO_DECL_LIST DWords.
 */
static uint32_t *
iris_create_so_decl_list(const struct pipe_stream_output_info *info,
                         const struct brw_vue_map *vue_map)
{
   struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
   int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int max_decls = 0;
   STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);

   memset(so_decl, 0, sizeof(so_decl));

   /* Construct the list of SO_DECLs to be emitted.  The formatting of the
    * command feels strange -- each dword pair contains a SO_DECL per stream.
    */
   for (unsigned i = 0; i < info->num_outputs; i++) {
      const struct pipe_stream_output *output = &info->output[i];
      const int buffer = output->output_buffer;
      const int varying = output->register_index;
      const unsigned stream_id = output->stream;
      assert(stream_id < MAX_VERTEX_STREAMS);

      buffer_mask[stream_id] |= 1 << buffer;

      assert(vue_map->varying_to_slot[varying] >= 0);

      /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
       * array.  Instead, it simply increments DstOffset for the following
       * input by the number of components that should be skipped.
       *
       * Our hardware is unusual in that it requires us to program SO_DECLs
       * for fake "hole" components, rather than simply taking the offset
       * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
       * program as many size = 4 holes as we can, then a final hole to
       * accommodate the final 1, 2, or 3 remaining.
       */
      int skip_components = output->dst_offset - next_offset[buffer];

      while (skip_components > 0) {
         so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
            .HoleFlag = 1,
            .OutputBufferSlot = output->output_buffer,
            .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
         };
         skip_components -= 4;
      }

      next_offset[buffer] = output->dst_offset + output->num_components;

      so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
         .OutputBufferSlot = output->output_buffer,
         .RegisterIndex = vue_map->varying_to_slot[varying],
         .ComponentMask =
            ((1 << output->num_components) - 1) << output->start_component,
      };

      if (decls[stream_id] > max_decls)
         max_decls = decls[stream_id];
   }

   /* Allocate space for both packets back to back: 3DSTATE_STREAMOUT,
    * then the SO_DECL_LIST header (3 DWords) plus 2 DWords per entry.
    */
   unsigned dwords = GENX(3DSTATE_STREAMOUT_length) + (3 + 2 * max_decls);
   uint32_t *map = ralloc_size(NULL, sizeof(uint32_t) * dwords);
   uint32_t *so_decl_map = map + GENX(3DSTATE_STREAMOUT_length);

   iris_pack_command(GENX(3DSTATE_STREAMOUT), map, sol) {
      int urb_entry_read_offset = 0;
      int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
         urb_entry_read_offset;

      /* We always read the whole vertex.  This could be reduced at some
       * point by reading less and offsetting the register index in the
       * SO_DECLs.
       */
      sol.Stream0VertexReadOffset = urb_entry_read_offset;
      sol.Stream0VertexReadLength = urb_entry_read_length - 1;
      sol.Stream1VertexReadOffset = urb_entry_read_offset;
      sol.Stream1VertexReadLength = urb_entry_read_length - 1;
      sol.Stream2VertexReadOffset = urb_entry_read_offset;
      sol.Stream2VertexReadLength = urb_entry_read_length - 1;
      sol.Stream3VertexReadOffset = urb_entry_read_offset;
      sol.Stream3VertexReadLength = urb_entry_read_length - 1;

      /* Set buffer pitches; 0 means unbound. */
      sol.Buffer0SurfacePitch = 4 * info->stride[0];
      sol.Buffer1SurfacePitch = 4 * info->stride[1];
      sol.Buffer2SurfacePitch = 4 * info->stride[2];
      sol.Buffer3SurfacePitch = 4 * info->stride[3];
   }

   iris_pack_command(GENX(3DSTATE_SO_DECL_LIST), so_decl_map, list) {
      list.DWordLength = 3 + 2 * max_decls - 2;
      list.StreamtoBufferSelects0 = buffer_mask[0];
      list.StreamtoBufferSelects1 = buffer_mask[1];
      list.StreamtoBufferSelects2 = buffer_mask[2];
      list.StreamtoBufferSelects3 = buffer_mask[3];
      list.NumEntries0 = decls[0];
      list.NumEntries1 = decls[1];
      list.NumEntries2 = decls[2];
      list.NumEntries3 = decls[3];
   }

   for (int i = 0; i < max_decls; i++) {
      /* Each entry holds one SO_DECL per stream (a dword pair). */
      iris_pack_state(GENX(SO_DECL_ENTRY), so_decl_map + 3 + i * 2, entry) {
         entry.Stream0Decl = so_decl[0][i];
         entry.Stream1Decl = so_decl[1][i];
         entry.Stream2Decl = so_decl[2][i];
         entry.Stream3Decl = so_decl[3][i];
      }
   }

   return map;
}

/**
 * Compute the URB read offset/length (in pairs of slots) that the SBE
 * needs in order to feed the fragment shader its inputs, accounting for
 * front/back color swizzling.
 */
static void
iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots,
                                   const struct brw_vue_map *last_vue_map,
                                   bool two_sided_color,
                                   unsigned *out_offset,
                                   unsigned *out_length)
{
   /* The compiler computes the first URB slot without considering COL/BFC
    * swizzling (because it doesn't know whether it's enabled), so we need
    * to do that here too.  This may result in a smaller offset, which
    * should be safe.
    */
   const unsigned first_slot =
      brw_compute_first_urb_slot_required(fs_input_slots, last_vue_map);

   /* This becomes the URB read offset (counted in pairs of slots). */
   assert(first_slot % 2 == 0);
   *out_offset = first_slot / 2;

   /* We need to adjust the inputs read to account for front/back color
    * swizzling, as it can make the URB length longer.
    */
   for (int c = 0; c <= 1; c++) {
      if (fs_input_slots & (VARYING_BIT_COL0 << c)) {
         /* If two sided color is enabled, the fragment shader's gl_Color
          * (COL0) input comes from either the gl_FrontColor (COL0) or
          * gl_BackColor (BFC0) input varyings.  Mark BFC as used, too.
          */
         if (two_sided_color)
            fs_input_slots |= (VARYING_BIT_BFC0 << c);

         /* If front color isn't written, we opt to give them back color
          * instead of an undefined value.  Switch from COL to BFC.
          */
         if (last_vue_map->varying_to_slot[VARYING_SLOT_COL0 + c] == -1) {
            fs_input_slots &= ~(VARYING_BIT_COL0 << c);
            fs_input_slots |= (VARYING_BIT_BFC0 << c);
         }
      }
   }

   /* Compute the minimum URB Read Length necessary for the FS inputs.
    *
    * From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
    * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
    *
    * "This field should be set to the minimum length required to read the
    *  maximum source attribute.  The maximum source attribute is indicated
    *  by the maximum value of the enabled Attribute # Source Attribute if
    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    *  enable is not set.
    *  read_length = ceiling((max_source_attr + 1) / 2)
    *
    *  [errata] Corruption/Hang possible if length programmed larger than
    *  recommended"
    *
    * Similar text exists for Ivy Bridge.
    *
    * We find the last URB slot that's actually read by the FS.
    */
   unsigned last_read_slot = last_vue_map->num_slots - 1;
   while (last_read_slot > first_slot && !(fs_input_slots &
          (1ull << last_vue_map->slot_to_varying[last_read_slot])))
      --last_read_slot;

   /* The URB read length is the difference of the two, counted in pairs. */
   *out_length = DIV_ROUND_UP(last_read_slot - first_slot + 1, 2);
}

/**
 * Build the SF_OUTPUT_ATTRIBUTE_DETAIL swizzle table that maps FS inputs
 * to VUE slots, overriding attributes the previous stage didn't write.
 */
static void
iris_emit_sbe_swiz(struct iris_batch *batch,
                   const struct iris_context *ice,
                   const struct brw_vue_map *vue_map,
                   unsigned urb_read_offset,
                   unsigned sprite_coord_enables)
{
   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = {};
   const struct brw_wm_prog_data *wm_prog_data = (void *)
      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
   const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;

   /* XXX: this should be generated when putting programs in place */

   for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
      const uint8_t fs_attr = wm_prog_data->urb_setup_attribs[idx];
      const int input_index = wm_prog_data->urb_setup[fs_attr];
      /* Only the first 16 attributes have override slots. */
      if (input_index < 0 || input_index >= 16)
         continue;

      struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr =
         &attr_overrides[input_index];
      int slot = vue_map->varying_to_slot[fs_attr];

      /* Viewport and Layer are stored in the VUE header.
We need to override 40419f464c52Smaya * them to zero if earlier stages didn't write them, as GL requires that 40429f464c52Smaya * they read back as zero when not explicitly set. 40439f464c52Smaya */ 40449f464c52Smaya switch (fs_attr) { 40459f464c52Smaya case VARYING_SLOT_VIEWPORT: 40469f464c52Smaya case VARYING_SLOT_LAYER: 40479f464c52Smaya attr->ComponentOverrideX = true; 40489f464c52Smaya attr->ComponentOverrideW = true; 40499f464c52Smaya attr->ConstantSource = CONST_0000; 40509f464c52Smaya 40519f464c52Smaya if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) 40529f464c52Smaya attr->ComponentOverrideY = true; 40539f464c52Smaya if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) 40549f464c52Smaya attr->ComponentOverrideZ = true; 40559f464c52Smaya continue; 40569f464c52Smaya 40579f464c52Smaya case VARYING_SLOT_PRIMITIVE_ID: 40589f464c52Smaya /* Override if the previous shader stage didn't write gl_PrimitiveID. */ 40599f464c52Smaya if (slot == -1) { 40609f464c52Smaya attr->ComponentOverrideX = true; 40619f464c52Smaya attr->ComponentOverrideY = true; 40629f464c52Smaya attr->ComponentOverrideZ = true; 40639f464c52Smaya attr->ComponentOverrideW = true; 40649f464c52Smaya attr->ConstantSource = PRIM_ID; 40659f464c52Smaya continue; 40669f464c52Smaya } 40677ec681f3Smrg break; 40689f464c52Smaya 40699f464c52Smaya default: 40709f464c52Smaya break; 40719f464c52Smaya } 40729f464c52Smaya 40739f464c52Smaya if (sprite_coord_enables & (1 << input_index)) 40749f464c52Smaya continue; 40759f464c52Smaya 40769f464c52Smaya /* If there was only a back color written but not front, use back 40779f464c52Smaya * as the color instead of undefined. 
40789f464c52Smaya */ 40799f464c52Smaya if (slot == -1 && fs_attr == VARYING_SLOT_COL0) 40809f464c52Smaya slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0]; 40819f464c52Smaya if (slot == -1 && fs_attr == VARYING_SLOT_COL1) 40829f464c52Smaya slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1]; 40839f464c52Smaya 40849f464c52Smaya /* Not written by the previous stage - undefined. */ 40859f464c52Smaya if (slot == -1) { 40869f464c52Smaya attr->ComponentOverrideX = true; 40879f464c52Smaya attr->ComponentOverrideY = true; 40889f464c52Smaya attr->ComponentOverrideZ = true; 40899f464c52Smaya attr->ComponentOverrideW = true; 40909f464c52Smaya attr->ConstantSource = CONST_0001_FLOAT; 40919f464c52Smaya continue; 40929f464c52Smaya } 40939f464c52Smaya 40949f464c52Smaya /* Compute the location of the attribute relative to the read offset, 40959f464c52Smaya * which is counted in 256-bit increments (two 128-bit VUE slots). 40969f464c52Smaya */ 40979f464c52Smaya const int source_attr = slot - 2 * urb_read_offset; 40989f464c52Smaya assert(source_attr >= 0 && source_attr <= 32); 40999f464c52Smaya attr->SourceAttribute = source_attr; 41009f464c52Smaya 41019f464c52Smaya /* If we are doing two-sided color, and the VUE slot following this one 41029f464c52Smaya * represents a back-facing color, then we need to instruct the SF unit 41039f464c52Smaya * to do back-facing swizzling. 
41049f464c52Smaya */ 41059f464c52Smaya if (cso_rast->light_twoside && 41069f464c52Smaya ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 && 41079f464c52Smaya vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) || 41089f464c52Smaya (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 && 41099f464c52Smaya vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1))) 41109f464c52Smaya attr->SwizzleSelect = INPUTATTR_FACING; 41119f464c52Smaya } 41129f464c52Smaya 41139f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) { 41149f464c52Smaya for (int i = 0; i < 16; i++) 41159f464c52Smaya sbes.Attribute[i] = attr_overrides[i]; 41169f464c52Smaya } 41179f464c52Smaya} 41189f464c52Smaya 41197ec681f3Smrgstatic bool 41207ec681f3Smrgiris_is_drawing_points(const struct iris_context *ice) 41217ec681f3Smrg{ 41227ec681f3Smrg const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 41237ec681f3Smrg 41247ec681f3Smrg if (cso_rast->fill_mode_point) { 41257ec681f3Smrg return true; 41267ec681f3Smrg } 41277ec681f3Smrg 41287ec681f3Smrg if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) { 41297ec681f3Smrg const struct brw_gs_prog_data *gs_prog_data = 41307ec681f3Smrg (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data; 41317ec681f3Smrg return gs_prog_data->output_topology == _3DPRIM_POINTLIST; 41327ec681f3Smrg } else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) { 41337ec681f3Smrg const struct brw_tes_prog_data *tes_data = 41347ec681f3Smrg (void *) ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data; 41357ec681f3Smrg return tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT; 41367ec681f3Smrg } else { 41377ec681f3Smrg return ice->state.prim_mode == PIPE_PRIM_POINTS; 41387ec681f3Smrg } 41397ec681f3Smrg} 41407ec681f3Smrg 41419f464c52Smayastatic unsigned 41429f464c52Smayairis_calculate_point_sprite_overrides(const struct brw_wm_prog_data *prog_data, 41439f464c52Smaya const struct iris_rasterizer_state *cso) 41449f464c52Smaya{ 41459f464c52Smaya unsigned 
overrides = 0; 41469f464c52Smaya 41479f464c52Smaya if (prog_data->urb_setup[VARYING_SLOT_PNTC] != -1) 41489f464c52Smaya overrides |= 1 << prog_data->urb_setup[VARYING_SLOT_PNTC]; 41499f464c52Smaya 41509f464c52Smaya for (int i = 0; i < 8; i++) { 41519f464c52Smaya if ((cso->sprite_coord_enable & (1 << i)) && 41529f464c52Smaya prog_data->urb_setup[VARYING_SLOT_TEX0 + i] != -1) 41539f464c52Smaya overrides |= 1 << prog_data->urb_setup[VARYING_SLOT_TEX0 + i]; 41549f464c52Smaya } 41559f464c52Smaya 41569f464c52Smaya return overrides; 41579f464c52Smaya} 41589f464c52Smaya 41599f464c52Smayastatic void 41609f464c52Smayairis_emit_sbe(struct iris_batch *batch, const struct iris_context *ice) 41619f464c52Smaya{ 41629f464c52Smaya const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 41639f464c52Smaya const struct brw_wm_prog_data *wm_prog_data = (void *) 41649f464c52Smaya ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; 41659f464c52Smaya const struct shader_info *fs_info = 41669f464c52Smaya iris_get_shader_info(ice, MESA_SHADER_FRAGMENT); 41677ec681f3Smrg const struct brw_vue_map *last_vue_map = 41687ec681f3Smrg &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map; 41699f464c52Smaya 41709f464c52Smaya unsigned urb_read_offset, urb_read_length; 41719f464c52Smaya iris_compute_sbe_urb_read_interval(fs_info->inputs_read, 41727ec681f3Smrg last_vue_map, 41739f464c52Smaya cso_rast->light_twoside, 41749f464c52Smaya &urb_read_offset, &urb_read_length); 41759f464c52Smaya 41769f464c52Smaya unsigned sprite_coord_overrides = 41777ec681f3Smrg iris_is_drawing_points(ice) ? 
41787ec681f3Smrg iris_calculate_point_sprite_overrides(wm_prog_data, cso_rast) : 0; 41799f464c52Smaya 41809f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) { 41819f464c52Smaya sbe.AttributeSwizzleEnable = true; 41829f464c52Smaya sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; 41839f464c52Smaya sbe.PointSpriteTextureCoordinateOrigin = cso_rast->sprite_coord_mode; 41849f464c52Smaya sbe.VertexURBEntryReadOffset = urb_read_offset; 41859f464c52Smaya sbe.VertexURBEntryReadLength = urb_read_length; 41869f464c52Smaya sbe.ForceVertexURBEntryReadOffset = true; 41879f464c52Smaya sbe.ForceVertexURBEntryReadLength = true; 41889f464c52Smaya sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs; 41899f464c52Smaya sbe.PointSpriteTextureCoordinateEnable = sprite_coord_overrides; 41907ec681f3Smrg#if GFX_VER >= 9 41919f464c52Smaya for (int i = 0; i < 32; i++) { 41929f464c52Smaya sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW; 41939f464c52Smaya } 41949f464c52Smaya#endif 41959f464c52Smaya } 41969f464c52Smaya 41977ec681f3Smrg iris_emit_sbe_swiz(batch, ice, last_vue_map, urb_read_offset, 41987ec681f3Smrg sprite_coord_overrides); 41999f464c52Smaya} 42009f464c52Smaya 42019f464c52Smaya/* ------------------------------------------------------------------- */ 42029f464c52Smaya 42039f464c52Smaya/** 42049f464c52Smaya * Populate VS program key fields based on the current state. 
42059f464c52Smaya */ 42069f464c52Smayastatic void 42079f464c52Smayairis_populate_vs_key(const struct iris_context *ice, 42089f464c52Smaya const struct shader_info *info, 42097ec681f3Smrg gl_shader_stage last_stage, 42107ec681f3Smrg struct iris_vs_prog_key *key) 42119f464c52Smaya{ 42129f464c52Smaya const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 42139f464c52Smaya 42149f464c52Smaya if (info->clip_distance_array_size == 0 && 42157ec681f3Smrg (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) && 42167ec681f3Smrg last_stage == MESA_SHADER_VERTEX) 42177ec681f3Smrg key->vue.nr_userclip_plane_consts = cso_rast->num_clip_plane_consts; 42189f464c52Smaya} 42199f464c52Smaya 42209f464c52Smaya/** 42219f464c52Smaya * Populate TCS program key fields based on the current state. 42229f464c52Smaya */ 42239f464c52Smayastatic void 42249f464c52Smayairis_populate_tcs_key(const struct iris_context *ice, 42257ec681f3Smrg struct iris_tcs_prog_key *key) 42269f464c52Smaya{ 42279f464c52Smaya} 42289f464c52Smaya 42299f464c52Smaya/** 42309f464c52Smaya * Populate TES program key fields based on the current state. 42319f464c52Smaya */ 42329f464c52Smayastatic void 42339f464c52Smayairis_populate_tes_key(const struct iris_context *ice, 42347ec681f3Smrg const struct shader_info *info, 42357ec681f3Smrg gl_shader_stage last_stage, 42367ec681f3Smrg struct iris_tes_prog_key *key) 42379f464c52Smaya{ 42387ec681f3Smrg const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 42397ec681f3Smrg 42407ec681f3Smrg if (info->clip_distance_array_size == 0 && 42417ec681f3Smrg (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) && 42427ec681f3Smrg last_stage == MESA_SHADER_TESS_EVAL) 42437ec681f3Smrg key->vue.nr_userclip_plane_consts = cso_rast->num_clip_plane_consts; 42449f464c52Smaya} 42459f464c52Smaya 42469f464c52Smaya/** 42479f464c52Smaya * Populate GS program key fields based on the current state. 
42489f464c52Smaya */ 42499f464c52Smayastatic void 42509f464c52Smayairis_populate_gs_key(const struct iris_context *ice, 42517ec681f3Smrg const struct shader_info *info, 42527ec681f3Smrg gl_shader_stage last_stage, 42537ec681f3Smrg struct iris_gs_prog_key *key) 42549f464c52Smaya{ 42557ec681f3Smrg const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 42567ec681f3Smrg 42577ec681f3Smrg if (info->clip_distance_array_size == 0 && 42587ec681f3Smrg (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) && 42597ec681f3Smrg last_stage == MESA_SHADER_GEOMETRY) 42607ec681f3Smrg key->vue.nr_userclip_plane_consts = cso_rast->num_clip_plane_consts; 42619f464c52Smaya} 42629f464c52Smaya 42639f464c52Smaya/** 42649f464c52Smaya * Populate FS program key fields based on the current state. 42659f464c52Smaya */ 42669f464c52Smayastatic void 42679f464c52Smayairis_populate_fs_key(const struct iris_context *ice, 42687ec681f3Smrg const struct shader_info *info, 42697ec681f3Smrg struct iris_fs_prog_key *key) 42709f464c52Smaya{ 42719f464c52Smaya struct iris_screen *screen = (void *) ice->ctx.screen; 42729f464c52Smaya const struct pipe_framebuffer_state *fb = &ice->state.framebuffer; 42739f464c52Smaya const struct iris_depth_stencil_alpha_state *zsa = ice->state.cso_zsa; 42749f464c52Smaya const struct iris_rasterizer_state *rast = ice->state.cso_rast; 42759f464c52Smaya const struct iris_blend_state *blend = ice->state.cso_blend; 42769f464c52Smaya 42779f464c52Smaya key->nr_color_regions = fb->nr_cbufs; 42789f464c52Smaya 42799f464c52Smaya key->clamp_fragment_color = rast->clamp_fragment_color; 42809f464c52Smaya 42819f464c52Smaya key->alpha_to_coverage = blend->alpha_to_coverage; 42829f464c52Smaya 42837ec681f3Smrg key->alpha_test_replicate_alpha = fb->nr_cbufs > 1 && zsa->alpha_enabled; 42849f464c52Smaya 42857ec681f3Smrg key->flat_shade = rast->flatshade && 42867ec681f3Smrg (info->inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1)); 42879f464c52Smaya 42889f464c52Smaya 
key->persample_interp = rast->force_persample_interp; 42899f464c52Smaya key->multisample_fbo = rast->multisample && fb->samples > 1; 42909f464c52Smaya 42917ec681f3Smrg key->coherent_fb_fetch = GFX_VER >= 9; 42929f464c52Smaya 42939f464c52Smaya key->force_dual_color_blend = 42949f464c52Smaya screen->driconf.dual_color_blend_by_location && 42959f464c52Smaya (blend->blend_enables & 1) && blend->dual_color_blending; 42969f464c52Smaya 42979f464c52Smaya /* TODO: Respect glHint for key->high_quality_derivatives */ 42989f464c52Smaya} 42999f464c52Smaya 43009f464c52Smayastatic void 43019f464c52Smayairis_populate_cs_key(const struct iris_context *ice, 43027ec681f3Smrg struct iris_cs_prog_key *key) 43039f464c52Smaya{ 43049f464c52Smaya} 43059f464c52Smaya 43069f464c52Smayastatic uint64_t 43079f464c52SmayaKSP(const struct iris_compiled_shader *shader) 43089f464c52Smaya{ 43099f464c52Smaya struct iris_resource *res = (void *) shader->assembly.res; 43109f464c52Smaya return iris_bo_offset_from_base_address(res->bo) + shader->assembly.offset; 43119f464c52Smaya} 43129f464c52Smaya 43139f464c52Smaya#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix, stage) \ 43149f464c52Smaya pkt.KernelStartPointer = KSP(shader); \ 43157ec681f3Smrg pkt.BindingTableEntryCount = shader->bt.size_bytes / 4; \ 43169f464c52Smaya pkt.FloatingPointMode = prog_data->use_alt_mode; \ 43179f464c52Smaya \ 43189f464c52Smaya pkt.DispatchGRFStartRegisterForURBData = \ 43199f464c52Smaya prog_data->dispatch_grf_start_reg; \ 43209f464c52Smaya pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \ 43219f464c52Smaya pkt.prefix##URBEntryReadOffset = 0; \ 43229f464c52Smaya \ 43239f464c52Smaya pkt.StatisticsEnable = true; \ 43249f464c52Smaya pkt.Enable = true; \ 43259f464c52Smaya \ 43269f464c52Smaya if (prog_data->total_scratch) { \ 43277ec681f3Smrg INIT_THREAD_SCRATCH_SIZE(pkt) \ 43289f464c52Smaya } 43299f464c52Smaya 43307ec681f3Smrg#if GFX_VERx10 >= 125 43317ec681f3Smrg#define INIT_THREAD_SCRATCH_SIZE(pkt) 
43327ec681f3Smrg#define MERGE_SCRATCH_ADDR(name) \ 43337ec681f3Smrg{ \ 43347ec681f3Smrg uint32_t pkt2[GENX(name##_length)] = {0}; \ 43357ec681f3Smrg _iris_pack_command(batch, GENX(name), pkt2, p) { \ 43367ec681f3Smrg p.ScratchSpaceBuffer = scratch_addr >> 4; \ 43377ec681f3Smrg } \ 43387ec681f3Smrg iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \ 43397ec681f3Smrg} 43407ec681f3Smrg#else 43417ec681f3Smrg#define INIT_THREAD_SCRATCH_SIZE(pkt) \ 43427ec681f3Smrg pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; 43437ec681f3Smrg#define MERGE_SCRATCH_ADDR(name) \ 43447ec681f3Smrg{ \ 43457ec681f3Smrg uint32_t pkt2[GENX(name##_length)] = {0}; \ 43467ec681f3Smrg _iris_pack_command(batch, GENX(name), pkt2, p) { \ 43477ec681f3Smrg p.ScratchSpaceBasePointer = \ 43487ec681f3Smrg rw_bo(NULL, scratch_addr, IRIS_DOMAIN_NONE); \ 43497ec681f3Smrg } \ 43507ec681f3Smrg iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \ 43517ec681f3Smrg} 43527ec681f3Smrg#endif 43537ec681f3Smrg 43547ec681f3Smrg 43559f464c52Smaya/** 43569f464c52Smaya * Encode most of 3DSTATE_VS based on the compiled shader. 
43579f464c52Smaya */ 43589f464c52Smayastatic void 43597ec681f3Smrgiris_store_vs_state(const struct intel_device_info *devinfo, 43609f464c52Smaya struct iris_compiled_shader *shader) 43619f464c52Smaya{ 43629f464c52Smaya struct brw_stage_prog_data *prog_data = shader->prog_data; 43639f464c52Smaya struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; 43649f464c52Smaya 43659f464c52Smaya iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) { 43669f464c52Smaya INIT_THREAD_DISPATCH_FIELDS(vs, Vertex, MESA_SHADER_VERTEX); 43679f464c52Smaya vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; 43689f464c52Smaya vs.SIMD8DispatchEnable = true; 43699f464c52Smaya vs.UserClipDistanceCullTestEnableBitmask = 43709f464c52Smaya vue_prog_data->cull_distance_mask; 43719f464c52Smaya } 43729f464c52Smaya} 43739f464c52Smaya 43749f464c52Smaya/** 43759f464c52Smaya * Encode most of 3DSTATE_HS based on the compiled shader. 43769f464c52Smaya */ 43779f464c52Smayastatic void 43787ec681f3Smrgiris_store_tcs_state(const struct intel_device_info *devinfo, 43799f464c52Smaya struct iris_compiled_shader *shader) 43809f464c52Smaya{ 43819f464c52Smaya struct brw_stage_prog_data *prog_data = shader->prog_data; 43829f464c52Smaya struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; 43839f464c52Smaya struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data; 43849f464c52Smaya 43859f464c52Smaya iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) { 43869f464c52Smaya INIT_THREAD_DISPATCH_FIELDS(hs, Vertex, MESA_SHADER_TESS_CTRL); 43879f464c52Smaya 43887ec681f3Smrg#if GFX_VER >= 12 43897ec681f3Smrg /* Wa_1604578095: 43907ec681f3Smrg * 43917ec681f3Smrg * Hang occurs when the number of max threads is less than 2 times 43927ec681f3Smrg * the number of instance count. The number of max threads must be 43937ec681f3Smrg * more than 2 times the number of instance count. 
43947ec681f3Smrg */ 43957ec681f3Smrg assert((devinfo->max_tcs_threads / 2) > tcs_prog_data->instances); 43967ec681f3Smrg hs.DispatchGRFStartRegisterForURBData = prog_data->dispatch_grf_start_reg & 0x1f; 43977ec681f3Smrg hs.DispatchGRFStartRegisterForURBData5 = prog_data->dispatch_grf_start_reg >> 5; 43987ec681f3Smrg#endif 43997ec681f3Smrg 44009f464c52Smaya hs.InstanceCount = tcs_prog_data->instances - 1; 44019f464c52Smaya hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; 44029f464c52Smaya hs.IncludeVertexHandles = true; 44037ec681f3Smrg 44047ec681f3Smrg#if GFX_VER == 12 44057ec681f3Smrg /* Patch Count threshold specifies the maximum number of patches that 44067ec681f3Smrg * will be accumulated before a thread dispatch is forced. 44077ec681f3Smrg */ 44087ec681f3Smrg hs.PatchCountThreshold = tcs_prog_data->patch_count_threshold; 44097ec681f3Smrg#endif 44107ec681f3Smrg 44117ec681f3Smrg#if GFX_VER >= 9 44127ec681f3Smrg hs.DispatchMode = vue_prog_data->dispatch_mode; 44137ec681f3Smrg hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id; 44147ec681f3Smrg#endif 44159f464c52Smaya } 44169f464c52Smaya} 44179f464c52Smaya 44189f464c52Smaya/** 44199f464c52Smaya * Encode 3DSTATE_TE and most of 3DSTATE_DS based on the compiled shader. 
44209f464c52Smaya */ 44219f464c52Smayastatic void 44227ec681f3Smrgiris_store_tes_state(const struct intel_device_info *devinfo, 44239f464c52Smaya struct iris_compiled_shader *shader) 44249f464c52Smaya{ 44259f464c52Smaya struct brw_stage_prog_data *prog_data = shader->prog_data; 44269f464c52Smaya struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; 44279f464c52Smaya struct brw_tes_prog_data *tes_prog_data = (void *) prog_data; 44289f464c52Smaya 44297ec681f3Smrg uint32_t *ds_state = (void *) shader->derived_data; 44307ec681f3Smrg uint32_t *te_state = ds_state + GENX(3DSTATE_DS_length); 44319f464c52Smaya 44329f464c52Smaya iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) { 44339f464c52Smaya INIT_THREAD_DISPATCH_FIELDS(ds, Patch, MESA_SHADER_TESS_EVAL); 44349f464c52Smaya 44359f464c52Smaya ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; 44369f464c52Smaya ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; 44379f464c52Smaya ds.ComputeWCoordinateEnable = 44389f464c52Smaya tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; 44399f464c52Smaya 44409f464c52Smaya ds.UserClipDistanceCullTestEnableBitmask = 44419f464c52Smaya vue_prog_data->cull_distance_mask; 44429f464c52Smaya } 44439f464c52Smaya 44447ec681f3Smrg iris_pack_command(GENX(3DSTATE_TE), te_state, te) { 44457ec681f3Smrg te.Partitioning = tes_prog_data->partitioning; 44467ec681f3Smrg te.OutputTopology = tes_prog_data->output_topology; 44477ec681f3Smrg te.TEDomain = tes_prog_data->domain; 44487ec681f3Smrg te.TEEnable = true; 44497ec681f3Smrg te.MaximumTessellationFactorOdd = 63.0; 44507ec681f3Smrg te.MaximumTessellationFactorNotOdd = 64.0; 44517ec681f3Smrg } 44529f464c52Smaya} 44539f464c52Smaya 44549f464c52Smaya/** 44559f464c52Smaya * Encode most of 3DSTATE_GS based on the compiled shader. 
44569f464c52Smaya */ 44579f464c52Smayastatic void 44587ec681f3Smrgiris_store_gs_state(const struct intel_device_info *devinfo, 44599f464c52Smaya struct iris_compiled_shader *shader) 44609f464c52Smaya{ 44619f464c52Smaya struct brw_stage_prog_data *prog_data = shader->prog_data; 44629f464c52Smaya struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; 44639f464c52Smaya struct brw_gs_prog_data *gs_prog_data = (void *) prog_data; 44649f464c52Smaya 44659f464c52Smaya iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) { 44669f464c52Smaya INIT_THREAD_DISPATCH_FIELDS(gs, Vertex, MESA_SHADER_GEOMETRY); 44679f464c52Smaya 44689f464c52Smaya gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; 44699f464c52Smaya gs.OutputTopology = gs_prog_data->output_topology; 44709f464c52Smaya gs.ControlDataHeaderSize = 44719f464c52Smaya gs_prog_data->control_data_header_size_hwords; 44729f464c52Smaya gs.InstanceControl = gs_prog_data->invocations - 1; 44739f464c52Smaya gs.DispatchMode = DISPATCH_MODE_SIMD8; 44749f464c52Smaya gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; 44759f464c52Smaya gs.ControlDataFormat = gs_prog_data->control_data_format; 44769f464c52Smaya gs.ReorderMode = TRAILING; 44779f464c52Smaya gs.ExpectedVertexCount = gs_prog_data->vertices_in; 44789f464c52Smaya gs.MaximumNumberofThreads = 44797ec681f3Smrg GFX_VER == 8 ? 
(devinfo->max_gs_threads / 2 - 1) 44809f464c52Smaya : (devinfo->max_gs_threads - 1); 44819f464c52Smaya 44829f464c52Smaya if (gs_prog_data->static_vertex_count != -1) { 44839f464c52Smaya gs.StaticOutput = true; 44849f464c52Smaya gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count; 44859f464c52Smaya } 44869f464c52Smaya gs.IncludeVertexHandles = vue_prog_data->include_vue_handles; 44879f464c52Smaya 44889f464c52Smaya gs.UserClipDistanceCullTestEnableBitmask = 44899f464c52Smaya vue_prog_data->cull_distance_mask; 44909f464c52Smaya 44919f464c52Smaya const int urb_entry_write_offset = 1; 44929f464c52Smaya const uint32_t urb_entry_output_length = 44939f464c52Smaya DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) - 44949f464c52Smaya urb_entry_write_offset; 44959f464c52Smaya 44969f464c52Smaya gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset; 44979f464c52Smaya gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1); 44989f464c52Smaya } 44999f464c52Smaya} 45009f464c52Smaya 45019f464c52Smaya/** 45029f464c52Smaya * Encode most of 3DSTATE_PS and 3DSTATE_PS_EXTRA based on the shader. 45039f464c52Smaya */ 45049f464c52Smayastatic void 45057ec681f3Smrgiris_store_fs_state(const struct intel_device_info *devinfo, 45069f464c52Smaya struct iris_compiled_shader *shader) 45079f464c52Smaya{ 45089f464c52Smaya struct brw_stage_prog_data *prog_data = shader->prog_data; 45099f464c52Smaya struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; 45109f464c52Smaya 45119f464c52Smaya uint32_t *ps_state = (void *) shader->derived_data; 45129f464c52Smaya uint32_t *psx_state = ps_state + GENX(3DSTATE_PS_length); 45139f464c52Smaya 45149f464c52Smaya iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) { 45159f464c52Smaya ps.VectorMaskEnable = true; 45167ec681f3Smrg ps.BindingTableEntryCount = shader->bt.size_bytes / 4; 45179f464c52Smaya ps.FloatingPointMode = prog_data->use_alt_mode; 45187ec681f3Smrg ps.MaximumNumberofThreadsPerPSD = 64 - (GFX_VER == 8 ? 
2 : 1); 45199f464c52Smaya 45209f464c52Smaya ps.PushConstantEnable = prog_data->ubo_ranges[0].length > 0; 45219f464c52Smaya 45229f464c52Smaya /* From the documentation for this packet: 45239f464c52Smaya * "If the PS kernel does not need the Position XY Offsets to 45249f464c52Smaya * compute a Position Value, then this field should be programmed 45259f464c52Smaya * to POSOFFSET_NONE." 45269f464c52Smaya * 45279f464c52Smaya * "SW Recommendation: If the PS kernel needs the Position Offsets 45289f464c52Smaya * to compute a Position XY value, this field should match Position 45299f464c52Smaya * ZW Interpolation Mode to ensure a consistent position.xyzw 45309f464c52Smaya * computation." 45319f464c52Smaya * 45329f464c52Smaya * We only require XY sample offsets. So, this recommendation doesn't 45339f464c52Smaya * look useful at the moment. We might need this in future. 45349f464c52Smaya */ 45359f464c52Smaya ps.PositionXYOffsetSelect = 45369f464c52Smaya wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; 45379f464c52Smaya 45389f464c52Smaya if (prog_data->total_scratch) { 45397ec681f3Smrg INIT_THREAD_SCRATCH_SIZE(ps); 45409f464c52Smaya } 45419f464c52Smaya } 45429f464c52Smaya 45439f464c52Smaya iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) { 45449f464c52Smaya psx.PixelShaderValid = true; 45459f464c52Smaya psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; 45469f464c52Smaya psx.PixelShaderKillsPixel = wm_prog_data->uses_kill; 45479f464c52Smaya psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0; 45489f464c52Smaya psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; 45499f464c52Smaya psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; 45509f464c52Smaya psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch; 45519f464c52Smaya psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 45529f464c52Smaya 45537ec681f3Smrg#if GFX_VER >= 9 45549f464c52Smaya psx.PixelShaderPullsBary = wm_prog_data->pulls_bary; 
45559f464c52Smaya psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil; 45569f464c52Smaya#endif 45579f464c52Smaya } 45589f464c52Smaya} 45599f464c52Smaya 45609f464c52Smaya/** 45619f464c52Smaya * Compute the size of the derived data (shader command packets). 45629f464c52Smaya * 45639f464c52Smaya * This must match the data written by the iris_store_xs_state() functions. 45649f464c52Smaya */ 45659f464c52Smayastatic void 45667ec681f3Smrgiris_store_cs_state(const struct intel_device_info *devinfo, 45679f464c52Smaya struct iris_compiled_shader *shader) 45689f464c52Smaya{ 45699f464c52Smaya struct brw_cs_prog_data *cs_prog_data = (void *) shader->prog_data; 45709f464c52Smaya void *map = shader->derived_data; 45719f464c52Smaya 45729f464c52Smaya iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) { 45737ec681f3Smrg#if GFX_VERx10 < 125 45749f464c52Smaya desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs; 45759f464c52Smaya desc.CrossThreadConstantDataReadLength = 45769f464c52Smaya cs_prog_data->push.cross_thread.regs; 45777ec681f3Smrg#else 45787ec681f3Smrg assert(cs_prog_data->push.per_thread.regs == 0); 45797ec681f3Smrg assert(cs_prog_data->push.cross_thread.regs == 0); 45807ec681f3Smrg#endif 45817ec681f3Smrg desc.BarrierEnable = cs_prog_data->uses_barrier; 45827ec681f3Smrg#if GFX_VER >= 12 45837ec681f3Smrg /* TODO: Check if we are missing workarounds and enable mid-thread 45847ec681f3Smrg * preemption. 45857ec681f3Smrg * 45867ec681f3Smrg * We still have issues with mid-thread preemption (it was already 45877ec681f3Smrg * disabled by the kernel on gfx11, due to missing workarounds). It's 45887ec681f3Smrg * possible that we are just missing some workarounds, and could enable 45897ec681f3Smrg * it later, but for now let's disable it to fix a GPU in compute in Car 45907ec681f3Smrg * Chase (and possibly more). 
45917ec681f3Smrg */ 45927ec681f3Smrg desc.ThreadPreemptionDisable = true; 45937ec681f3Smrg#endif 45949f464c52Smaya } 45959f464c52Smaya} 45969f464c52Smaya 45979f464c52Smayastatic unsigned 45989f464c52Smayairis_derived_program_state_size(enum iris_program_cache_id cache_id) 45999f464c52Smaya{ 46009f464c52Smaya assert(cache_id <= IRIS_CACHE_BLORP); 46019f464c52Smaya 46029f464c52Smaya static const unsigned dwords[] = { 46039f464c52Smaya [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length), 46049f464c52Smaya [IRIS_CACHE_TCS] = GENX(3DSTATE_HS_length), 46059f464c52Smaya [IRIS_CACHE_TES] = GENX(3DSTATE_TE_length) + GENX(3DSTATE_DS_length), 46069f464c52Smaya [IRIS_CACHE_GS] = GENX(3DSTATE_GS_length), 46079f464c52Smaya [IRIS_CACHE_FS] = 46089f464c52Smaya GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length), 46099f464c52Smaya [IRIS_CACHE_CS] = GENX(INTERFACE_DESCRIPTOR_DATA_length), 46109f464c52Smaya [IRIS_CACHE_BLORP] = 0, 46119f464c52Smaya }; 46129f464c52Smaya 46139f464c52Smaya return sizeof(uint32_t) * dwords[cache_id]; 46149f464c52Smaya} 46159f464c52Smaya 46169f464c52Smaya/** 46179f464c52Smaya * Create any state packets corresponding to the given shader stage 46189f464c52Smaya * (i.e. 3DSTATE_VS) and save them as "derived data" in the shader variant. 46199f464c52Smaya * This means that we can look up a program in the in-memory cache and 46209f464c52Smaya * get most of the state packet without having to reconstruct it. 
46219f464c52Smaya */ 46229f464c52Smayastatic void 46237ec681f3Smrgiris_store_derived_program_state(const struct intel_device_info *devinfo, 46249f464c52Smaya enum iris_program_cache_id cache_id, 46259f464c52Smaya struct iris_compiled_shader *shader) 46269f464c52Smaya{ 46279f464c52Smaya switch (cache_id) { 46289f464c52Smaya case IRIS_CACHE_VS: 46297ec681f3Smrg iris_store_vs_state(devinfo, shader); 46309f464c52Smaya break; 46319f464c52Smaya case IRIS_CACHE_TCS: 46327ec681f3Smrg iris_store_tcs_state(devinfo, shader); 46339f464c52Smaya break; 46349f464c52Smaya case IRIS_CACHE_TES: 46357ec681f3Smrg iris_store_tes_state(devinfo, shader); 46369f464c52Smaya break; 46379f464c52Smaya case IRIS_CACHE_GS: 46387ec681f3Smrg iris_store_gs_state(devinfo, shader); 46399f464c52Smaya break; 46409f464c52Smaya case IRIS_CACHE_FS: 46417ec681f3Smrg iris_store_fs_state(devinfo, shader); 46429f464c52Smaya break; 46439f464c52Smaya case IRIS_CACHE_CS: 46447ec681f3Smrg iris_store_cs_state(devinfo, shader); 46459f464c52Smaya break; 46467ec681f3Smrg case IRIS_CACHE_BLORP: 46479f464c52Smaya break; 46489f464c52Smaya } 46499f464c52Smaya} 46509f464c52Smaya 46519f464c52Smaya/* ------------------------------------------------------------------- */ 46529f464c52Smaya 46539f464c52Smayastatic const uint32_t push_constant_opcodes[] = { 46549f464c52Smaya [MESA_SHADER_VERTEX] = 21, 46559f464c52Smaya [MESA_SHADER_TESS_CTRL] = 25, /* HS */ 46569f464c52Smaya [MESA_SHADER_TESS_EVAL] = 26, /* DS */ 46579f464c52Smaya [MESA_SHADER_GEOMETRY] = 22, 46589f464c52Smaya [MESA_SHADER_FRAGMENT] = 23, 46599f464c52Smaya [MESA_SHADER_COMPUTE] = 0, 46609f464c52Smaya}; 46619f464c52Smaya 46629f464c52Smayastatic uint32_t 46639f464c52Smayause_null_surface(struct iris_batch *batch, struct iris_context *ice) 46649f464c52Smaya{ 46659f464c52Smaya struct iris_bo *state_bo = iris_resource_bo(ice->state.unbound_tex.res); 46669f464c52Smaya 46677ec681f3Smrg iris_use_pinned_bo(batch, state_bo, false, IRIS_DOMAIN_NONE); 46689f464c52Smaya 
   return ice->state.unbound_tex.offset;
}

/* Pin the pre-baked "null framebuffer" SURFACE_STATE (a 1x1x1 null surface
 * sized to match the framebuffer) and return its binder offset.  Falls back
 * to the generic null surface if set_framebuffer_state() was never called.
 */
static uint32_t
use_null_fb_surface(struct iris_batch *batch, struct iris_context *ice)
{
   /* If set_framebuffer_state() was never called, fall back to 1x1x1 */
   if (!ice->state.null_fb.res)
      return use_null_surface(batch, ice);

   struct iris_bo *state_bo = iris_resource_bo(ice->state.null_fb.res);

   iris_use_pinned_bo(batch, state_bo, false, IRIS_DOMAIN_NONE);

   return ice->state.null_fb.offset;
}

/* Surface states for a resource are stored as a packed array, one per
 * possible aux usage (only the usages set in \p aux_modes are allocated).
 * Return the byte offset of the surface state for \p aux_usage within that
 * array: SURFACE_STATE_ALIGNMENT times the number of enabled aux modes
 * with a lower bit position.
 */
static uint32_t
surf_state_offset_for_aux(struct iris_resource *res,
                          unsigned aux_modes,
                          enum isl_aux_usage aux_usage)
{
   assert(aux_modes & (1 << aux_usage));
   return SURFACE_STATE_ALIGNMENT *
          util_bitcount(aux_modes & ((1 << aux_usage) - 1));
}

#if GFX_VER == 9
/* Patch the fast-clear color embedded in an already-uploaded SURFACE_STATE
 * in place, using pipe-control immediate writes from the command streamer
 * (gfx9 surface states carry the clear color inline; see
 * isl_dev->ss.clear_value_offset).  HiZ stores only a single depth value;
 * color surfaces store four 32-bit channels written as two 64-bit imms.
 */
static void
surf_state_update_clear_value(struct iris_batch *batch,
                              struct iris_resource *res,
                              struct iris_state_ref *state,
                              unsigned aux_modes,
                              enum isl_aux_usage aux_usage)
{
   struct isl_device *isl_dev = &batch->screen->isl_dev;
   struct iris_bo *state_bo = iris_resource_bo(state->res);
   /* state->offset is relative to the binder memzone; convert to an offset
    * within the backing BO before adding the clear-value field offset.
    */
   uint64_t real_offset = state->offset + IRIS_MEMZONE_BINDER_START;
   uint32_t offset_into_bo = real_offset - state_bo->address;
   uint32_t clear_offset = offset_into_bo +
      isl_dev->ss.clear_value_offset +
      surf_state_offset_for_aux(res, aux_modes, aux_usage);
   uint32_t *color = res->aux.clear_color.u32;

   assert(isl_dev->ss.clear_value_size == 16);

   if (aux_usage == ISL_AUX_USAGE_HIZ) {
      iris_emit_pipe_control_write(batch, "update fast clear value (Z)",
                                   PIPE_CONTROL_WRITE_IMMEDIATE,
                                   state_bo, clear_offset, color[0]);
   } else {
      iris_emit_pipe_control_write(batch, "update fast clear color (RG__)",
                                   PIPE_CONTROL_WRITE_IMMEDIATE,
                                   state_bo, clear_offset,
                                   (uint64_t) color[0] |
                                   (uint64_t) color[1] << 32);
      iris_emit_pipe_control_write(batch, "update fast clear color (__BA)",
                                   PIPE_CONTROL_WRITE_IMMEDIATE,
                                   state_bo, clear_offset + 8,
                                   (uint64_t) color[2] |
                                   (uint64_t) color[3] << 32);
   }

   /* The texture cache may still hold the stale surface state words. */
   iris_emit_pipe_control_flush(batch,
                                "update fast clear: state cache invalidate",
                                PIPE_CONTROL_FLUSH_ENABLE |
                                PIPE_CONTROL_STATE_CACHE_INVALIDATE);
}
#endif

/* Refresh the fast-clear color stored in every surface state of
 * \p surf_state (one per aux mode in \p all_aux_modes).  gfx9 patches the
 * uploaded states on the GPU; gfx8 re-fills and re-uploads them on the CPU.
 */
static void
update_clear_value(struct iris_context *ice,
                   struct iris_batch *batch,
                   struct iris_resource *res,
                   struct iris_surface_state *surf_state,
                   unsigned all_aux_modes,
                   struct isl_view *view)
{
   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   UNUSED unsigned aux_modes = all_aux_modes;

   /* We only need to update the clear color in the surface state for gfx8 and
    * gfx9. Newer gens can read it directly from the clear color state buffer.
    */
#if GFX_VER == 9
   /* Skip updating the ISL_AUX_USAGE_NONE surface state */
   aux_modes &= ~(1 << ISL_AUX_USAGE_NONE);

   while (aux_modes) {
      enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes);

      surf_state_update_clear_value(batch, res, &surf_state->ref,
                                    all_aux_modes, aux_usage);
   }
#elif GFX_VER == 8
   /* TODO: Could update rather than re-filling */
   alloc_surface_states(surf_state, all_aux_modes);

   void *map = surf_state->cpu;

   while (aux_modes) {
      enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes);
      fill_surface_state(isl_dev, map, res, &res->surf, view, aux_usage,
                         0, 0, 0);
      map += SURFACE_STATE_ALIGNMENT;
   }

   upload_surface_states(ice->state.surface_uploader, surf_state);
#endif
}

/**
 * Add a surface to the validation list, as well as the buffer containing
 * the corresponding SURFACE_STATE.
 *
 * Returns the binding table entry (offset to SURFACE_STATE).
 */
static uint32_t
use_surface(struct iris_context *ice,
            struct iris_batch *batch,
            struct pipe_surface *p_surf,
            bool writeable,
            enum isl_aux_usage aux_usage,
            bool is_read_surface,
            enum iris_domain access)
{
   struct iris_surface *surf = (void *) p_surf;
   struct iris_resource *res = (void *) p_surf->texture;
   uint32_t offset = 0;

   /* Lazily upload surface states the first time they're needed.  gfx8
    * keeps a separate read-view state for render-target reads.
    */
   if (GFX_VER == 8 && is_read_surface && !surf->surface_state_read.ref.res) {
      upload_surface_states(ice->state.surface_uploader,
                            &surf->surface_state_read);
   }

   if (!surf->surface_state.ref.res) {
      upload_surface_states(ice->state.surface_uploader,
                            &surf->surface_state);
   }

   /* If the resource's fast-clear color changed since we captured it in
    * the surface, re-sync the stored clear color (gfx8/9 only do work here;
    * see update_clear_value).
    */
   if (memcmp(&res->aux.clear_color, &surf->clear_color,
              sizeof(surf->clear_color)) != 0) {
      update_clear_value(ice, batch, res, &surf->surface_state,
                         res->aux.possible_usages, &surf->view);
      if (GFX_VER == 8) {
         update_clear_value(ice, batch, res, &surf->surface_state_read,
                            res->aux.possible_usages, &surf->read_view);
      }
      surf->clear_color = res->aux.clear_color;
   }

   /* Pin the data BO, any aux (CCS/HiZ) BO, clear color BO, and the BO
    * holding the SURFACE_STATE itself.
    */
   if (res->aux.clear_color_bo)
      iris_use_pinned_bo(batch, res->aux.clear_color_bo, false, access);

   if (res->aux.bo)
      iris_use_pinned_bo(batch, res->aux.bo, writeable, access);

   iris_use_pinned_bo(batch, res->bo, writeable, access);

   if (GFX_VER == 8 && is_read_surface) {
      iris_use_pinned_bo(batch, iris_resource_bo(surf->surface_state_read.ref.res), false,
                         IRIS_DOMAIN_NONE);
   } else {
      iris_use_pinned_bo(batch, iris_resource_bo(surf->surface_state.ref.res), false,
                         IRIS_DOMAIN_NONE);
   }

   offset = (GFX_VER == 8 && is_read_surface)
               ? surf->surface_state_read.ref.offset
               : surf->surface_state.ref.offset;

   return offset +
      surf_state_offset_for_aux(res, res->aux.possible_usages, aux_usage);
}

/* Pin a sampler view's BOs (data, aux, clear color, SURFACE_STATE) and
 * return the binder offset of its surface state for the resolved aux usage.
 * Mirrors use_surface() but for read-only texturing.
 */
static uint32_t
use_sampler_view(struct iris_context *ice,
                 struct iris_batch *batch,
                 struct iris_sampler_view *isv)
{
   enum isl_aux_usage aux_usage =
      iris_resource_texture_aux_usage(ice, isv->res, isv->view.format);

   if (!isv->surface_state.ref.res)
      upload_surface_states(ice->state.surface_uploader, &isv->surface_state);

   /* Re-sync the stored fast-clear color if it changed (see use_surface). */
   if (memcmp(&isv->res->aux.clear_color, &isv->clear_color,
              sizeof(isv->clear_color)) != 0) {
      update_clear_value(ice, batch, isv->res, &isv->surface_state,
                         isv->res->aux.sampler_usages, &isv->view);
      isv->clear_color = isv->res->aux.clear_color;
   }

   if (isv->res->aux.clear_color_bo) {
      iris_use_pinned_bo(batch, isv->res->aux.clear_color_bo,
                         false, IRIS_DOMAIN_OTHER_READ);
   }

   if (isv->res->aux.bo) {
      iris_use_pinned_bo(batch, isv->res->aux.bo,
                         false, IRIS_DOMAIN_OTHER_READ);
   }

   iris_use_pinned_bo(batch, isv->res->bo, false, IRIS_DOMAIN_OTHER_READ);
   iris_use_pinned_bo(batch, iris_resource_bo(isv->surface_state.ref.res), false,
                      IRIS_DOMAIN_NONE);

   return isv->surface_state.ref.offset +
          surf_state_offset_for_aux(isv->res, isv->res->aux.sampler_usages,
                                    aux_usage);
}

/* Pin a UBO or SSBO and its SURFACE_STATE; return the surface state's
 * binder offset, or the null surface if the buffer is unbound.
 */
static uint32_t
use_ubo_ssbo(struct iris_batch *batch,
             struct iris_context *ice,
             struct pipe_shader_buffer *buf,
             struct iris_state_ref *surf_state,
             bool writable, enum iris_domain access)
{
   if (!buf->buffer || !surf_state->res)
      return use_null_surface(batch, ice);

   iris_use_pinned_bo(batch, iris_resource_bo(buf->buffer), writable, access);
   iris_use_pinned_bo(batch, iris_resource_bo(surf_state->res), false,
                      IRIS_DOMAIN_NONE);

   return surf_state->offset;
}

/* Pin shader image \p i of shader stage state \p shs and return the binder
 * offset of its surface state for the image's resolved aux usage.
 */
static uint32_t
use_image(struct iris_batch *batch, struct iris_context *ice,
          struct iris_shader_state *shs, const struct shader_info *info,
          int i)
{
   struct iris_image_view *iv = &shs->image[i];
   struct iris_resource *res = (void *) iv->base.resource;

   if (!res)
      return use_null_surface(batch, ice);

   bool write = iv->base.shader_access & PIPE_IMAGE_ACCESS_WRITE;

   iris_use_pinned_bo(batch, res->bo, write, IRIS_DOMAIN_NONE);
   iris_use_pinned_bo(batch, iris_resource_bo(iv->surface_state.ref.res),
                      false, IRIS_DOMAIN_NONE);

   if (res->aux.bo)
      iris_use_pinned_bo(batch, res->aux.bo, write, IRIS_DOMAIN_NONE);

   enum isl_aux_usage aux_usage =
      iris_image_view_aux_usage(ice, &iv->base, info);

   return iv->surface_state.ref.offset +
      surf_state_offset_for_aux(res, res->aux.possible_usages, aux_usage);
}

/* Append a binding table entry (binder-relative offset) at slot `s`.
 * In pin_only mode the table write is skipped; only BOs get pinned.
 * Relies on locals `binder_addr`, `shader`, `bt_map`, `s`, `pin_only`
 * of iris_populate_binding_table below.
 */
#define push_bt_entry(addr) \
   assert(addr >= binder_addr); \
   assert(s < shader->bt.size_bytes / sizeof(uint32_t)); \
   if (!pin_only) bt_map[s++] = (addr) - binder_addr;

/* Sanity-check that the running slot counter `s` matches the compiled
 * shader's expected offset for a surface group.
 */
#define bt_assert(section) \
   if (!pin_only && shader->bt.used_mask[section] != 0) \
      assert(shader->bt.offsets[section] == s);

/**
 * Populate the binding table for a given shader stage.
 *
 * This fills out the table of pointers to surfaces required by the shader,
 * and also adds those buffers to the validation list so the kernel can make
 * resident before running our batch.
 */
static void
iris_populate_binding_table(struct iris_context *ice,
                            struct iris_batch *batch,
                            gl_shader_stage stage,
                            bool pin_only)
{
   const struct iris_binder *binder = &ice->state.binder;
   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
   if (!shader)
      return;

   struct iris_binding_table *bt = &shader->bt;
   UNUSED struct brw_stage_prog_data *prog_data = shader->prog_data;
   struct iris_shader_state *shs = &ice->state.shaders[stage];
   uint32_t binder_addr = binder->bo->address;

   uint32_t *bt_map = binder->map + binder->bt_offset[stage];
   int s = 0;

   const struct shader_info *info = iris_get_shader_info(ice, stage);
   if (!info) {
      /* TCS passthrough doesn't need a binding table. */
      assert(stage == MESA_SHADER_TESS_CTRL);
      return;
   }

   if (stage == MESA_SHADER_COMPUTE &&
       shader->bt.used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS]) {
      /* surface for gl_NumWorkGroups */
      struct iris_state_ref *grid_data = &ice->state.grid_size;
      struct iris_state_ref *grid_state = &ice->state.grid_surf_state;
      iris_use_pinned_bo(batch, iris_resource_bo(grid_data->res), false,
                         IRIS_DOMAIN_OTHER_READ);
      iris_use_pinned_bo(batch, iris_resource_bo(grid_state->res), false,
                         IRIS_DOMAIN_NONE);
      push_bt_entry(grid_state->offset);
   }

   if (stage == MESA_SHADER_FRAGMENT) {
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
      /* Note that cso_fb->nr_cbufs == fs_key->nr_color_regions. */
      if (cso_fb->nr_cbufs) {
         for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
            uint32_t addr;
            if (cso_fb->cbufs[i]) {
               addr = use_surface(ice, batch, cso_fb->cbufs[i], true,
                                  ice->state.draw_aux_usage[i], false,
                                  IRIS_DOMAIN_RENDER_WRITE);
            } else {
               addr = use_null_fb_surface(batch, ice);
            }
            push_bt_entry(addr);
         }
      } else if (GFX_VER < 11) {
         /* Pre-gfx11 requires a render target even when there are none. */
         uint32_t addr = use_null_fb_surface(batch, ice);
         push_bt_entry(addr);
      }
   }

/* Iterate the used entries of a surface group, asserting that the group
 * starts at the current slot counter.
 */
#define foreach_surface_used(index, group) \
   bt_assert(group); \
   for (int index = 0; index < bt->sizes[group]; index++) \
      if (iris_group_index_to_bti(bt, group, index) != \
          IRIS_SURFACE_NOT_USED)

   foreach_surface_used(i, IRIS_SURFACE_GROUP_RENDER_TARGET_READ) {
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
      uint32_t addr;
      if (cso_fb->cbufs[i]) {
         addr = use_surface(ice, batch, cso_fb->cbufs[i],
                            false, ice->state.draw_aux_usage[i], true,
                            IRIS_DOMAIN_OTHER_READ);
         push_bt_entry(addr);
      }
   }

   foreach_surface_used(i, IRIS_SURFACE_GROUP_TEXTURE) {
      struct iris_sampler_view *view = shs->textures[i];
      uint32_t addr = view ? use_sampler_view(ice, batch, view)
                           : use_null_surface(batch, ice);
      push_bt_entry(addr);
   }

   foreach_surface_used(i, IRIS_SURFACE_GROUP_IMAGE) {
      uint32_t addr = use_image(batch, ice, shs, info, i);
      push_bt_entry(addr);
   }

   foreach_surface_used(i, IRIS_SURFACE_GROUP_UBO) {
      uint32_t addr = use_ubo_ssbo(batch, ice, &shs->constbuf[i],
                                   &shs->constbuf_surf_state[i], false,
                                   IRIS_DOMAIN_OTHER_READ);
      push_bt_entry(addr);
   }

   foreach_surface_used(i, IRIS_SURFACE_GROUP_SSBO) {
      uint32_t addr =
         use_ubo_ssbo(batch, ice, &shs->ssbo[i], &shs->ssbo_surf_state[i],
                      shs->writable_ssbos & (1u << i), IRIS_DOMAIN_NONE);
      push_bt_entry(addr);
   }

#if 0
   /* XXX: YUV surfaces not implemented yet */
   bt_assert(plane_start[1], ...);
   bt_assert(plane_start[2], ...);
#endif
}

/* Pin \p res if it is non-NULL; no-op otherwise. */
static void
iris_use_optional_res(struct iris_batch *batch,
                      struct pipe_resource *res,
                      bool writeable,
                      enum iris_domain access)
{
   if (res) {
      struct iris_bo *bo = iris_resource_bo(res);
      iris_use_pinned_bo(batch, bo, writeable, access);
   }
}

/* Pin the depth and/or stencil resources (and the depth aux/HiZ BO) backing
 * \p zsbuf.  Write access is requested only when the corresponding
 * depth/stencil writes are enabled in the bound DSA state.
 */
static void
pin_depth_and_stencil_buffers(struct iris_batch *batch,
                              struct pipe_surface *zsbuf,
                              struct iris_depth_stencil_alpha_state *cso_zsa)
{
   if (!zsbuf)
      return;

   struct iris_resource *zres, *sres;
   iris_get_depth_stencil_resources(zsbuf->texture, &zres, &sres);

   if (zres) {
      const enum iris_domain access = cso_zsa->depth_writes_enabled ?
         IRIS_DOMAIN_DEPTH_WRITE : IRIS_DOMAIN_OTHER_READ;
      iris_use_pinned_bo(batch, zres->bo, cso_zsa->depth_writes_enabled,
                         access);
      if (zres->aux.bo) {
         iris_use_pinned_bo(batch, zres->aux.bo,
                            cso_zsa->depth_writes_enabled, access);
      }
   }

   if (sres) {
      const enum iris_domain access = cso_zsa->stencil_writes_enabled ?
         IRIS_DOMAIN_DEPTH_WRITE : IRIS_DOMAIN_OTHER_READ;
      iris_use_pinned_bo(batch, sres->bo, cso_zsa->stencil_writes_enabled,
                         access);
   }
}

/* Pin the scratch BO for a stage (if the program spills) and return the
 * scratch address to program: on GFX_VERx10 >= 125 this is a bindless
 * surface-state offset (must be 64B-aligned, < 2^26); on older gens it is
 * the scratch BO's GPU address.  Returns 0 when no scratch is needed.
 */
static uint32_t
pin_scratch_space(struct iris_context *ice,
                  struct iris_batch *batch,
                  const struct brw_stage_prog_data *prog_data,
                  gl_shader_stage stage)
{
   uint32_t scratch_addr = 0;

   if (prog_data->total_scratch > 0) {
      struct iris_bo *scratch_bo =
         iris_get_scratch_space(ice, prog_data->total_scratch, stage);
      iris_use_pinned_bo(batch, scratch_bo, true, IRIS_DOMAIN_NONE);

#if GFX_VERx10 >= 125
      const struct iris_state_ref *ref =
         iris_get_scratch_surf(ice, prog_data->total_scratch);
      iris_use_pinned_bo(batch, iris_resource_bo(ref->res),
                         false, IRIS_DOMAIN_NONE);
      scratch_addr = ref->offset +
                     iris_resource_bo(ref->res)->address -
                     IRIS_MEMZONE_BINDLESS_START;
      assert((scratch_addr & 0x3f) == 0 && scratch_addr < (1 << 26));
#else
      scratch_addr = scratch_bo->address;
#endif
   }

   return scratch_addr;
}

/* ------------------------------------------------------------------- */

/**
 * Pin any BOs which were installed by a previous batch, and restored
 * via the hardware logical context mechanism.
 *
 * We don't need to re-emit all state every batch - the hardware context
 * mechanism will save and restore it for us.  This includes pointers to
 * various BOs...which won't exist unless we ask the kernel to pin them
 * by adding them to the validation list.
 *
 * We can skip buffers if we've re-emitted those packets, as we're
 * overwriting those stale pointers with new ones, and don't actually
 * refer to the old BOs.
 */
static void
iris_restore_render_saved_bos(struct iris_context *ice,
                              struct iris_batch *batch,
                              const struct pipe_draw_info *draw)
{
   struct iris_genx_state *genx = ice->state.genx;

   /* "clean" bits are state NOT re-emitted this batch, whose BOs the
    * saved hardware context still references and which must stay pinned.
    */
   const uint64_t clean = ~ice->state.dirty;
   const uint64_t stage_clean = ~ice->state.stage_dirty;

   if (clean & IRIS_DIRTY_CC_VIEWPORT) {
      iris_use_optional_res(batch, ice->state.last_res.cc_vp, false,
                            IRIS_DOMAIN_NONE);
   }

   if (clean & IRIS_DIRTY_SF_CL_VIEWPORT) {
      iris_use_optional_res(batch, ice->state.last_res.sf_cl_vp, false,
                            IRIS_DOMAIN_NONE);
   }

   if (clean & IRIS_DIRTY_BLEND_STATE) {
      iris_use_optional_res(batch, ice->state.last_res.blend, false,
                            IRIS_DOMAIN_NONE);
   }

   if (clean & IRIS_DIRTY_COLOR_CALC_STATE) {
      iris_use_optional_res(batch, ice->state.last_res.color_calc, false,
                            IRIS_DOMAIN_NONE);
   }

   if (clean & IRIS_DIRTY_SCISSOR_RECT) {
      iris_use_optional_res(batch, ice->state.last_res.scissor, false,
                            IRIS_DOMAIN_NONE);
   }

   if (ice->state.streamout_active && (clean & IRIS_DIRTY_SO_BUFFERS)) {
      for (int i = 0; i < 4; i++) {
         struct iris_stream_output_target *tgt =
            (void *) ice->state.so_target[i];
         if (tgt) {
            iris_use_pinned_bo(batch, iris_resource_bo(tgt->base.buffer),
                               true, IRIS_DOMAIN_OTHER_WRITE);
            iris_use_pinned_bo(batch, iris_resource_bo(tgt->offset.res),
                               true, IRIS_DOMAIN_OTHER_WRITE);
         }
      }
   }

   /* Re-pin push-constant UBO ranges for stages whose constants were not
    * re-emitted.
    */
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
      if (!(stage_clean & (IRIS_STAGE_DIRTY_CONSTANTS_VS << stage)))
         continue;

      struct iris_shader_state *shs = &ice->state.shaders[stage];
      struct iris_compiled_shader *shader = ice->shaders.prog[stage];

      if (!shader)
         continue;

      struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;

      for (int i = 0; i < 4; i++) {
         const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];

         if (range->length == 0)
            continue;

         /* Range block is a binding table index, map back to UBO index. */
         unsigned block_index = iris_bti_to_group_index(
            &shader->bt, IRIS_SURFACE_GROUP_UBO, range->block);
         assert(block_index != IRIS_SURFACE_NOT_USED);

         struct pipe_shader_buffer *cbuf = &shs->constbuf[block_index];
         struct iris_resource *res = (void *) cbuf->buffer;

         if (res)
            iris_use_pinned_bo(batch, res->bo, false, IRIS_DOMAIN_OTHER_READ);
         else
            iris_use_pinned_bo(batch, batch->screen->workaround_bo, false,
                               IRIS_DOMAIN_OTHER_READ);
      }
   }

   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
      if (stage_clean & (IRIS_STAGE_DIRTY_BINDINGS_VS << stage)) {
         /* Re-pin any buffers referred to by the binding table. */
         iris_populate_binding_table(ice, batch, stage, true);
      }
   }

   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
      struct iris_shader_state *shs = &ice->state.shaders[stage];
      struct pipe_resource *res = shs->sampler_table.res;
      if (res)
         iris_use_pinned_bo(batch, iris_resource_bo(res), false,
                            IRIS_DOMAIN_NONE);
   }

   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
      if (stage_clean & (IRIS_STAGE_DIRTY_VS << stage)) {
         struct iris_compiled_shader *shader = ice->shaders.prog[stage];

         if (shader) {
            struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
            iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

            pin_scratch_space(ice, batch, shader->prog_data, stage);
         }
      }
   }

   if ((clean & IRIS_DIRTY_DEPTH_BUFFER) &&
       (clean & IRIS_DIRTY_WM_DEPTH_STENCIL)) {
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
      pin_depth_and_stencil_buffers(batch, cso_fb->zsbuf, ice->state.cso_zsa);
   }

   iris_use_optional_res(batch, ice->state.last_res.index_buffer, false,
                         IRIS_DOMAIN_VF_READ);

   if (clean & IRIS_DIRTY_VERTEX_BUFFERS) {
      uint64_t bound = ice->state.bound_vertex_buffers;
      while (bound) {
         const int i = u_bit_scan64(&bound);
         struct pipe_resource *res = genx->vertex_buffers[i].resource;
         iris_use_pinned_bo(batch, iris_resource_bo(res), false,
                            IRIS_DOMAIN_VF_READ);
      }
   }
}

/* Compute-batch counterpart of iris_restore_render_saved_bos(): re-pin
 * BOs still referenced by the saved hardware context for the CS stage.
 */
static void
iris_restore_compute_saved_bos(struct iris_context *ice,
                               struct iris_batch *batch,
                               const struct pipe_grid_info *grid)
{
   const uint64_t stage_clean = ~ice->state.stage_dirty;

   const int stage = MESA_SHADER_COMPUTE;
   struct iris_shader_state *shs = &ice->state.shaders[stage];

   if (stage_clean & IRIS_STAGE_DIRTY_BINDINGS_CS) {
      /* Re-pin any buffers referred to by the binding table. */
      iris_populate_binding_table(ice, batch, stage, true);
   }

   struct pipe_resource *sampler_res = shs->sampler_table.res;
   if (sampler_res)
      iris_use_pinned_bo(batch, iris_resource_bo(sampler_res), false,
                         IRIS_DOMAIN_NONE);

   /* Only re-pin the descriptor if none of the state feeding it changed. */
   if ((stage_clean & IRIS_STAGE_DIRTY_SAMPLER_STATES_CS) &&
       (stage_clean & IRIS_STAGE_DIRTY_BINDINGS_CS) &&
       (stage_clean & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
       (stage_clean & IRIS_STAGE_DIRTY_CS)) {
      iris_use_optional_res(batch, ice->state.last_res.cs_desc, false,
                            IRIS_DOMAIN_NONE);
   }

   if (stage_clean & IRIS_STAGE_DIRTY_CS) {
      struct iris_compiled_shader *shader = ice->shaders.prog[stage];

      if (shader) {
         struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
         iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

         if (GFX_VERx10 < 125) {
            struct iris_bo *curbe_bo =
               iris_resource_bo(ice->state.last_res.cs_thread_ids);
            iris_use_pinned_bo(batch, curbe_bo, false, IRIS_DOMAIN_NONE);
         }

         pin_scratch_space(ice, batch, shader->prog_data, stage);
      }
   }
}

/**
 * Possibly emit STATE_BASE_ADDRESS to update Surface State Base Address.
 */
static void
iris_update_surface_base_address(struct iris_batch *batch,
                                 struct iris_binder *binder)
{
   if (batch->last_surface_base_address == binder->bo->address)
      return;

   struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t mocs = isl_mocs(isl_dev, 0, false);

   iris_batch_sync_region_start(batch);

   flush_before_state_base_change(batch);

#if GFX_VER == 12
   /* Wa_1607854226:
    *
    * Workaround the non pipelined state not applying in MEDIA/GPGPU pipeline
    * mode by putting the pipeline temporarily in 3D mode..
    */
   if (batch->name == IRIS_BATCH_COMPUTE)
      emit_pipeline_select(batch, _3D);
#endif

   iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
      sba.SurfaceStateBaseAddressModifyEnable = true;
      sba.SurfaceStateBaseAddress = ro_bo(binder->bo, 0);

      /* The hardware appears to pay attention to the MOCS fields even
       * if you don't set the "Address Modify Enable" bit for the base.
       */
      sba.GeneralStateMOCS = mocs;
      sba.StatelessDataPortAccessMOCS = mocs;
      sba.DynamicStateMOCS = mocs;
      sba.IndirectObjectMOCS = mocs;
      sba.InstructionMOCS = mocs;
      sba.SurfaceStateMOCS = mocs;
#if GFX_VER >= 9
      sba.BindlessSurfaceStateMOCS = mocs;
#endif
   }

#if GFX_VER == 12
   /* Wa_1607854226:
    *
    * Put the pipeline back into compute mode.
    */
   if (batch->name == IRIS_BATCH_COMPUTE)
      emit_pipeline_select(batch, GPGPU);
#endif

   flush_after_state_base_change(batch);
   iris_batch_sync_region_end(batch);

   batch->last_surface_base_address = binder->bo->address;
}

/* Compute the guardband [zmin, zmax] for a viewport.  Window-space
 * positions bypass the viewport transform, so clamp to [0, 1].
 */
static inline void
iris_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
                        bool window_space_position, float *zmin, float *zmax)
{
   if (window_space_position) {
      *zmin = 0.f;
      *zmax = 1.f;
      return;
   }
   util_viewport_zmin_zmax(vp, halfz, zmin, zmax);
}

#if GFX_VER >= 12
/* Invalidate the hardware's cached aux-map translations if the shared
 * aux-map table has changed since this batch last programmed it.
 */
void
genX(invalidate_aux_map_state)(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   void *aux_map_ctx = iris_bufmgr_get_aux_map_context(screen->bufmgr);
   if (!aux_map_ctx)
      return;
   uint32_t aux_map_state_num = intel_aux_map_get_state_num(aux_map_ctx);
   if (batch->last_aux_map_state != aux_map_state_num) {
      /* HSD 1209978178: docs say that before programming the aux table:
       *
       *    "Driver must ensure that the engine is IDLE but ensure it doesn't
       *    add extra flushes in the case it knows that the engine is already
       *    IDLE."
       *
       * An end of pipe sync is needed here, otherwise we see GPU hangs in
       * dEQP-GLES31.functional.copy_image.* tests.
       */
      iris_emit_end_of_pipe_sync(batch, "Invalidate aux map table",
                                 PIPE_CONTROL_CS_STALL);

      /* If the aux-map state number increased, then we need to rewrite the
       * register. Rewriting the register is used to both set the aux-map
       * translation table address, and also to invalidate any previously
       * cached translations.
       */
      iris_load_register_imm32(batch, GENX(GFX_CCS_AUX_INV_num), 1);
      batch->last_aux_map_state = aux_map_state_num;
   }
}

/* Program the aux-map translation table base address for this batch. */
static void
init_aux_map_state(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   void *aux_map_ctx = iris_bufmgr_get_aux_map_context(screen->bufmgr);
   if (!aux_map_ctx)
      return;

   /* The table base must be non-zero and 32K-aligned. */
   uint64_t base_addr = intel_aux_map_get_base(aux_map_ctx);
   assert(base_addr != 0 && align64(base_addr, 32 * 1024) == base_addr);
   iris_load_register_imm64(batch, GENX(GFX_AUX_TABLE_BASE_ADDR_num),
                            base_addr);
}
#endif

/* Gathered push-constant buffer ranges for one stage: up to four
 * (address, length-in-256-bit-units) pairs, plus the largest length seen.
 */
struct push_bos {
   struct {
      struct iris_address addr;
      uint32_t length;
   } buffers[4];
   int buffer_count;

   uint32_t max_length;
};

/* Collect the UBO ranges the compiler asked to be pushed for \p stage
 * into \p push_bos.  Unbound buffers fall back to the workaround BO.
 */
static void
setup_constant_buffers(struct iris_context *ice,
                       struct iris_batch *batch,
                       int stage,
                       struct push_bos *push_bos)
{
   struct iris_shader_state *shs = &ice->state.shaders[stage];
   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
   struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;

   uint32_t push_range_sum = 0;

   int n = 0;
   for (int i = 0; i < 4; i++) {
      const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];

      if (range->length == 0)
         continue;

      push_range_sum += range->length;

      if (range->length > push_bos->max_length)
         push_bos->max_length = range->length;

      /* Range block is a binding table index, map back to UBO index. */
      unsigned block_index = iris_bti_to_group_index(
         &shader->bt, IRIS_SURFACE_GROUP_UBO, range->block);
      assert(block_index != IRIS_SURFACE_NOT_USED);

      struct pipe_shader_buffer *cbuf = &shs->constbuf[block_index];
      struct iris_resource *res = (void *) cbuf->buffer;

      assert(cbuf->buffer_offset % 32 == 0);

      push_bos->buffers[n].length = range->length;
      push_bos->buffers[n].addr =
         res ? ro_bo(res->bo, range->start * 32 + cbuf->buffer_offset)
             : batch->screen->workaround_address;
      n++;
   }

   /* From the 3DSTATE_CONSTANT_XS and 3DSTATE_CONSTANT_ALL programming notes:
    *
    *    "The sum of all four read length fields must be less than or
    *    equal to the size of 64."
    */
   assert(push_range_sum <= 64);

   push_bos->buffer_count = n;
}

/* Emit the 3DSTATE_CONSTANT_* packet for \p stage from the ranges
 * gathered by setup_constant_buffers().
 */
static void
emit_push_constant_packets(struct iris_context *ice,
                           struct iris_batch *batch,
                           int stage,
                           const struct push_bos *push_bos)
{
   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
   struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;

   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {
      pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
#if GFX_VER >= 12
      pkt.MOCS = isl_mocs(isl_dev, 0, false);
#endif
      if (prog_data) {
         /* The Skylake PRM contains the following restriction:
          *
          *    "The driver must ensure The following case does not occur
          *     without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
          *     buffer 3 read length equal to zero committed followed by a
          *     3DSTATE_CONSTANT_* with buffer 0 read length not equal to
          *     zero committed."
          *
          * To avoid this, we program the buffers in the highest slots.
          * This way, slot 0 is only used if slot 3 is also used.
55187ec681f3Smrg */ 55197ec681f3Smrg int n = push_bos->buffer_count; 55207ec681f3Smrg assert(n <= 4); 55217ec681f3Smrg const unsigned shift = 4 - n; 55227ec681f3Smrg for (int i = 0; i < n; i++) { 55237ec681f3Smrg pkt.ConstantBody.ReadLength[i + shift] = 55247ec681f3Smrg push_bos->buffers[i].length; 55257ec681f3Smrg pkt.ConstantBody.Buffer[i + shift] = push_bos->buffers[i].addr; 55269f464c52Smaya } 55279f464c52Smaya } 55289f464c52Smaya } 55299f464c52Smaya} 55309f464c52Smaya 55317ec681f3Smrg#if GFX_VER >= 12 55329f464c52Smayastatic void 55337ec681f3Smrgemit_push_constant_packet_all(struct iris_context *ice, 55347ec681f3Smrg struct iris_batch *batch, 55357ec681f3Smrg uint32_t shader_mask, 55367ec681f3Smrg const struct push_bos *push_bos) 55379f464c52Smaya{ 55387ec681f3Smrg struct isl_device *isl_dev = &batch->screen->isl_dev; 55397ec681f3Smrg 55407ec681f3Smrg if (!push_bos) { 55417ec681f3Smrg iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), pc) { 55427ec681f3Smrg pc.ShaderUpdateEnable = shader_mask; 55437ec681f3Smrg } 55449f464c52Smaya return; 55457ec681f3Smrg } 55469f464c52Smaya 55477ec681f3Smrg const uint32_t n = push_bos->buffer_count; 55487ec681f3Smrg const uint32_t max_pointers = 4; 55497ec681f3Smrg const uint32_t num_dwords = 2 + 2 * n; 55507ec681f3Smrg uint32_t const_all[2 + 2 * max_pointers]; 55517ec681f3Smrg uint32_t *dw = &const_all[0]; 55529f464c52Smaya 55537ec681f3Smrg assert(n <= max_pointers); 55547ec681f3Smrg iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), dw, all) { 55557ec681f3Smrg all.DWordLength = num_dwords - 2; 55567ec681f3Smrg all.MOCS = isl_mocs(isl_dev, 0, false); 55577ec681f3Smrg all.ShaderUpdateEnable = shader_mask; 55587ec681f3Smrg all.PointerBufferMask = (1 << n) - 1; 55597ec681f3Smrg } 55607ec681f3Smrg dw += 2; 55617ec681f3Smrg 55627ec681f3Smrg for (int i = 0; i < n; i++) { 55637ec681f3Smrg _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA), 55647ec681f3Smrg dw + i * 2, data) { 55657ec681f3Smrg data.PointerToConstantBuffer = 
push_bos->buffers[i].addr; 55667ec681f3Smrg data.ConstantBufferReadLength = push_bos->buffers[i].length; 55677ec681f3Smrg } 55687ec681f3Smrg } 55697ec681f3Smrg iris_batch_emit(batch, const_all, sizeof(uint32_t) * num_dwords); 55707ec681f3Smrg} 55717ec681f3Smrg#endif 55727ec681f3Smrg 55737ec681f3Smrgvoid 55747ec681f3SmrggenX(emit_depth_state_workarounds)(struct iris_context *ice, 55757ec681f3Smrg struct iris_batch *batch, 55767ec681f3Smrg const struct isl_surf *surf) 55777ec681f3Smrg{ 55787ec681f3Smrg#if GFX_VERx10 == 120 55797ec681f3Smrg const bool fmt_is_d16 = surf->format == ISL_FORMAT_R16_UNORM; 55807ec681f3Smrg 55817ec681f3Smrg switch (ice->state.genx->depth_reg_mode) { 55827ec681f3Smrg case IRIS_DEPTH_REG_MODE_HW_DEFAULT: 55837ec681f3Smrg if (!fmt_is_d16) 55847ec681f3Smrg return; 55857ec681f3Smrg break; 55867ec681f3Smrg case IRIS_DEPTH_REG_MODE_D16: 55877ec681f3Smrg if (fmt_is_d16) 55887ec681f3Smrg return; 55897ec681f3Smrg break; 55907ec681f3Smrg case IRIS_DEPTH_REG_MODE_UNKNOWN: 55917ec681f3Smrg break; 55927ec681f3Smrg } 55937ec681f3Smrg 55947ec681f3Smrg /* We'll change some CHICKEN registers depending on the depth surface 55957ec681f3Smrg * format. Do a depth flush and stall so the pipeline is not using these 55967ec681f3Smrg * settings while we change the registers. 55977ec681f3Smrg */ 55987ec681f3Smrg iris_emit_end_of_pipe_sync(batch, 55997ec681f3Smrg "Workaround: Stop pipeline for 14010455700", 56007ec681f3Smrg PIPE_CONTROL_DEPTH_STALL | 56017ec681f3Smrg PIPE_CONTROL_DEPTH_CACHE_FLUSH); 56027ec681f3Smrg 56037ec681f3Smrg /* Wa_14010455700 56047ec681f3Smrg * 56057ec681f3Smrg * To avoid sporadic corruptions “Set 0x7010[9] when Depth Buffer 56067ec681f3Smrg * Surface Format is D16_UNORM , surface type is not NULL & 1X_MSAA”. 
56077ec681f3Smrg */ 56087ec681f3Smrg iris_emit_reg(batch, GENX(COMMON_SLICE_CHICKEN1), reg) { 56097ec681f3Smrg reg.HIZPlaneOptimizationdisablebit = fmt_is_d16 && surf->samples == 1; 56107ec681f3Smrg reg.HIZPlaneOptimizationdisablebitMask = true; 56117ec681f3Smrg } 56127ec681f3Smrg 56137ec681f3Smrg /* Wa_1806527549 56147ec681f3Smrg * 56157ec681f3Smrg * Set HIZ_CHICKEN (7018h) bit 13 = 1 when depth buffer is D16_UNORM. 56167ec681f3Smrg */ 56177ec681f3Smrg iris_emit_reg(batch, GENX(HIZ_CHICKEN), reg) { 56187ec681f3Smrg reg.HZDepthTestLEGEOptimizationDisable = fmt_is_d16; 56197ec681f3Smrg reg.HZDepthTestLEGEOptimizationDisableMask = true; 56209f464c52Smaya } 56219f464c52Smaya 56227ec681f3Smrg ice->state.genx->depth_reg_mode = 56237ec681f3Smrg fmt_is_d16 ? IRIS_DEPTH_REG_MODE_D16 : IRIS_DEPTH_REG_MODE_HW_DEFAULT; 56247ec681f3Smrg#endif 56259f464c52Smaya} 56269f464c52Smaya 56279f464c52Smayastatic void 56289f464c52Smayairis_upload_dirty_render_state(struct iris_context *ice, 56299f464c52Smaya struct iris_batch *batch, 56309f464c52Smaya const struct pipe_draw_info *draw) 56319f464c52Smaya{ 56329f464c52Smaya const uint64_t dirty = ice->state.dirty; 56337ec681f3Smrg const uint64_t stage_dirty = ice->state.stage_dirty; 56349f464c52Smaya 56357ec681f3Smrg if (!(dirty & IRIS_ALL_DIRTY_FOR_RENDER) && 56367ec681f3Smrg !(stage_dirty & IRIS_ALL_STAGE_DIRTY_FOR_RENDER)) 56379f464c52Smaya return; 56389f464c52Smaya 56399f464c52Smaya struct iris_genx_state *genx = ice->state.genx; 56409f464c52Smaya struct iris_binder *binder = &ice->state.binder; 56419f464c52Smaya struct brw_wm_prog_data *wm_prog_data = (void *) 56429f464c52Smaya ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; 56439f464c52Smaya 56449f464c52Smaya if (dirty & IRIS_DIRTY_CC_VIEWPORT) { 56459f464c52Smaya const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 56469f464c52Smaya uint32_t cc_vp_address; 56479f464c52Smaya 56489f464c52Smaya /* XXX: could avoid streaming for depth_clip [0,1] case. 
*/ 56499f464c52Smaya uint32_t *cc_vp_map = 56509f464c52Smaya stream_state(batch, ice->state.dynamic_uploader, 56519f464c52Smaya &ice->state.last_res.cc_vp, 56529f464c52Smaya 4 * ice->state.num_viewports * 56539f464c52Smaya GENX(CC_VIEWPORT_length), 32, &cc_vp_address); 56549f464c52Smaya for (int i = 0; i < ice->state.num_viewports; i++) { 56559f464c52Smaya float zmin, zmax; 56567ec681f3Smrg iris_viewport_zmin_zmax(&ice->state.viewports[i], cso_rast->clip_halfz, 56577ec681f3Smrg ice->state.window_space_position, 56587ec681f3Smrg &zmin, &zmax); 56599f464c52Smaya if (cso_rast->depth_clip_near) 56609f464c52Smaya zmin = 0.0; 56619f464c52Smaya if (cso_rast->depth_clip_far) 56629f464c52Smaya zmax = 1.0; 56639f464c52Smaya 56649f464c52Smaya iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) { 56659f464c52Smaya ccv.MinimumDepth = zmin; 56669f464c52Smaya ccv.MaximumDepth = zmax; 56679f464c52Smaya } 56689f464c52Smaya 56699f464c52Smaya cc_vp_map += GENX(CC_VIEWPORT_length); 56709f464c52Smaya } 56719f464c52Smaya 56729f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { 56739f464c52Smaya ptr.CCViewportPointer = cc_vp_address; 56749f464c52Smaya } 56759f464c52Smaya } 56769f464c52Smaya 56779f464c52Smaya if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) { 56789f464c52Smaya struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 56799f464c52Smaya uint32_t sf_cl_vp_address; 56809f464c52Smaya uint32_t *vp_map = 56819f464c52Smaya stream_state(batch, ice->state.dynamic_uploader, 56829f464c52Smaya &ice->state.last_res.sf_cl_vp, 56839f464c52Smaya 4 * ice->state.num_viewports * 56849f464c52Smaya GENX(SF_CLIP_VIEWPORT_length), 64, &sf_cl_vp_address); 56859f464c52Smaya 56869f464c52Smaya for (unsigned i = 0; i < ice->state.num_viewports; i++) { 56879f464c52Smaya const struct pipe_viewport_state *state = &ice->state.viewports[i]; 56889f464c52Smaya float gb_xmin, gb_xmax, gb_ymin, gb_ymax; 56899f464c52Smaya 56909f464c52Smaya float vp_xmin = viewport_extent(state, 0, 
-1.0f); 56919f464c52Smaya float vp_xmax = viewport_extent(state, 0, 1.0f); 56929f464c52Smaya float vp_ymin = viewport_extent(state, 1, -1.0f); 56939f464c52Smaya float vp_ymax = viewport_extent(state, 1, 1.0f); 56949f464c52Smaya 56957ec681f3Smrg intel_calculate_guardband_size(cso_fb->width, cso_fb->height, 56967ec681f3Smrg state->scale[0], state->scale[1], 56977ec681f3Smrg state->translate[0], state->translate[1], 56987ec681f3Smrg &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax); 56999f464c52Smaya 57009f464c52Smaya iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) { 57019f464c52Smaya vp.ViewportMatrixElementm00 = state->scale[0]; 57029f464c52Smaya vp.ViewportMatrixElementm11 = state->scale[1]; 57039f464c52Smaya vp.ViewportMatrixElementm22 = state->scale[2]; 57049f464c52Smaya vp.ViewportMatrixElementm30 = state->translate[0]; 57059f464c52Smaya vp.ViewportMatrixElementm31 = state->translate[1]; 57069f464c52Smaya vp.ViewportMatrixElementm32 = state->translate[2]; 57079f464c52Smaya vp.XMinClipGuardband = gb_xmin; 57089f464c52Smaya vp.XMaxClipGuardband = gb_xmax; 57099f464c52Smaya vp.YMinClipGuardband = gb_ymin; 57109f464c52Smaya vp.YMaxClipGuardband = gb_ymax; 57119f464c52Smaya vp.XMinViewPort = MAX2(vp_xmin, 0); 57129f464c52Smaya vp.XMaxViewPort = MIN2(vp_xmax, cso_fb->width) - 1; 57139f464c52Smaya vp.YMinViewPort = MAX2(vp_ymin, 0); 57149f464c52Smaya vp.YMaxViewPort = MIN2(vp_ymax, cso_fb->height) - 1; 57159f464c52Smaya } 57169f464c52Smaya 57179f464c52Smaya vp_map += GENX(SF_CLIP_VIEWPORT_length); 57189f464c52Smaya } 57199f464c52Smaya 57209f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { 57219f464c52Smaya ptr.SFClipViewportPointer = sf_cl_vp_address; 57229f464c52Smaya } 57239f464c52Smaya } 57249f464c52Smaya 57259f464c52Smaya if (dirty & IRIS_DIRTY_URB) { 57269f464c52Smaya for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 57279f464c52Smaya if (!ice->shaders.prog[i]) { 57287ec681f3Smrg ice->shaders.urb.size[i] = 1; 
57299f464c52Smaya } else { 57309f464c52Smaya struct brw_vue_prog_data *vue_prog_data = 57319f464c52Smaya (void *) ice->shaders.prog[i]->prog_data; 57327ec681f3Smrg ice->shaders.urb.size[i] = vue_prog_data->urb_entry_size; 57339f464c52Smaya } 57347ec681f3Smrg assert(ice->shaders.urb.size[i] != 0); 57359f464c52Smaya } 57369f464c52Smaya 57377ec681f3Smrg intel_get_urb_config(&batch->screen->devinfo, 57387ec681f3Smrg batch->screen->l3_config_3d, 57399f464c52Smaya ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL, 57407ec681f3Smrg ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL, 57417ec681f3Smrg ice->shaders.urb.size, 57427ec681f3Smrg ice->shaders.urb.entries, 57437ec681f3Smrg ice->shaders.urb.start, 57447ec681f3Smrg &ice->state.urb_deref_block_size, 57457ec681f3Smrg &ice->shaders.urb.constrained); 57467ec681f3Smrg 57477ec681f3Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 57487ec681f3Smrg iris_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) { 57497ec681f3Smrg urb._3DCommandSubOpcode += i; 57507ec681f3Smrg urb.VSURBStartingAddress = ice->shaders.urb.start[i]; 57517ec681f3Smrg urb.VSURBEntryAllocationSize = ice->shaders.urb.size[i] - 1; 57527ec681f3Smrg urb.VSNumberofURBEntries = ice->shaders.urb.entries[i]; 57537ec681f3Smrg } 57547ec681f3Smrg } 57559f464c52Smaya } 57569f464c52Smaya 57579f464c52Smaya if (dirty & IRIS_DIRTY_BLEND_STATE) { 57589f464c52Smaya struct iris_blend_state *cso_blend = ice->state.cso_blend; 57599f464c52Smaya struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 57609f464c52Smaya struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; 57619f464c52Smaya const int header_dwords = GENX(BLEND_STATE_length); 57629f464c52Smaya 57639f464c52Smaya /* Always write at least one BLEND_STATE - the final RT message will 57649f464c52Smaya * reference BLEND_STATE[0] even if there aren't color writes. There 57659f464c52Smaya * may still be alpha testing, computed depth, and so on. 
57669f464c52Smaya */ 57679f464c52Smaya const int rt_dwords = 57689f464c52Smaya MAX2(cso_fb->nr_cbufs, 1) * GENX(BLEND_STATE_ENTRY_length); 57699f464c52Smaya 57709f464c52Smaya uint32_t blend_offset; 57719f464c52Smaya uint32_t *blend_map = 57729f464c52Smaya stream_state(batch, ice->state.dynamic_uploader, 57739f464c52Smaya &ice->state.last_res.blend, 57749f464c52Smaya 4 * (header_dwords + rt_dwords), 64, &blend_offset); 57759f464c52Smaya 57769f464c52Smaya uint32_t blend_state_header; 57779f464c52Smaya iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) { 57787ec681f3Smrg bs.AlphaTestEnable = cso_zsa->alpha_enabled; 57797ec681f3Smrg bs.AlphaTestFunction = translate_compare_func(cso_zsa->alpha_func); 57809f464c52Smaya } 57819f464c52Smaya 57829f464c52Smaya blend_map[0] = blend_state_header | cso_blend->blend_state[0]; 57839f464c52Smaya memcpy(&blend_map[1], &cso_blend->blend_state[1], 4 * rt_dwords); 57849f464c52Smaya 57859f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) { 57869f464c52Smaya ptr.BlendStatePointer = blend_offset; 57879f464c52Smaya ptr.BlendStatePointerValid = true; 57889f464c52Smaya } 57899f464c52Smaya } 57909f464c52Smaya 57919f464c52Smaya if (dirty & IRIS_DIRTY_COLOR_CALC_STATE) { 57929f464c52Smaya struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; 57937ec681f3Smrg#if GFX_VER == 8 57949f464c52Smaya struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref; 57959f464c52Smaya#endif 57969f464c52Smaya uint32_t cc_offset; 57979f464c52Smaya void *cc_map = 57989f464c52Smaya stream_state(batch, ice->state.dynamic_uploader, 57999f464c52Smaya &ice->state.last_res.color_calc, 58009f464c52Smaya sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), 58019f464c52Smaya 64, &cc_offset); 58029f464c52Smaya iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) { 58039f464c52Smaya cc.AlphaTestFormat = ALPHATEST_FLOAT32; 58047ec681f3Smrg cc.AlphaReferenceValueAsFLOAT32 = cso->alpha_ref_value; 58059f464c52Smaya 
cc.BlendConstantColorRed = ice->state.blend_color.color[0]; 58069f464c52Smaya cc.BlendConstantColorGreen = ice->state.blend_color.color[1]; 58079f464c52Smaya cc.BlendConstantColorBlue = ice->state.blend_color.color[2]; 58089f464c52Smaya cc.BlendConstantColorAlpha = ice->state.blend_color.color[3]; 58097ec681f3Smrg#if GFX_VER == 8 58109f464c52Smaya cc.StencilReferenceValue = p_stencil_refs->ref_value[0]; 58119f464c52Smaya cc.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1]; 58129f464c52Smaya#endif 58139f464c52Smaya } 58149f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { 58159f464c52Smaya ptr.ColorCalcStatePointer = cc_offset; 58169f464c52Smaya ptr.ColorCalcStatePointerValid = true; 58179f464c52Smaya } 58189f464c52Smaya } 58199f464c52Smaya 58207ec681f3Smrg /* Wa_1604061319 58217ec681f3Smrg * 58227ec681f3Smrg * 3DSTATE_CONSTANT_* needs to be programmed before BTP_* 58237ec681f3Smrg * 58247ec681f3Smrg * Testing shows that all the 3DSTATE_CONSTANT_XS need to be emitted if 58257ec681f3Smrg * any stage has a dirty binding table. 
58267ec681f3Smrg */ 58277ec681f3Smrg const bool emit_const_wa = GFX_VER >= 11 && 58287ec681f3Smrg ((dirty & IRIS_DIRTY_RENDER_BUFFER) || 58297ec681f3Smrg (stage_dirty & IRIS_ALL_STAGE_DIRTY_BINDINGS_FOR_RENDER)); 58307ec681f3Smrg 58317ec681f3Smrg#if GFX_VER >= 12 58327ec681f3Smrg uint32_t nobuffer_stages = 0; 58337ec681f3Smrg#endif 58347ec681f3Smrg 58359f464c52Smaya for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 58367ec681f3Smrg if (!(stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << stage)) && 58377ec681f3Smrg !emit_const_wa) 58389f464c52Smaya continue; 58399f464c52Smaya 58409f464c52Smaya struct iris_shader_state *shs = &ice->state.shaders[stage]; 58419f464c52Smaya struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 58429f464c52Smaya 58439f464c52Smaya if (!shader) 58449f464c52Smaya continue; 58459f464c52Smaya 58467ec681f3Smrg if (shs->sysvals_need_upload) 58477ec681f3Smrg upload_sysvals(ice, stage, NULL); 58489f464c52Smaya 58497ec681f3Smrg struct push_bos push_bos = {}; 58507ec681f3Smrg setup_constant_buffers(ice, batch, stage, &push_bos); 58519f464c52Smaya 58527ec681f3Smrg#if GFX_VER >= 12 58537ec681f3Smrg /* If this stage doesn't have any push constants, emit it later in a 58547ec681f3Smrg * single CONSTANT_ALL packet with all the other stages. 58557ec681f3Smrg */ 58567ec681f3Smrg if (push_bos.buffer_count == 0) { 58577ec681f3Smrg nobuffer_stages |= 1 << stage; 58587ec681f3Smrg continue; 58597ec681f3Smrg } 58609f464c52Smaya 58617ec681f3Smrg /* The Constant Buffer Read Length field from 3DSTATE_CONSTANT_ALL 58627ec681f3Smrg * contains only 5 bits, so we can only use it for buffers smaller than 58637ec681f3Smrg * 32. 
58647ec681f3Smrg */ 58657ec681f3Smrg if (push_bos.max_length < 32) { 58667ec681f3Smrg emit_push_constant_packet_all(ice, batch, 1 << stage, &push_bos); 58677ec681f3Smrg continue; 58689f464c52Smaya } 58697ec681f3Smrg#endif 58707ec681f3Smrg emit_push_constant_packets(ice, batch, stage, &push_bos); 58719f464c52Smaya } 58729f464c52Smaya 58737ec681f3Smrg#if GFX_VER >= 12 58747ec681f3Smrg if (nobuffer_stages) 58757ec681f3Smrg emit_push_constant_packet_all(ice, batch, nobuffer_stages, NULL); 58767ec681f3Smrg#endif 58777ec681f3Smrg 58789f464c52Smaya for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 58797ec681f3Smrg /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted 58807ec681f3Smrg * in order to commit constants. TODO: Investigate "Disable Gather 58817ec681f3Smrg * at Set Shader" to go back to legacy mode... 58827ec681f3Smrg */ 58837ec681f3Smrg if (stage_dirty & ((IRIS_STAGE_DIRTY_BINDINGS_VS | 58847ec681f3Smrg (GFX_VER == 9 ? IRIS_STAGE_DIRTY_CONSTANTS_VS : 0)) 58857ec681f3Smrg << stage)) { 58869f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) { 58879f464c52Smaya ptr._3DCommandSubOpcode = 38 + stage; 58889f464c52Smaya ptr.PointertoVSBindingTable = binder->bt_offset[stage]; 58899f464c52Smaya } 58909f464c52Smaya } 58919f464c52Smaya } 58929f464c52Smaya 58937ec681f3Smrg if (GFX_VER >= 11 && (dirty & IRIS_DIRTY_RENDER_BUFFER)) { 58947ec681f3Smrg // XXX: we may want to flag IRIS_DIRTY_MULTISAMPLE (or SAMPLE_MASK?) 58957ec681f3Smrg // XXX: see commit 979fc1bc9bcc64027ff2cfafd285676f31b930a6 58967ec681f3Smrg 58977ec681f3Smrg /* The PIPE_CONTROL command description says: 58987ec681f3Smrg * 58997ec681f3Smrg * "Whenever a Binding Table Index (BTI) used by a Render Target 59007ec681f3Smrg * Message points to a different RENDER_SURFACE_STATE, SW must issue a 59017ec681f3Smrg * Render Target Cache Flush by enabling this bit. 
When render target 59027ec681f3Smrg * flush is set due to new association of BTI, PS Scoreboard Stall bit 59037ec681f3Smrg * must be set in this packet." 59047ec681f3Smrg */ 59057ec681f3Smrg // XXX: does this need to happen at 3DSTATE_BTP_PS time? 59067ec681f3Smrg iris_emit_pipe_control_flush(batch, "workaround: RT BTI change [draw]", 59077ec681f3Smrg PIPE_CONTROL_RENDER_TARGET_FLUSH | 59087ec681f3Smrg PIPE_CONTROL_STALL_AT_SCOREBOARD); 59097ec681f3Smrg } 59107ec681f3Smrg 59119f464c52Smaya for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 59127ec681f3Smrg if (stage_dirty & (IRIS_STAGE_DIRTY_BINDINGS_VS << stage)) { 59139f464c52Smaya iris_populate_binding_table(ice, batch, stage, false); 59149f464c52Smaya } 59159f464c52Smaya } 59169f464c52Smaya 59179f464c52Smaya for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 59187ec681f3Smrg if (!(stage_dirty & (IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage)) || 59199f464c52Smaya !ice->shaders.prog[stage]) 59209f464c52Smaya continue; 59219f464c52Smaya 59229f464c52Smaya iris_upload_sampler_states(ice, stage); 59239f464c52Smaya 59249f464c52Smaya struct iris_shader_state *shs = &ice->state.shaders[stage]; 59259f464c52Smaya struct pipe_resource *res = shs->sampler_table.res; 59269f464c52Smaya if (res) 59277ec681f3Smrg iris_use_pinned_bo(batch, iris_resource_bo(res), false, 59287ec681f3Smrg IRIS_DOMAIN_NONE); 59299f464c52Smaya 59309f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { 59319f464c52Smaya ptr._3DCommandSubOpcode = 43 + stage; 59329f464c52Smaya ptr.PointertoVSSamplerState = shs->sampler_table.offset; 59339f464c52Smaya } 59349f464c52Smaya } 59359f464c52Smaya 59369f464c52Smaya if (ice->state.need_border_colors) 59377ec681f3Smrg iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false, 59387ec681f3Smrg IRIS_DOMAIN_NONE); 59399f464c52Smaya 59409f464c52Smaya if (dirty & IRIS_DIRTY_MULTISAMPLE) { 59419f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) { 
59429f464c52Smaya ms.PixelLocation = 59439f464c52Smaya ice->state.cso_rast->half_pixel_center ? CENTER : UL_CORNER; 59449f464c52Smaya if (ice->state.framebuffer.samples > 0) 59459f464c52Smaya ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1; 59469f464c52Smaya } 59479f464c52Smaya } 59489f464c52Smaya 59499f464c52Smaya if (dirty & IRIS_DIRTY_SAMPLE_MASK) { 59509f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) { 59519f464c52Smaya ms.SampleMask = ice->state.sample_mask; 59529f464c52Smaya } 59539f464c52Smaya } 59549f464c52Smaya 59559f464c52Smaya for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 59567ec681f3Smrg if (!(stage_dirty & (IRIS_STAGE_DIRTY_VS << stage))) 59579f464c52Smaya continue; 59589f464c52Smaya 59599f464c52Smaya struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 59609f464c52Smaya 59619f464c52Smaya if (shader) { 59629f464c52Smaya struct brw_stage_prog_data *prog_data = shader->prog_data; 59639f464c52Smaya struct iris_resource *cache = (void *) shader->assembly.res; 59647ec681f3Smrg iris_use_pinned_bo(batch, cache->bo, false, IRIS_DOMAIN_NONE); 59657ec681f3Smrg 59667ec681f3Smrg uint32_t scratch_addr = 59677ec681f3Smrg pin_scratch_space(ice, batch, prog_data, stage); 59687ec681f3Smrg 59697ec681f3Smrg if (stage == MESA_SHADER_FRAGMENT) { 59707ec681f3Smrg UNUSED struct iris_rasterizer_state *cso = ice->state.cso_rast; 59717ec681f3Smrg struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 59727ec681f3Smrg 59737ec681f3Smrg uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0}; 59747ec681f3Smrg _iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) { 59757ec681f3Smrg ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; 59767ec681f3Smrg ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; 59777ec681f3Smrg ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; 59787ec681f3Smrg 59797ec681f3Smrg /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say: 59807ec681f3Smrg * 59817ec681f3Smrg * "When 
NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, 59827ec681f3Smrg * SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch 59837ec681f3Smrg * mode." 59847ec681f3Smrg * 59857ec681f3Smrg * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8. 59867ec681f3Smrg */ 59877ec681f3Smrg if (GFX_VER >= 9 && cso_fb->samples == 16 && 59887ec681f3Smrg !wm_prog_data->persample_dispatch) { 59897ec681f3Smrg assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); 59907ec681f3Smrg ps._32PixelDispatchEnable = false; 59917ec681f3Smrg } 59929f464c52Smaya 59937ec681f3Smrg ps.DispatchGRFStartRegisterForConstantSetupData0 = 59947ec681f3Smrg brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); 59957ec681f3Smrg ps.DispatchGRFStartRegisterForConstantSetupData1 = 59967ec681f3Smrg brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1); 59977ec681f3Smrg ps.DispatchGRFStartRegisterForConstantSetupData2 = 59987ec681f3Smrg brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2); 59997ec681f3Smrg 60007ec681f3Smrg ps.KernelStartPointer0 = KSP(shader) + 60017ec681f3Smrg brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); 60027ec681f3Smrg ps.KernelStartPointer1 = KSP(shader) + 60037ec681f3Smrg brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); 60047ec681f3Smrg ps.KernelStartPointer2 = KSP(shader) + 60057ec681f3Smrg brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); 60067ec681f3Smrg 60077ec681f3Smrg#if GFX_VERx10 >= 125 60087ec681f3Smrg ps.ScratchSpaceBuffer = scratch_addr >> 4; 60097ec681f3Smrg#else 60107ec681f3Smrg ps.ScratchSpaceBasePointer = 60117ec681f3Smrg rw_bo(NULL, scratch_addr, IRIS_DOMAIN_NONE); 60127ec681f3Smrg#endif 60137ec681f3Smrg } 60149f464c52Smaya 60157ec681f3Smrg uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0}; 60167ec681f3Smrg iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) { 60177ec681f3Smrg#if GFX_VER >= 9 60187ec681f3Smrg if (!wm_prog_data->uses_sample_mask) 60197ec681f3Smrg psx.InputCoverageMaskState = ICMS_NONE; 
60207ec681f3Smrg else if (wm_prog_data->post_depth_coverage) 60219f464c52Smaya psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE; 60227ec681f3Smrg else if (wm_prog_data->inner_coverage && 60237ec681f3Smrg cso->conservative_rasterization) 60249f464c52Smaya psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE; 60259f464c52Smaya else 60269f464c52Smaya psx.InputCoverageMaskState = ICMS_NORMAL; 60277ec681f3Smrg#else 60287ec681f3Smrg psx.PixelShaderUsesInputCoverageMask = 60297ec681f3Smrg wm_prog_data->uses_sample_mask; 60307ec681f3Smrg#endif 60319f464c52Smaya } 60329f464c52Smaya 60337ec681f3Smrg uint32_t *shader_ps = (uint32_t *) shader->derived_data; 60347ec681f3Smrg uint32_t *shader_psx = shader_ps + GENX(3DSTATE_PS_length); 60357ec681f3Smrg iris_emit_merge(batch, shader_ps, ps_state, 60367ec681f3Smrg GENX(3DSTATE_PS_length)); 60377ec681f3Smrg iris_emit_merge(batch, shader_psx, psx_state, 60389f464c52Smaya GENX(3DSTATE_PS_EXTRA_length)); 60397ec681f3Smrg } else if (scratch_addr) { 60407ec681f3Smrg uint32_t *pkt = (uint32_t *) shader->derived_data; 60417ec681f3Smrg switch (stage) { 60427ec681f3Smrg case MESA_SHADER_VERTEX: MERGE_SCRATCH_ADDR(3DSTATE_VS); break; 60437ec681f3Smrg case MESA_SHADER_TESS_CTRL: MERGE_SCRATCH_ADDR(3DSTATE_HS); break; 60447ec681f3Smrg case MESA_SHADER_TESS_EVAL: MERGE_SCRATCH_ADDR(3DSTATE_DS); break; 60457ec681f3Smrg case MESA_SHADER_GEOMETRY: MERGE_SCRATCH_ADDR(3DSTATE_GS); break; 60467ec681f3Smrg } 60477ec681f3Smrg } else { 60489f464c52Smaya iris_batch_emit(batch, shader->derived_data, 60499f464c52Smaya iris_derived_program_state_size(stage)); 60507ec681f3Smrg } 60519f464c52Smaya } else { 60529f464c52Smaya if (stage == MESA_SHADER_TESS_EVAL) { 60539f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_HS), hs); 60549f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_TE), te); 60559f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_DS), ds); 60569f464c52Smaya } else if (stage == MESA_SHADER_GEOMETRY) { 60579f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_GS), 
gs); 60589f464c52Smaya } 60599f464c52Smaya } 60609f464c52Smaya } 60619f464c52Smaya 60629f464c52Smaya if (ice->state.streamout_active) { 60639f464c52Smaya if (dirty & IRIS_DIRTY_SO_BUFFERS) { 60649f464c52Smaya for (int i = 0; i < 4; i++) { 60659f464c52Smaya struct iris_stream_output_target *tgt = 60669f464c52Smaya (void *) ice->state.so_target[i]; 60677ec681f3Smrg const uint32_t dwords = GENX(3DSTATE_SO_BUFFER_length); 60687ec681f3Smrg uint32_t *so_buffers = genx->so_buffers + i * dwords; 60697ec681f3Smrg bool zero_offset = false; 60707ec681f3Smrg 60719f464c52Smaya if (tgt) { 60727ec681f3Smrg zero_offset = tgt->zero_offset; 60739f464c52Smaya iris_use_pinned_bo(batch, iris_resource_bo(tgt->base.buffer), 60747ec681f3Smrg true, IRIS_DOMAIN_OTHER_WRITE); 60759f464c52Smaya iris_use_pinned_bo(batch, iris_resource_bo(tgt->offset.res), 60767ec681f3Smrg true, IRIS_DOMAIN_OTHER_WRITE); 60777ec681f3Smrg } 60787ec681f3Smrg 60797ec681f3Smrg if (zero_offset) { 60807ec681f3Smrg /* Skip the last DWord which contains "Stream Offset" of 60817ec681f3Smrg * 0xFFFFFFFF and instead emit a dword of zero directly. 
60827ec681f3Smrg */ 60837ec681f3Smrg STATIC_ASSERT(GENX(3DSTATE_SO_BUFFER_StreamOffset_start) == 60847ec681f3Smrg 32 * (dwords - 1)); 60857ec681f3Smrg const uint32_t zero = 0; 60867ec681f3Smrg iris_batch_emit(batch, so_buffers, 4 * (dwords - 1)); 60877ec681f3Smrg iris_batch_emit(batch, &zero, sizeof(zero)); 60887ec681f3Smrg tgt->zero_offset = false; 60897ec681f3Smrg } else { 60907ec681f3Smrg iris_batch_emit(batch, so_buffers, 4 * dwords); 60919f464c52Smaya } 60929f464c52Smaya } 60939f464c52Smaya } 60949f464c52Smaya 60959f464c52Smaya if ((dirty & IRIS_DIRTY_SO_DECL_LIST) && ice->state.streamout) { 60969f464c52Smaya uint32_t *decl_list = 60979f464c52Smaya ice->state.streamout + GENX(3DSTATE_STREAMOUT_length); 60989f464c52Smaya iris_batch_emit(batch, decl_list, 4 * ((decl_list[0] & 0xff) + 2)); 60999f464c52Smaya } 61009f464c52Smaya 61019f464c52Smaya if (dirty & IRIS_DIRTY_STREAMOUT) { 61029f464c52Smaya const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 61039f464c52Smaya 61049f464c52Smaya uint32_t dynamic_sol[GENX(3DSTATE_STREAMOUT_length)]; 61059f464c52Smaya iris_pack_command(GENX(3DSTATE_STREAMOUT), dynamic_sol, sol) { 61069f464c52Smaya sol.SOFunctionEnable = true; 61079f464c52Smaya sol.SOStatisticsEnable = true; 61089f464c52Smaya 61099f464c52Smaya sol.RenderingDisable = cso_rast->rasterizer_discard && 61109f464c52Smaya !ice->state.prims_generated_query_active; 61119f464c52Smaya sol.ReorderMode = cso_rast->flatshade_first ? 
LEADING : TRAILING; 61129f464c52Smaya } 61139f464c52Smaya 61149f464c52Smaya assert(ice->state.streamout); 61159f464c52Smaya 61169f464c52Smaya iris_emit_merge(batch, ice->state.streamout, dynamic_sol, 61179f464c52Smaya GENX(3DSTATE_STREAMOUT_length)); 61189f464c52Smaya } 61199f464c52Smaya } else { 61209f464c52Smaya if (dirty & IRIS_DIRTY_STREAMOUT) { 61219f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), sol); 61229f464c52Smaya } 61239f464c52Smaya } 61249f464c52Smaya 61259f464c52Smaya if (dirty & IRIS_DIRTY_CLIP) { 61269f464c52Smaya struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; 61279f464c52Smaya struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 61289f464c52Smaya 61299f464c52Smaya bool gs_or_tes = ice->shaders.prog[MESA_SHADER_GEOMETRY] || 61309f464c52Smaya ice->shaders.prog[MESA_SHADER_TESS_EVAL]; 61319f464c52Smaya bool points_or_lines = cso_rast->fill_mode_point_or_line || 61329f464c52Smaya (gs_or_tes ? ice->shaders.output_topology_is_points_or_lines 61339f464c52Smaya : ice->state.prim_is_points_or_lines); 61349f464c52Smaya 61359f464c52Smaya uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)]; 61369f464c52Smaya iris_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) { 61379f464c52Smaya cl.StatisticsEnable = ice->state.statistics_counters_enabled; 61387ec681f3Smrg if (cso_rast->rasterizer_discard) 61397ec681f3Smrg cl.ClipMode = CLIPMODE_REJECT_ALL; 61407ec681f3Smrg else if (ice->state.window_space_position) 61417ec681f3Smrg cl.ClipMode = CLIPMODE_ACCEPT_ALL; 61427ec681f3Smrg else 61437ec681f3Smrg cl.ClipMode = CLIPMODE_NORMAL; 61447ec681f3Smrg 61457ec681f3Smrg cl.PerspectiveDivideDisable = ice->state.window_space_position; 61469f464c52Smaya cl.ViewportXYClipTestEnable = !points_or_lines; 61479f464c52Smaya 61489f464c52Smaya if (wm_prog_data->barycentric_interp_modes & 61499f464c52Smaya BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) 61509f464c52Smaya cl.NonPerspectiveBarycentricEnable = true; 61519f464c52Smaya 61527ec681f3Smrg 
cl.ForceZeroRTAIndexEnable = cso_fb->layers <= 1; 61539f464c52Smaya cl.MaximumVPIndex = ice->state.num_viewports - 1; 61549f464c52Smaya } 61559f464c52Smaya iris_emit_merge(batch, cso_rast->clip, dynamic_clip, 61569f464c52Smaya ARRAY_SIZE(cso_rast->clip)); 61579f464c52Smaya } 61589f464c52Smaya 61597ec681f3Smrg if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_URB)) { 61609f464c52Smaya struct iris_rasterizer_state *cso = ice->state.cso_rast; 61619f464c52Smaya iris_batch_emit(batch, cso->raster, sizeof(cso->raster)); 61629f464c52Smaya 61637ec681f3Smrg uint32_t dynamic_sf[GENX(3DSTATE_SF_length)]; 61647ec681f3Smrg iris_pack_command(GENX(3DSTATE_SF), &dynamic_sf, sf) { 61657ec681f3Smrg sf.ViewportTransformEnable = !ice->state.window_space_position; 61667ec681f3Smrg 61677ec681f3Smrg#if GFX_VER >= 12 61687ec681f3Smrg sf.DerefBlockSize = ice->state.urb_deref_block_size; 61697ec681f3Smrg#endif 61707ec681f3Smrg } 61717ec681f3Smrg iris_emit_merge(batch, cso->sf, dynamic_sf, 61727ec681f3Smrg ARRAY_SIZE(dynamic_sf)); 61739f464c52Smaya } 61749f464c52Smaya 61759f464c52Smaya if (dirty & IRIS_DIRTY_WM) { 61769f464c52Smaya struct iris_rasterizer_state *cso = ice->state.cso_rast; 61779f464c52Smaya uint32_t dynamic_wm[GENX(3DSTATE_WM_length)]; 61789f464c52Smaya 61799f464c52Smaya iris_pack_command(GENX(3DSTATE_WM), &dynamic_wm, wm) { 61809f464c52Smaya wm.StatisticsEnable = ice->state.statistics_counters_enabled; 61819f464c52Smaya 61829f464c52Smaya wm.BarycentricInterpolationMode = 61839f464c52Smaya wm_prog_data->barycentric_interp_modes; 61849f464c52Smaya 61859f464c52Smaya if (wm_prog_data->early_fragment_tests) 61869f464c52Smaya wm.EarlyDepthStencilControl = EDSC_PREPS; 61879f464c52Smaya else if (wm_prog_data->has_side_effects) 61889f464c52Smaya wm.EarlyDepthStencilControl = EDSC_PSEXEC; 61899f464c52Smaya 61909f464c52Smaya /* We could skip this bit if color writes are enabled. 
*/ 61919f464c52Smaya if (wm_prog_data->has_side_effects || wm_prog_data->uses_kill) 61929f464c52Smaya wm.ForceThreadDispatchEnable = ForceON; 61939f464c52Smaya } 61949f464c52Smaya iris_emit_merge(batch, cso->wm, dynamic_wm, ARRAY_SIZE(cso->wm)); 61959f464c52Smaya } 61969f464c52Smaya 61979f464c52Smaya if (dirty & IRIS_DIRTY_SBE) { 61989f464c52Smaya iris_emit_sbe(batch, ice); 61999f464c52Smaya } 62009f464c52Smaya 62019f464c52Smaya if (dirty & IRIS_DIRTY_PS_BLEND) { 62029f464c52Smaya struct iris_blend_state *cso_blend = ice->state.cso_blend; 62039f464c52Smaya struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; 62049f464c52Smaya const struct shader_info *fs_info = 62059f464c52Smaya iris_get_shader_info(ice, MESA_SHADER_FRAGMENT); 62069f464c52Smaya 62079f464c52Smaya uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)]; 62089f464c52Smaya iris_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) { 62099f464c52Smaya pb.HasWriteableRT = has_writeable_rt(cso_blend, fs_info); 62107ec681f3Smrg pb.AlphaTestEnable = cso_zsa->alpha_enabled; 62119f464c52Smaya 62129f464c52Smaya /* The dual source blending docs caution against using SRC1 factors 62139f464c52Smaya * when the shader doesn't use a dual source render target write. 62149f464c52Smaya * Empirically, this can lead to GPU hangs, and the results are 62159f464c52Smaya * undefined anyway, so simply disable blending to avoid the hang. 
62169f464c52Smaya */ 62179f464c52Smaya pb.ColorBufferBlendEnable = (cso_blend->blend_enables & 1) && 62189f464c52Smaya (!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend); 62199f464c52Smaya } 62209f464c52Smaya 62219f464c52Smaya iris_emit_merge(batch, cso_blend->ps_blend, dynamic_pb, 62229f464c52Smaya ARRAY_SIZE(cso_blend->ps_blend)); 62239f464c52Smaya } 62249f464c52Smaya 62259f464c52Smaya if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) { 62269f464c52Smaya struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; 62277ec681f3Smrg#if GFX_VER >= 9 && GFX_VER < 12 62289f464c52Smaya struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref; 62299f464c52Smaya uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; 62309f464c52Smaya iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) { 62319f464c52Smaya wmds.StencilReferenceValue = p_stencil_refs->ref_value[0]; 62329f464c52Smaya wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1]; 62339f464c52Smaya } 62349f464c52Smaya iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds)); 62359f464c52Smaya#else 62367ec681f3Smrg /* Use modify disable fields which allow us to emit packets 62377ec681f3Smrg * directly instead of merging them later. 62387ec681f3Smrg */ 62399f464c52Smaya iris_batch_emit(batch, cso->wmds, sizeof(cso->wmds)); 62409f464c52Smaya#endif 62417ec681f3Smrg 62427ec681f3Smrg#if GFX_VER >= 12 62437ec681f3Smrg iris_batch_emit(batch, cso->depth_bounds, sizeof(cso->depth_bounds)); 62447ec681f3Smrg#endif 62457ec681f3Smrg } 62467ec681f3Smrg 62477ec681f3Smrg if (dirty & IRIS_DIRTY_STENCIL_REF) { 62487ec681f3Smrg#if GFX_VER >= 12 62497ec681f3Smrg /* Use modify disable fields which allow us to emit packets 62507ec681f3Smrg * directly instead of merging them later. 
62517ec681f3Smrg */ 62527ec681f3Smrg struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref; 62537ec681f3Smrg uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; 62547ec681f3Smrg iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) { 62557ec681f3Smrg wmds.StencilReferenceValue = p_stencil_refs->ref_value[0]; 62567ec681f3Smrg wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1]; 62577ec681f3Smrg wmds.StencilTestMaskModifyDisable = true; 62587ec681f3Smrg wmds.StencilWriteMaskModifyDisable = true; 62597ec681f3Smrg wmds.StencilStateModifyDisable = true; 62607ec681f3Smrg wmds.DepthStateModifyDisable = true; 62617ec681f3Smrg } 62627ec681f3Smrg iris_batch_emit(batch, stencil_refs, sizeof(stencil_refs)); 62637ec681f3Smrg#endif 62649f464c52Smaya } 62659f464c52Smaya 62669f464c52Smaya if (dirty & IRIS_DIRTY_SCISSOR_RECT) { 62677ec681f3Smrg /* Wa_1409725701: 62687ec681f3Smrg * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is 62697ec681f3Smrg * stored as an array of up to 16 elements. The location of first 62707ec681f3Smrg * element of the array, as specified by Pointer to SCISSOR_RECT, 62717ec681f3Smrg * should be aligned to a 64-byte boundary. 
62727ec681f3Smrg */ 62737ec681f3Smrg uint32_t alignment = 64; 62749f464c52Smaya uint32_t scissor_offset = 62759f464c52Smaya emit_state(batch, ice->state.dynamic_uploader, 62769f464c52Smaya &ice->state.last_res.scissor, 62779f464c52Smaya ice->state.scissors, 62789f464c52Smaya sizeof(struct pipe_scissor_state) * 62797ec681f3Smrg ice->state.num_viewports, alignment); 62809f464c52Smaya 62819f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { 62829f464c52Smaya ptr.ScissorRectPointer = scissor_offset; 62839f464c52Smaya } 62849f464c52Smaya } 62859f464c52Smaya 62869f464c52Smaya if (dirty & IRIS_DIRTY_DEPTH_BUFFER) { 62879f464c52Smaya struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer; 62889f464c52Smaya 62897ec681f3Smrg /* Do not emit the cso yet. We may need to update clear params first. */ 62909f464c52Smaya struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 62917ec681f3Smrg struct iris_resource *zres = NULL, *sres = NULL; 62929f464c52Smaya if (cso_fb->zsbuf) { 62939f464c52Smaya iris_get_depth_stencil_resources(cso_fb->zsbuf->texture, 62949f464c52Smaya &zres, &sres); 62959f464c52Smaya } 62969f464c52Smaya 62977ec681f3Smrg if (zres && ice->state.hiz_usage != ISL_AUX_USAGE_NONE) { 62987ec681f3Smrg uint32_t *clear_params = 62997ec681f3Smrg cso_z->packets + ARRAY_SIZE(cso_z->packets) - 63007ec681f3Smrg GENX(3DSTATE_CLEAR_PARAMS_length); 63017ec681f3Smrg 63027ec681f3Smrg iris_pack_command(GENX(3DSTATE_CLEAR_PARAMS), clear_params, clear) { 63037ec681f3Smrg clear.DepthClearValueValid = true; 63047ec681f3Smrg clear.DepthClearValue = zres->aux.clear_color.f32[0]; 63057ec681f3Smrg } 63067ec681f3Smrg } 63077ec681f3Smrg 63087ec681f3Smrg iris_batch_emit(batch, cso_z->packets, sizeof(cso_z->packets)); 63097ec681f3Smrg 63107ec681f3Smrg if (zres) 63117ec681f3Smrg genX(emit_depth_state_workarounds)(ice, batch, &zres->surf); 63127ec681f3Smrg 63137ec681f3Smrg if (GFX_VER >= 12) { 63147ec681f3Smrg /* Wa_1408224581 63157ec681f3Smrg * 
63167ec681f3Smrg * Workaround: Gfx12LP Astep only An additional pipe control with 63177ec681f3Smrg * post-sync = store dword operation would be required.( w/a is to 63187ec681f3Smrg * have an additional pipe control after the stencil state whenever 63197ec681f3Smrg * the surface state bits of this state is changing). 63207ec681f3Smrg */ 63217ec681f3Smrg iris_emit_pipe_control_write(batch, "WA for stencil state", 63227ec681f3Smrg PIPE_CONTROL_WRITE_IMMEDIATE, 63237ec681f3Smrg batch->screen->workaround_address.bo, 63247ec681f3Smrg batch->screen->workaround_address.offset, 0); 63259f464c52Smaya } 63269f464c52Smaya } 63279f464c52Smaya 63289f464c52Smaya if (dirty & (IRIS_DIRTY_DEPTH_BUFFER | IRIS_DIRTY_WM_DEPTH_STENCIL)) { 63299f464c52Smaya /* Listen for buffer changes, and also write enable changes. */ 63309f464c52Smaya struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 63319f464c52Smaya pin_depth_and_stencil_buffers(batch, cso_fb->zsbuf, ice->state.cso_zsa); 63329f464c52Smaya } 63339f464c52Smaya 63349f464c52Smaya if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) { 63359f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) { 63369f464c52Smaya for (int i = 0; i < 32; i++) { 63379f464c52Smaya poly.PatternRow[i] = ice->state.poly_stipple.stipple[i]; 63389f464c52Smaya } 63399f464c52Smaya } 63409f464c52Smaya } 63419f464c52Smaya 63429f464c52Smaya if (dirty & IRIS_DIRTY_LINE_STIPPLE) { 63439f464c52Smaya struct iris_rasterizer_state *cso = ice->state.cso_rast; 63449f464c52Smaya iris_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple)); 63459f464c52Smaya } 63469f464c52Smaya 63479f464c52Smaya if (dirty & IRIS_DIRTY_VF_TOPOLOGY) { 63489f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { 63499f464c52Smaya topo.PrimitiveTopologyType = 63507ec681f3Smrg translate_prim_type(draw->mode, ice->state.vertices_per_patch); 63519f464c52Smaya } 63529f464c52Smaya } 63539f464c52Smaya 63549f464c52Smaya if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) { 
63559f464c52Smaya int count = util_bitcount64(ice->state.bound_vertex_buffers); 63567ec681f3Smrg uint64_t dynamic_bound = ice->state.bound_vertex_buffers; 63579f464c52Smaya 63589f464c52Smaya if (ice->state.vs_uses_draw_params) { 63597ec681f3Smrg assert(ice->draw.draw_params.res); 63609f464c52Smaya 63619f464c52Smaya struct iris_vertex_buffer_state *state = 63629f464c52Smaya &(ice->state.genx->vertex_buffers[count]); 63637ec681f3Smrg pipe_resource_reference(&state->resource, ice->draw.draw_params.res); 63649f464c52Smaya struct iris_resource *res = (void *) state->resource; 63659f464c52Smaya 63669f464c52Smaya iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) { 63679f464c52Smaya vb.VertexBufferIndex = count; 63689f464c52Smaya vb.AddressModifyEnable = true; 63699f464c52Smaya vb.BufferPitch = 0; 63707ec681f3Smrg vb.BufferSize = res->bo->size - ice->draw.draw_params.offset; 63719f464c52Smaya vb.BufferStartingAddress = 63727ec681f3Smrg ro_bo(NULL, res->bo->address + 63737ec681f3Smrg (int) ice->draw.draw_params.offset); 63747ec681f3Smrg vb.MOCS = iris_mocs(res->bo, &batch->screen->isl_dev, 63757ec681f3Smrg ISL_SURF_USAGE_VERTEX_BUFFER_BIT); 63767ec681f3Smrg#if GFX_VER >= 12 63777ec681f3Smrg vb.L3BypassDisable = true; 63787ec681f3Smrg#endif 63799f464c52Smaya } 63809f464c52Smaya dynamic_bound |= 1ull << count; 63819f464c52Smaya count++; 63829f464c52Smaya } 63839f464c52Smaya 63849f464c52Smaya if (ice->state.vs_uses_derived_draw_params) { 63859f464c52Smaya struct iris_vertex_buffer_state *state = 63869f464c52Smaya &(ice->state.genx->vertex_buffers[count]); 63879f464c52Smaya pipe_resource_reference(&state->resource, 63887ec681f3Smrg ice->draw.derived_draw_params.res); 63897ec681f3Smrg struct iris_resource *res = (void *) ice->draw.derived_draw_params.res; 63909f464c52Smaya 63919f464c52Smaya iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) { 63929f464c52Smaya vb.VertexBufferIndex = count; 63939f464c52Smaya vb.AddressModifyEnable = true; 63949f464c52Smaya 
vb.BufferPitch = 0; 63959f464c52Smaya vb.BufferSize = 63967ec681f3Smrg res->bo->size - ice->draw.derived_draw_params.offset; 63979f464c52Smaya vb.BufferStartingAddress = 63987ec681f3Smrg ro_bo(NULL, res->bo->address + 63997ec681f3Smrg (int) ice->draw.derived_draw_params.offset); 64007ec681f3Smrg vb.MOCS = iris_mocs(res->bo, &batch->screen->isl_dev, 64017ec681f3Smrg ISL_SURF_USAGE_VERTEX_BUFFER_BIT); 64027ec681f3Smrg#if GFX_VER >= 12 64037ec681f3Smrg vb.L3BypassDisable = true; 64047ec681f3Smrg#endif 64059f464c52Smaya } 64069f464c52Smaya dynamic_bound |= 1ull << count; 64079f464c52Smaya count++; 64089f464c52Smaya } 64099f464c52Smaya 64109f464c52Smaya if (count) { 64117ec681f3Smrg#if GFX_VER >= 11 64127ec681f3Smrg /* Gfx11+ doesn't need the cache workaround below */ 64137ec681f3Smrg uint64_t bound = dynamic_bound; 64147ec681f3Smrg while (bound) { 64157ec681f3Smrg const int i = u_bit_scan64(&bound); 64167ec681f3Smrg iris_use_optional_res(batch, genx->vertex_buffers[i].resource, 64177ec681f3Smrg false, IRIS_DOMAIN_VF_READ); 64187ec681f3Smrg } 64197ec681f3Smrg#else 64209f464c52Smaya /* The VF cache designers cut corners, and made the cache key's 64219f464c52Smaya * <VertexBufferIndex, Memory Address> tuple only consider the bottom 64229f464c52Smaya * 32 bits of the address. If you have two vertex buffers which get 64239f464c52Smaya * placed exactly 4 GiB apart and use them in back-to-back draw calls, 64249f464c52Smaya * you can get collisions (even within a single batch). 64259f464c52Smaya * 64269f464c52Smaya * So, we need to do a VF cache invalidate if the buffer for a VB 64279f464c52Smaya * slot slot changes [48:32] address bits from the previous time. 
64289f464c52Smaya */ 64299f464c52Smaya unsigned flush_flags = 0; 64309f464c52Smaya 64319f464c52Smaya uint64_t bound = dynamic_bound; 64329f464c52Smaya while (bound) { 64339f464c52Smaya const int i = u_bit_scan64(&bound); 64349f464c52Smaya uint16_t high_bits = 0; 64359f464c52Smaya 64369f464c52Smaya struct iris_resource *res = 64379f464c52Smaya (void *) genx->vertex_buffers[i].resource; 64389f464c52Smaya if (res) { 64397ec681f3Smrg iris_use_pinned_bo(batch, res->bo, false, IRIS_DOMAIN_VF_READ); 64409f464c52Smaya 64417ec681f3Smrg high_bits = res->bo->address >> 32ull; 64429f464c52Smaya if (high_bits != ice->state.last_vbo_high_bits[i]) { 64439f464c52Smaya flush_flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE | 64449f464c52Smaya PIPE_CONTROL_CS_STALL; 64459f464c52Smaya ice->state.last_vbo_high_bits[i] = high_bits; 64469f464c52Smaya } 64479f464c52Smaya } 64489f464c52Smaya } 64499f464c52Smaya 64507ec681f3Smrg if (flush_flags) { 64517ec681f3Smrg iris_emit_pipe_control_flush(batch, 64527ec681f3Smrg "workaround: VF cache 32-bit key [VB]", 64537ec681f3Smrg flush_flags); 64547ec681f3Smrg } 64557ec681f3Smrg#endif 64569f464c52Smaya 64579f464c52Smaya const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length); 64589f464c52Smaya 64599f464c52Smaya uint32_t *map = 64609f464c52Smaya iris_get_command_space(batch, 4 * (1 + vb_dwords * count)); 64619f464c52Smaya _iris_pack_command(batch, GENX(3DSTATE_VERTEX_BUFFERS), map, vb) { 64629f464c52Smaya vb.DWordLength = (vb_dwords * count + 1) - 2; 64639f464c52Smaya } 64649f464c52Smaya map += 1; 64659f464c52Smaya 64669f464c52Smaya bound = dynamic_bound; 64679f464c52Smaya while (bound) { 64689f464c52Smaya const int i = u_bit_scan64(&bound); 64699f464c52Smaya memcpy(map, genx->vertex_buffers[i].state, 64709f464c52Smaya sizeof(uint32_t) * vb_dwords); 64719f464c52Smaya map += vb_dwords; 64729f464c52Smaya } 64739f464c52Smaya } 64749f464c52Smaya } 64759f464c52Smaya 64769f464c52Smaya if (dirty & IRIS_DIRTY_VERTEX_ELEMENTS) { 64779f464c52Smaya struct 
iris_vertex_element_state *cso = ice->state.cso_vertex_elements; 64789f464c52Smaya const unsigned entries = MAX2(cso->count, 1); 64799f464c52Smaya if (!(ice->state.vs_needs_sgvs_element || 64809f464c52Smaya ice->state.vs_uses_derived_draw_params || 64819f464c52Smaya ice->state.vs_needs_edge_flag)) { 64829f464c52Smaya iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) * 64839f464c52Smaya (1 + entries * GENX(VERTEX_ELEMENT_STATE_length))); 64849f464c52Smaya } else { 64859f464c52Smaya uint32_t dynamic_ves[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)]; 64869f464c52Smaya const unsigned dyn_count = cso->count + 64879f464c52Smaya ice->state.vs_needs_sgvs_element + 64889f464c52Smaya ice->state.vs_uses_derived_draw_params; 64899f464c52Smaya 64909f464c52Smaya iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), 64919f464c52Smaya &dynamic_ves, ve) { 64929f464c52Smaya ve.DWordLength = 64939f464c52Smaya 1 + GENX(VERTEX_ELEMENT_STATE_length) * dyn_count - 2; 64949f464c52Smaya } 64959f464c52Smaya memcpy(&dynamic_ves[1], &cso->vertex_elements[1], 64969f464c52Smaya (cso->count - ice->state.vs_needs_edge_flag) * 64979f464c52Smaya GENX(VERTEX_ELEMENT_STATE_length) * sizeof(uint32_t)); 64989f464c52Smaya uint32_t *ve_pack_dest = 64999f464c52Smaya &dynamic_ves[1 + (cso->count - ice->state.vs_needs_edge_flag) * 65009f464c52Smaya GENX(VERTEX_ELEMENT_STATE_length)]; 65019f464c52Smaya 65029f464c52Smaya if (ice->state.vs_needs_sgvs_element) { 65039f464c52Smaya uint32_t base_ctrl = ice->state.vs_uses_draw_params ? 
65049f464c52Smaya VFCOMP_STORE_SRC : VFCOMP_STORE_0; 65059f464c52Smaya iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 65069f464c52Smaya ve.Valid = true; 65079f464c52Smaya ve.VertexBufferIndex = 65089f464c52Smaya util_bitcount64(ice->state.bound_vertex_buffers); 65099f464c52Smaya ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT; 65109f464c52Smaya ve.Component0Control = base_ctrl; 65119f464c52Smaya ve.Component1Control = base_ctrl; 65129f464c52Smaya ve.Component2Control = VFCOMP_STORE_0; 65139f464c52Smaya ve.Component3Control = VFCOMP_STORE_0; 65149f464c52Smaya } 65159f464c52Smaya ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); 65169f464c52Smaya } 65179f464c52Smaya if (ice->state.vs_uses_derived_draw_params) { 65189f464c52Smaya iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 65199f464c52Smaya ve.Valid = true; 65209f464c52Smaya ve.VertexBufferIndex = 65219f464c52Smaya util_bitcount64(ice->state.bound_vertex_buffers) + 65229f464c52Smaya ice->state.vs_uses_draw_params; 65239f464c52Smaya ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT; 65249f464c52Smaya ve.Component0Control = VFCOMP_STORE_SRC; 65259f464c52Smaya ve.Component1Control = VFCOMP_STORE_SRC; 65269f464c52Smaya ve.Component2Control = VFCOMP_STORE_0; 65279f464c52Smaya ve.Component3Control = VFCOMP_STORE_0; 65289f464c52Smaya } 65299f464c52Smaya ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); 65309f464c52Smaya } 65319f464c52Smaya if (ice->state.vs_needs_edge_flag) { 65329f464c52Smaya for (int i = 0; i < GENX(VERTEX_ELEMENT_STATE_length); i++) 65339f464c52Smaya ve_pack_dest[i] = cso->edgeflag_ve[i]; 65349f464c52Smaya } 65359f464c52Smaya 65369f464c52Smaya iris_batch_emit(batch, &dynamic_ves, sizeof(uint32_t) * 65379f464c52Smaya (1 + dyn_count * GENX(VERTEX_ELEMENT_STATE_length))); 65389f464c52Smaya } 65399f464c52Smaya 65409f464c52Smaya if (!ice->state.vs_needs_edge_flag) { 65419f464c52Smaya iris_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) * 65429f464c52Smaya entries * 
GENX(3DSTATE_VF_INSTANCING_length)); 65439f464c52Smaya } else { 65449f464c52Smaya assert(cso->count > 0); 65459f464c52Smaya const unsigned edgeflag_index = cso->count - 1; 65469f464c52Smaya uint32_t dynamic_vfi[33 * GENX(3DSTATE_VF_INSTANCING_length)]; 65479f464c52Smaya memcpy(&dynamic_vfi[0], cso->vf_instancing, edgeflag_index * 65489f464c52Smaya GENX(3DSTATE_VF_INSTANCING_length) * sizeof(uint32_t)); 65499f464c52Smaya 65509f464c52Smaya uint32_t *vfi_pack_dest = &dynamic_vfi[0] + 65519f464c52Smaya edgeflag_index * GENX(3DSTATE_VF_INSTANCING_length); 65529f464c52Smaya iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { 65539f464c52Smaya vi.VertexElementIndex = edgeflag_index + 65549f464c52Smaya ice->state.vs_needs_sgvs_element + 65559f464c52Smaya ice->state.vs_uses_derived_draw_params; 65569f464c52Smaya } 65579f464c52Smaya for (int i = 0; i < GENX(3DSTATE_VF_INSTANCING_length); i++) 65589f464c52Smaya vfi_pack_dest[i] |= cso->edgeflag_vfi[i]; 65599f464c52Smaya 65609f464c52Smaya iris_batch_emit(batch, &dynamic_vfi[0], sizeof(uint32_t) * 65619f464c52Smaya entries * GENX(3DSTATE_VF_INSTANCING_length)); 65629f464c52Smaya } 65639f464c52Smaya } 65649f464c52Smaya 65659f464c52Smaya if (dirty & IRIS_DIRTY_VF_SGVS) { 65669f464c52Smaya const struct brw_vs_prog_data *vs_prog_data = (void *) 65679f464c52Smaya ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data; 65689f464c52Smaya struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements; 65699f464c52Smaya 65709f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgv) { 65719f464c52Smaya if (vs_prog_data->uses_vertexid) { 65729f464c52Smaya sgv.VertexIDEnable = true; 65739f464c52Smaya sgv.VertexIDComponentNumber = 2; 65749f464c52Smaya sgv.VertexIDElementOffset = 65759f464c52Smaya cso->count - ice->state.vs_needs_edge_flag; 65769f464c52Smaya } 65779f464c52Smaya 65789f464c52Smaya if (vs_prog_data->uses_instanceid) { 65799f464c52Smaya sgv.InstanceIDEnable = true; 65809f464c52Smaya 
sgv.InstanceIDComponentNumber = 3; 65819f464c52Smaya sgv.InstanceIDElementOffset = 65829f464c52Smaya cso->count - ice->state.vs_needs_edge_flag; 65839f464c52Smaya } 65849f464c52Smaya } 65859f464c52Smaya } 65869f464c52Smaya 65879f464c52Smaya if (dirty & IRIS_DIRTY_VF) { 65889f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_VF), vf) { 65899f464c52Smaya if (draw->primitive_restart) { 65909f464c52Smaya vf.IndexedDrawCutIndexEnable = true; 65919f464c52Smaya vf.CutIndex = draw->restart_index; 65929f464c52Smaya } 65939f464c52Smaya } 65949f464c52Smaya } 65959f464c52Smaya 65969f464c52Smaya if (dirty & IRIS_DIRTY_VF_STATISTICS) { 65979f464c52Smaya iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf) { 65989f464c52Smaya vf.StatisticsEnable = true; 65999f464c52Smaya } 66009f464c52Smaya } 66019f464c52Smaya 66027ec681f3Smrg#if GFX_VER == 8 66037ec681f3Smrg if (dirty & IRIS_DIRTY_PMA_FIX) { 66047ec681f3Smrg bool enable = want_pma_fix(ice); 66057ec681f3Smrg genX(update_pma_fix)(ice, batch, enable); 66067ec681f3Smrg } 66077ec681f3Smrg#endif 66087ec681f3Smrg 66097ec681f3Smrg if (ice->state.current_hash_scale != 1) 66107ec681f3Smrg genX(emit_hashing_mode)(ice, batch, UINT_MAX, UINT_MAX, 1); 66117ec681f3Smrg 66127ec681f3Smrg#if GFX_VER >= 12 66137ec681f3Smrg genX(invalidate_aux_map_state)(batch); 66147ec681f3Smrg#endif 66157ec681f3Smrg} 66167ec681f3Smrg 
/* flush_vbos():
 * Walk the bitmask of currently bound vertex buffers and emit a buffer
 * barrier (IRIS_DOMAIN_VF_READ) for each buffer's BO, so the vertex fetch
 * unit observes any prior writes to those buffers before the next draw.
 * (Called from iris_upload_render_state when the
 * IRIS_DIRTY_VERTEX_BUFFER_FLUSHES bit is set — see the caller below.)
 */
66177ec681f3Smrgstatic void 66187ec681f3Smrgflush_vbos(struct iris_context *ice, struct iris_batch *batch) 66197ec681f3Smrg{ 66207ec681f3Smrg struct iris_genx_state *genx = ice->state.genx; 66217ec681f3Smrg uint64_t bound = ice->state.bound_vertex_buffers; 66227ec681f3Smrg while (bound) { 66237ec681f3Smrg const int i = u_bit_scan64(&bound); /* pops lowest set bit = VB slot index */ 66247ec681f3Smrg struct iris_bo *bo = iris_resource_bo(genx->vertex_buffers[i].resource); 66257ec681f3Smrg iris_emit_buffer_barrier_for(batch, bo, IRIS_DOMAIN_VF_READ); 66267ec681f3Smrg } 66279f464c52Smaya} 66289f464c52Smaya 66299f464c52Smayastatic void 
66309f464c52Smayairis_upload_render_state(struct iris_context *ice, 66319f464c52Smaya struct iris_batch *batch, 66327ec681f3Smrg const struct pipe_draw_info *draw, 66337ec681f3Smrg unsigned drawid_offset, 66347ec681f3Smrg const struct pipe_draw_indirect_info *indirect, 66357ec681f3Smrg const struct pipe_draw_start_count_bias *sc) 66369f464c52Smaya{ 66377ec681f3Smrg bool use_predicate = ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT; 66387ec681f3Smrg 66397ec681f3Smrg if (ice->state.dirty & IRIS_DIRTY_VERTEX_BUFFER_FLUSHES) 66407ec681f3Smrg flush_vbos(ice, batch); 66417ec681f3Smrg 66427ec681f3Smrg iris_batch_sync_region_start(batch); 66437ec681f3Smrg 66449f464c52Smaya /* Always pin the binder. If we're emitting new binding table pointers, 66459f464c52Smaya * we need it. If not, we're probably inheriting old tables via the 66469f464c52Smaya * context, and need it anyway. Since true zero-bindings cases are 66479f464c52Smaya * practically non-existent, just pin it and avoid last_res tracking. 66489f464c52Smaya */ 66497ec681f3Smrg iris_use_pinned_bo(batch, ice->state.binder.bo, false, 66507ec681f3Smrg IRIS_DOMAIN_NONE); 66519f464c52Smaya 66529f464c52Smaya if (!batch->contains_draw) { 66537ec681f3Smrg if (GFX_VER == 12) { 66547ec681f3Smrg /* Re-emit constants when starting a new batch buffer in order to 66557ec681f3Smrg * work around push constant corruption on context switch. 66567ec681f3Smrg * 66577ec681f3Smrg * XXX - Provide hardware spec quotation when available. 
66587ec681f3Smrg */ 66597ec681f3Smrg ice->state.stage_dirty |= (IRIS_STAGE_DIRTY_CONSTANTS_VS | 66607ec681f3Smrg IRIS_STAGE_DIRTY_CONSTANTS_TCS | 66617ec681f3Smrg IRIS_STAGE_DIRTY_CONSTANTS_TES | 66627ec681f3Smrg IRIS_STAGE_DIRTY_CONSTANTS_GS | 66637ec681f3Smrg IRIS_STAGE_DIRTY_CONSTANTS_FS); 66647ec681f3Smrg } 66659f464c52Smaya batch->contains_draw = true; 66669f464c52Smaya } 66679f464c52Smaya 66687ec681f3Smrg if (!batch->contains_draw_with_next_seqno) { 66697ec681f3Smrg iris_restore_render_saved_bos(ice, batch, draw); 66707ec681f3Smrg batch->contains_draw_with_next_seqno = true; 66717ec681f3Smrg } 66727ec681f3Smrg 66739f464c52Smaya iris_upload_dirty_render_state(ice, batch, draw); 66749f464c52Smaya 66759f464c52Smaya if (draw->index_size > 0) { 66769f464c52Smaya unsigned offset; 66779f464c52Smaya 66789f464c52Smaya if (draw->has_user_indices) { 66797ec681f3Smrg unsigned start_offset = draw->index_size * sc->start; 66807ec681f3Smrg 66817ec681f3Smrg u_upload_data(ice->ctx.const_uploader, start_offset, 66827ec681f3Smrg sc->count * draw->index_size, 4, 66837ec681f3Smrg (char*)draw->index.user + start_offset, 66849f464c52Smaya &offset, &ice->state.last_res.index_buffer); 66857ec681f3Smrg offset -= start_offset; 66869f464c52Smaya } else { 66879f464c52Smaya struct iris_resource *res = (void *) draw->index.resource; 66889f464c52Smaya res->bind_history |= PIPE_BIND_INDEX_BUFFER; 66899f464c52Smaya 66909f464c52Smaya pipe_resource_reference(&ice->state.last_res.index_buffer, 66919f464c52Smaya draw->index.resource); 66929f464c52Smaya offset = 0; 66937ec681f3Smrg 66947ec681f3Smrg iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_VF_READ); 66959f464c52Smaya } 66969f464c52Smaya 66977ec681f3Smrg struct iris_genx_state *genx = ice->state.genx; 66989f464c52Smaya struct iris_bo *bo = iris_resource_bo(ice->state.last_res.index_buffer); 66999f464c52Smaya 67007ec681f3Smrg uint32_t ib_packet[GENX(3DSTATE_INDEX_BUFFER_length)]; 67017ec681f3Smrg 
iris_pack_command(GENX(3DSTATE_INDEX_BUFFER), ib_packet, ib) { 67029f464c52Smaya ib.IndexFormat = draw->index_size >> 1; 67037ec681f3Smrg ib.MOCS = iris_mocs(bo, &batch->screen->isl_dev, 67047ec681f3Smrg ISL_SURF_USAGE_INDEX_BUFFER_BIT); 67059f464c52Smaya ib.BufferSize = bo->size - offset; 67067ec681f3Smrg ib.BufferStartingAddress = ro_bo(NULL, bo->address + offset); 67077ec681f3Smrg#if GFX_VER >= 12 67087ec681f3Smrg ib.L3BypassDisable = true; 67097ec681f3Smrg#endif 67109f464c52Smaya } 67119f464c52Smaya 67127ec681f3Smrg if (memcmp(genx->last_index_buffer, ib_packet, sizeof(ib_packet)) != 0) { 67137ec681f3Smrg memcpy(genx->last_index_buffer, ib_packet, sizeof(ib_packet)); 67147ec681f3Smrg iris_batch_emit(batch, ib_packet, sizeof(ib_packet)); 67157ec681f3Smrg iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_VF_READ); 67167ec681f3Smrg } 67177ec681f3Smrg 67187ec681f3Smrg#if GFX_VER < 11 67199f464c52Smaya /* The VF cache key only uses 32-bits, see vertex buffer comment above */ 67207ec681f3Smrg uint16_t high_bits = bo->address >> 32ull; 67219f464c52Smaya if (high_bits != ice->state.last_index_bo_high_bits) { 67227ec681f3Smrg iris_emit_pipe_control_flush(batch, 67237ec681f3Smrg "workaround: VF cache 32-bit key [IB]", 67247ec681f3Smrg PIPE_CONTROL_VF_CACHE_INVALIDATE | 67257ec681f3Smrg PIPE_CONTROL_CS_STALL); 67269f464c52Smaya ice->state.last_index_bo_high_bits = high_bits; 67279f464c52Smaya } 67287ec681f3Smrg#endif 67299f464c52Smaya } 67309f464c52Smaya 67319f464c52Smaya#define _3DPRIM_END_OFFSET 0x2420 67329f464c52Smaya#define _3DPRIM_START_VERTEX 0x2430 67339f464c52Smaya#define _3DPRIM_VERTEX_COUNT 0x2434 67349f464c52Smaya#define _3DPRIM_INSTANCE_COUNT 0x2438 67359f464c52Smaya#define _3DPRIM_START_INSTANCE 0x243C 67369f464c52Smaya#define _3DPRIM_BASE_VERTEX 0x2440 67379f464c52Smaya 67387ec681f3Smrg if (indirect && !indirect->count_from_stream_output) { 67397ec681f3Smrg if (indirect->indirect_draw_count) { 67407ec681f3Smrg use_predicate = true; 67417ec681f3Smrg 
67427ec681f3Smrg struct iris_bo *draw_count_bo = 67437ec681f3Smrg iris_resource_bo(indirect->indirect_draw_count); 67447ec681f3Smrg unsigned draw_count_offset = 67457ec681f3Smrg indirect->indirect_draw_count_offset; 67469f464c52Smaya 67477ec681f3Smrg if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) { 67487ec681f3Smrg struct mi_builder b; 67497ec681f3Smrg mi_builder_init(&b, &batch->screen->devinfo, batch); 67507ec681f3Smrg 67517ec681f3Smrg /* comparison = draw id < draw count */ 67527ec681f3Smrg struct mi_value comparison = 67537ec681f3Smrg mi_ult(&b, mi_imm(drawid_offset), 67547ec681f3Smrg mi_mem32(ro_bo(draw_count_bo, draw_count_offset))); 67557ec681f3Smrg 67567ec681f3Smrg /* predicate = comparison & conditional rendering predicate */ 67577ec681f3Smrg mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), 67587ec681f3Smrg mi_iand(&b, comparison, mi_reg32(CS_GPR(15)))); 67597ec681f3Smrg } else { 67607ec681f3Smrg uint32_t mi_predicate; 67617ec681f3Smrg 67627ec681f3Smrg /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */ 67637ec681f3Smrg iris_load_register_imm64(batch, MI_PREDICATE_SRC1, drawid_offset); 67647ec681f3Smrg /* Upload the current draw count from the draw parameters buffer 67657ec681f3Smrg * to MI_PREDICATE_SRC0. 
67667ec681f3Smrg */ 67677ec681f3Smrg iris_load_register_mem32(batch, MI_PREDICATE_SRC0, 67687ec681f3Smrg draw_count_bo, draw_count_offset); 67697ec681f3Smrg /* Zero the top 32-bits of MI_PREDICATE_SRC0 */ 67707ec681f3Smrg iris_load_register_imm32(batch, MI_PREDICATE_SRC0 + 4, 0); 67717ec681f3Smrg 67727ec681f3Smrg if (drawid_offset == 0) { 67737ec681f3Smrg mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV | 67747ec681f3Smrg MI_PREDICATE_COMBINEOP_SET | 67757ec681f3Smrg MI_PREDICATE_COMPAREOP_SRCS_EQUAL; 67767ec681f3Smrg } else { 67777ec681f3Smrg /* While draw_index < draw_count the predicate's result will be 67787ec681f3Smrg * (draw_index == draw_count) ^ TRUE = TRUE 67797ec681f3Smrg * When draw_index == draw_count the result is 67807ec681f3Smrg * (TRUE) ^ TRUE = FALSE 67817ec681f3Smrg * After this all results will be: 67827ec681f3Smrg * (FALSE) ^ FALSE = FALSE 67837ec681f3Smrg */ 67847ec681f3Smrg mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD | 67857ec681f3Smrg MI_PREDICATE_COMBINEOP_XOR | 67867ec681f3Smrg MI_PREDICATE_COMPAREOP_SRCS_EQUAL; 67877ec681f3Smrg } 67887ec681f3Smrg iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); 67897ec681f3Smrg } 67907ec681f3Smrg } 67917ec681f3Smrg struct iris_bo *bo = iris_resource_bo(indirect->buffer); 67929f464c52Smaya assert(bo); 67939f464c52Smaya 67949f464c52Smaya iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 67959f464c52Smaya lrm.RegisterAddress = _3DPRIM_VERTEX_COUNT; 67967ec681f3Smrg lrm.MemoryAddress = ro_bo(bo, indirect->offset + 0); 67979f464c52Smaya } 67989f464c52Smaya iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 67999f464c52Smaya lrm.RegisterAddress = _3DPRIM_INSTANCE_COUNT; 68007ec681f3Smrg lrm.MemoryAddress = ro_bo(bo, indirect->offset + 4); 68019f464c52Smaya } 68029f464c52Smaya iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 68039f464c52Smaya lrm.RegisterAddress = _3DPRIM_START_VERTEX; 68047ec681f3Smrg lrm.MemoryAddress = ro_bo(bo, indirect->offset + 8); 
68059f464c52Smaya } 68069f464c52Smaya if (draw->index_size) { 68079f464c52Smaya iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 68089f464c52Smaya lrm.RegisterAddress = _3DPRIM_BASE_VERTEX; 68097ec681f3Smrg lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12); 68109f464c52Smaya } 68119f464c52Smaya iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 68129f464c52Smaya lrm.RegisterAddress = _3DPRIM_START_INSTANCE; 68137ec681f3Smrg lrm.MemoryAddress = ro_bo(bo, indirect->offset + 16); 68149f464c52Smaya } 68159f464c52Smaya } else { 68169f464c52Smaya iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 68179f464c52Smaya lrm.RegisterAddress = _3DPRIM_START_INSTANCE; 68187ec681f3Smrg lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12); 68199f464c52Smaya } 68209f464c52Smaya iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { 68219f464c52Smaya lri.RegisterOffset = _3DPRIM_BASE_VERTEX; 68229f464c52Smaya lri.DataDWord = 0; 68239f464c52Smaya } 68249f464c52Smaya } 68257ec681f3Smrg } else if (indirect && indirect->count_from_stream_output) { 68269f464c52Smaya struct iris_stream_output_target *so = 68277ec681f3Smrg (void *) indirect->count_from_stream_output; 68289f464c52Smaya 68299f464c52Smaya /* XXX: Replace with actual cache tracking */ 68307ec681f3Smrg iris_emit_pipe_control_flush(batch, 68317ec681f3Smrg "draw count from stream output stall", 68327ec681f3Smrg PIPE_CONTROL_CS_STALL); 68339f464c52Smaya 68347ec681f3Smrg struct mi_builder b; 68357ec681f3Smrg mi_builder_init(&b, &batch->screen->devinfo, batch); 68367ec681f3Smrg 68377ec681f3Smrg struct iris_address addr = 68387ec681f3Smrg ro_bo(iris_resource_bo(so->offset.res), so->offset.offset); 68397ec681f3Smrg struct mi_value offset = 68407ec681f3Smrg mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset); 68417ec681f3Smrg 68427ec681f3Smrg mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT), 68437ec681f3Smrg mi_udiv32_imm(&b, offset, so->stride)); 68449f464c52Smaya 68459f464c52Smaya _iris_emit_lri(batch, 
                     _3DPRIM_START_VERTEX, 0);
      _iris_emit_lri(batch, _3DPRIM_BASE_VERTEX, 0);
      _iris_emit_lri(batch, _3DPRIM_START_INSTANCE, 0);
      _iris_emit_lri(batch, _3DPRIM_INSTANCE_COUNT, draw->instance_count);
   }

   iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_DRAW, draw, indirect, sc);

   /* Emit the actual draw.  For indirect draws, the counts and offsets were
    * loaded into the _3DPRIM_* registers above rather than programmed here.
    */
   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
      prim.PredicateEnable = use_predicate;

      if (indirect) {
         prim.IndirectParameterEnable = true;
      } else {
         prim.StartInstanceLocation = draw->start_instance;
         prim.InstanceCount = draw->instance_count;
         prim.VertexCountPerInstance = sc->count;

         prim.StartVertexLocation = sc->start;

         if (draw->index_size) {
            prim.BaseVertexLocation += sc->index_bias;
         }
      }
   }

   iris_batch_sync_region_end(batch);
}

/**
 * Load the indirect compute dispatch dimensions from the grid-size buffer
 * into the GPGPU_DISPATCHDIM{X,Y,Z} registers using MI_LOAD_REGISTER_MEM,
 * so a subsequent walker command can source them indirectly.
 */
static void
iris_load_indirect_location(struct iris_context *ice,
                            struct iris_batch *batch,
                            const struct pipe_grid_info *grid)
{
#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508

   assert(grid->indirect);

   struct iris_state_ref *grid_size = &ice->state.grid_size;
   struct iris_bo *bo = iris_resource_bo(grid_size->res);
   /* The grid-size buffer holds three consecutive 32-bit dimensions. */
   iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
      lrm.RegisterAddress = GPGPU_DISPATCHDIMX;
      lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0);
   }
   iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
      lrm.RegisterAddress = GPGPU_DISPATCHDIMY;
      lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4);
   }
   iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
      lrm.RegisterAddress = GPGPU_DISPATCHDIMZ;
      lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8);
   }
}

#if GFX_VERx10 >= 125

/**
 * Dispatch a compute grid on GFX_VERx10 >= 125 hardware using CFE_STATE
 * and the COMPUTE_WALKER command.
 */
static void
iris_upload_compute_walker(struct iris_context *ice,
                           struct iris_batch *batch,
                           const struct pipe_grid_info *grid)
{
   const uint64_t stage_dirty = ice->state.stage_dirty;
   struct iris_screen *screen = batch->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct iris_binder *binder = &ice->state.binder;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
   struct iris_compiled_shader *shader =
      ice->shaders.prog[MESA_SHADER_COMPUTE];
   struct brw_stage_prog_data *prog_data = shader->prog_data;
   struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
   const struct brw_cs_dispatch_info dispatch =
      brw_cs_get_dispatch_info(devinfo, cs_prog_data, grid->block);

   /* Only re-emit CFE_STATE when compute-shader state is dirty. */
   if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
      iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
         cfe.MaximumNumberofThreads =
            devinfo->max_cs_threads * devinfo->subslice_total - 1;
         if (prog_data->total_scratch > 0) {
            cfe.ScratchSpaceBuffer =
               iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4;
         }
      }
   }

   if (grid->indirect)
      iris_load_indirect_location(ice, batch, grid);

   iris_emit_cmd(batch, GENX(COMPUTE_WALKER), cw) {
      cw.IndirectParameterEnable = grid->indirect;
      cw.SIMDSize = dispatch.simd_size / 16;
      cw.LocalXMaximum = grid->block[0] - 1;
      cw.LocalYMaximum = grid->block[1] - 1;
      cw.LocalZMaximum = grid->block[2] - 1;
      cw.ThreadGroupIDXDimension = grid->grid[0];
      cw.ThreadGroupIDYDimension = grid->grid[1];
      cw.ThreadGroupIDZDimension = grid->grid[2];
      cw.ExecutionMask = dispatch.right_mask;

      cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
         .KernelStartPointer = KSP(shader),
         .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
         .SharedLocalMemorySize =
            encode_slm_size(GFX_VER, prog_data->total_shared),
         .NumberOfBarriers = cs_prog_data->uses_barrier,
         .SamplerStatePointer = shs->sampler_table.offset,
         .BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE],
      };

      /* This path expects no push constants (asserted here). */
      assert(brw_cs_push_const_total_size(cs_prog_data, dispatch.threads) == 0);
   }

}

#else /* #if GFX_VERx10 >= 125 */

/**
 * Dispatch a compute grid on pre-XeHP hardware via MEDIA_VFE_STATE,
 * MEDIA_CURBE_LOAD, MEDIA_INTERFACE_DESCRIPTOR_LOAD and GPGPU_WALKER.
 */
static void
iris_upload_gpgpu_walker(struct iris_context *ice,
                         struct iris_batch *batch,
                         const struct pipe_grid_info *grid)
{
   const uint64_t stage_dirty = ice->state.stage_dirty;
   struct iris_screen *screen =
batch->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct iris_binder *binder = &ice->state.binder;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
   struct iris_compiled_shader *shader =
      ice->shaders.prog[MESA_SHADER_COMPUTE];
   struct brw_stage_prog_data *prog_data = shader->prog_data;
   struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
   const struct brw_cs_dispatch_info dispatch =
      brw_cs_get_dispatch_info(devinfo, cs_prog_data, grid->block);

   if ((stage_dirty & IRIS_STAGE_DIRTY_CS) ||
       cs_prog_data->local_size[0] == 0 /* Variable local group size */) {
      /* The MEDIA_VFE_STATE documentation for Gfx8+ says:
       *
       *   "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
       *    the only bits that are changed are scoreboard related: Scoreboard
       *    Enable, Scoreboard Type, Scoreboard Mask, Scoreboard Delta. For
       *    these scoreboard related states, a MEDIA_STATE_FLUSH is
       *    sufficient."
       */
      iris_emit_pipe_control_flush(batch,
                                   "workaround: stall before MEDIA_VFE_STATE",
                                   PIPE_CONTROL_CS_STALL);

      iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
         if (prog_data->total_scratch) {
            uint32_t scratch_addr =
               pin_scratch_space(ice, batch, prog_data, MESA_SHADER_COMPUTE);

            /* Log2-style field encoding via ffs() — assumes total_scratch
             * is a power of two; TODO(review) confirm against callers.
             */
            vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
            vfe.ScratchSpaceBasePointer =
               rw_bo(NULL, scratch_addr, IRIS_DOMAIN_NONE);
         }

         vfe.MaximumNumberofThreads =
            devinfo->max_cs_threads * devinfo->subslice_total - 1;
#if GFX_VER < 11
         vfe.ResetGatewayTimer =
            Resettingrelativetimerandlatchingtheglobaltimestamp;
#endif
#if GFX_VER == 8
         vfe.BypassGatewayControl = true;
#endif
         vfe.NumberofURBEntries = 2;
         vfe.URBEntryAllocationSize = 2;

         vfe.CURBEAllocationSize =
            ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
                  cs_prog_data->push.cross_thread.regs, 2);
      }
   }

   /* TODO: Combine subgroup-id with cbuf0 so we can push regular uniforms */
   if ((stage_dirty & IRIS_STAGE_DIRTY_CS) ||
       cs_prog_data->local_size[0] == 0 /* Variable local group size */) {
      uint32_t curbe_data_offset = 0;
      /* Only the subgroup-id built-in is pushed through CURBE here. */
      assert(cs_prog_data->push.cross_thread.dwords == 0 &&
             cs_prog_data->push.per_thread.dwords == 1 &&
             cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
      const unsigned push_const_size =
         brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
      uint32_t *curbe_data_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cs_thread_ids,
                      ALIGN(push_const_size, 64), 64,
                      &curbe_data_offset);
      assert(curbe_data_map);
      /* Pre-fill with 0x5a so unwritten pad bytes hold a known pattern. */
      memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
      iris_fill_cs_push_const_buffer(cs_prog_data, dispatch.threads,
                                     curbe_data_map);

      iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
         curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
         curbe.CURBEDataStartAddress = curbe_data_offset;
      }
   }

   /* Pin every bound global resource so it is resident for the dispatch. */
   for (unsigned i = 0; i < IRIS_MAX_GLOBAL_BINDINGS; i++) {
      struct pipe_resource *res = ice->state.global_bindings[i];
      if (!res)
         continue;

      iris_use_pinned_bo(batch, iris_resource_bo(res),
                         true, IRIS_DOMAIN_NONE);
   }

   if (stage_dirty & (IRIS_STAGE_DIRTY_SAMPLER_STATES_CS |
                      IRIS_STAGE_DIRTY_BINDINGS_CS |
                      IRIS_STAGE_DIRTY_CONSTANTS_CS |
                      IRIS_STAGE_DIRTY_CS)) {
      uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];

      iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
         idd.SharedLocalMemorySize =
            encode_slm_size(GFX_VER, ish->kernel_shared_size);
         idd.KernelStartPointer =
            KSP(shader) + brw_cs_prog_data_prog_offset(cs_prog_data,
                                                       dispatch.simd_size);
         idd.SamplerStatePointer = shs->sampler_table.offset;
         idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
         idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;
      }

      /* OR in the DWords derived from the compiled shader. */
      for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
         desc[i] |= ((uint32_t *) shader->derived_data)[i];

      iris_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
         load.InterfaceDescriptorTotalLength =
            GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
         load.InterfaceDescriptorDataStartAddress =
            emit_state(batch, ice->state.dynamic_uploader,
                       &ice->state.last_res.cs_desc, desc, sizeof(desc), 64);
      }
   }

   if (grid->indirect)
      iris_load_indirect_location(ice, batch, grid);

   iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_COMPUTE, NULL, NULL, NULL);

   iris_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) {
      ggw.IndirectParameterEnable = grid->indirect != NULL;
      ggw.SIMDSize = dispatch.simd_size / 16;
      ggw.ThreadDepthCounterMaximum = 0;
      ggw.ThreadHeightCounterMaximum = 0;
      ggw.ThreadWidthCounterMaximum = dispatch.threads - 1;
      ggw.ThreadGroupIDXDimension = grid->grid[0];
      ggw.ThreadGroupIDYDimension = grid->grid[1];
      ggw.ThreadGroupIDZDimension = grid->grid[2];
      ggw.RightExecutionMask = dispatch.right_mask;
      ggw.BottomExecutionMask = 0xffffffff;
   }

   iris_emit_cmd(batch, GENX(MEDIA_STATE_FLUSH), msf);
}

#endif /* #if GFX_VERx10 >= 125 */

/**
 * Upload any dirty compute-pipeline state, then dispatch the grid via the
 * per-generation walker helper (COMPUTE_WALKER on XeHP+, GPGPU_WALKER
 * otherwise).
 */
static void
iris_upload_compute_state(struct iris_context *ice,
                          struct iris_batch *batch,
                          const struct pipe_grid_info *grid)
{
   const uint64_t stage_dirty = ice->state.stage_dirty;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
   struct iris_compiled_shader *shader =
      ice->shaders.prog[MESA_SHADER_COMPUTE];

   iris_batch_sync_region_start(batch);

   /* Always pin the binder.  If we're emitting new binding table pointers,
    * we need it.  If not, we're probably inheriting old tables via the
    * context, and need it anyway.  Since true zero-bindings cases are
    * practically non-existent, just pin it and avoid last_res tracking.
    */
   iris_use_pinned_bo(batch, ice->state.binder.bo, false, IRIS_DOMAIN_NONE);

   if (((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
        shs->sysvals_need_upload) ||
       shader->kernel_input_size > 0)
      upload_sysvals(ice, MESA_SHADER_COMPUTE, grid);

   if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
      iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);

   if (stage_dirty & IRIS_STAGE_DIRTY_SAMPLER_STATES_CS)
      iris_upload_sampler_states(ice, MESA_SHADER_COMPUTE);

   iris_use_optional_res(batch, shs->sampler_table.res, false,
                         IRIS_DOMAIN_NONE);
   iris_use_pinned_bo(batch, iris_resource_bo(shader->assembly.res), false,
                      IRIS_DOMAIN_NONE);

   if (ice->state.need_border_colors)
      iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false,
                         IRIS_DOMAIN_NONE);

#if GFX_VER >= 12
   genX(invalidate_aux_map_state)(batch);
#endif

#if GFX_VERx10 >= 125
   iris_upload_compute_walker(ice, batch, grid);
#else
   iris_upload_gpgpu_walker(ice, batch, grid);
#endif

   if (!batch->contains_draw_with_next_seqno) {
      iris_restore_compute_saved_bos(ice, batch, grid);
      batch->contains_draw_with_next_seqno = batch->contains_draw = true;
   }

   iris_batch_sync_region_end(batch);
}

/**
 * State module teardown.
 */
static void
iris_destroy_state(struct iris_context *ice)
{
   struct iris_genx_state *genx = ice->state.genx;

   pipe_resource_reference(&ice->draw.draw_params.res, NULL);
   pipe_resource_reference(&ice->draw.derived_draw_params.res, NULL);

   /* Loop over all VBOs, including ones for draw parameters */
   for (unsigned i = 0; i < ARRAY_SIZE(genx->vertex_buffers); i++) {
      pipe_resource_reference(&genx->vertex_buffers[i].resource, NULL);
   }

   free(ice->state.genx);

   for (int i = 0; i < 4; i++) {
      pipe_so_target_reference(&ice->state.so_target[i], NULL);
   }

   for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
      pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);
   }
   pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);

   /* Drop all per-stage references: sampler tables, constant buffers,
    * images, SSBOs, and sampler views.
    */
   for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      struct
iris_shader_state *shs = &ice->state.shaders[stage];
      pipe_resource_reference(&shs->sampler_table.res, NULL);
      for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
         pipe_resource_reference(&shs->constbuf[i].buffer, NULL);
         pipe_resource_reference(&shs->constbuf_surf_state[i].res, NULL);
      }
      for (int i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) {
         pipe_resource_reference(&shs->image[i].base.resource, NULL);
         pipe_resource_reference(&shs->image[i].surface_state.ref.res, NULL);
         free(shs->image[i].surface_state.cpu);
      }
      for (int i = 0; i < PIPE_MAX_SHADER_BUFFERS; i++) {
         pipe_resource_reference(&shs->ssbo[i].buffer, NULL);
         pipe_resource_reference(&shs->ssbo_surf_state[i].res, NULL);
      }
      for (int i = 0; i < IRIS_MAX_TEXTURE_SAMPLERS; i++) {
         pipe_sampler_view_reference((struct pipe_sampler_view **)
                                     &shs->textures[i], NULL);
      }
   }

   pipe_resource_reference(&ice->state.grid_size.res, NULL);
   pipe_resource_reference(&ice->state.grid_surf_state.res, NULL);

   pipe_resource_reference(&ice->state.null_fb.res, NULL);
   pipe_resource_reference(&ice->state.unbound_tex.res, NULL);

   /* Release the cached "last emitted" state resources. */
   pipe_resource_reference(&ice->state.last_res.cc_vp, NULL);
   pipe_resource_reference(&ice->state.last_res.sf_cl_vp, NULL);
   pipe_resource_reference(&ice->state.last_res.color_calc, NULL);
   pipe_resource_reference(&ice->state.last_res.scissor, NULL);
   pipe_resource_reference(&ice->state.last_res.blend, NULL);
   pipe_resource_reference(&ice->state.last_res.index_buffer, NULL);
   pipe_resource_reference(&ice->state.last_res.cs_thread_ids, NULL);
   pipe_resource_reference(&ice->state.last_res.cs_desc, NULL);
}

/* ------------------------------------------------------------------- */

/**
 * Update driver-cached GPU addresses that refer to a PIPE_BUFFER resource
 * whose backing BO may have changed, and flag the corresponding dirty bits
 * so any state that references it gets re-emitted.
 */
static void
iris_rebind_buffer(struct iris_context *ice,
                   struct iris_resource *res)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_genx_state *genx = ice->state.genx;

   assert(res->base.b.target == PIPE_BUFFER);

   /* Buffers can't be framebuffer attachments, nor display related,
    * and we don't have upstream Clover support.
    */
   assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL |
                                 PIPE_BIND_RENDER_TARGET |
                                 PIPE_BIND_BLENDABLE |
                                 PIPE_BIND_DISPLAY_TARGET |
                                 PIPE_BIND_CURSOR |
                                 PIPE_BIND_COMPUTE_RESOURCE |
                                 PIPE_BIND_GLOBAL)));

   if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) {
      uint64_t bound_vbs = ice->state.bound_vertex_buffers;
      while (bound_vbs) {
         const int i = u_bit_scan64(&bound_vbs);
         struct iris_vertex_buffer_state *state = &genx->vertex_buffers[i];

         /* Update the CPU struct */
         STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_start) == 32);
         STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) == 64);
         uint64_t *addr = (uint64_t *) &state->state[1];
         struct iris_bo *bo = iris_resource_bo(state->resource);

         if (*addr != bo->address + state->offset) {
            *addr = bo->address + state->offset;
            ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
                                IRIS_DIRTY_VERTEX_BUFFER_FLUSHES;
         }
      }
   }

   /* We don't need to handle PIPE_BIND_INDEX_BUFFER here: we re-emit
    * the 3DSTATE_INDEX_BUFFER packet whenever the address changes.
    *
    * There is also no need to handle these:
    * - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw)
    * - PIPE_BIND_QUERY_BUFFER (no persistent state references)
    */

   if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) {
      /* Patch the address baked into each pre-packed 3DSTATE_SO_BUFFER. */
      uint32_t *so_buffers = genx->so_buffers;
      for (unsigned i = 0; i < 4; i++,
           so_buffers += GENX(3DSTATE_SO_BUFFER_length)) {

         /* There are no other fields in bits 127:64 */
         uint64_t *addr = (uint64_t *) &so_buffers[2];
         STATIC_ASSERT(GENX(3DSTATE_SO_BUFFER_SurfaceBaseAddress_start) == 66);
         STATIC_ASSERT(GENX(3DSTATE_SO_BUFFER_SurfaceBaseAddress_bits) == 46);

         struct pipe_stream_output_target *tgt = ice->state.so_target[i];
         if (tgt) {
            struct iris_bo *bo = iris_resource_bo(tgt->buffer);
            if (*addr != bo->address + tgt->buffer_offset) {
               *addr = bo->address + tgt->buffer_offset;
               ice->state.dirty |= IRIS_DIRTY_SO_BUFFERS;
            }
         }
      }
   }

   for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) {
      struct iris_shader_state *shs = &ice->state.shaders[s];
      enum pipe_shader_type p_stage = stage_to_pipe(s);

      /* Skip stages that never bound this resource. */
      if (!(res->bind_stages & (1 << s)))
         continue;

      if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
         /* Skip constant buffer 0, it's for regular uniforms, not UBOs */
         uint32_t bound_cbufs = shs->bound_cbufs & ~1u;
         while (bound_cbufs) {
            const int i = u_bit_scan(&bound_cbufs);
            struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
            struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];

            if (res->bo == iris_resource_bo(cbuf->buffer)) {
               /* Drop the stale surface state; it will be re-created. */
               pipe_resource_reference(&surf_state->res, NULL);
               shs->dirty_cbufs |= 1u << i;
               ice->state.dirty |= (IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
                                    IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES);
               ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS << s;
            }
         }
      }

      if (res->bind_history & PIPE_BIND_SHADER_BUFFER) {
         uint32_t bound_ssbos = shs->bound_ssbos;
         while (bound_ssbos) {
            const int i = u_bit_scan(&bound_ssbos);
            struct pipe_shader_buffer *ssbo = &shs->ssbo[i];

            if (res->bo == iris_resource_bo(ssbo->buffer)) {
               /* Re-bind the SSBO slot with the same parameters. */
               struct pipe_shader_buffer buf = {
                  .buffer = &res->base.b,
                  .buffer_offset = ssbo->buffer_offset,
                  .buffer_size = ssbo->buffer_size,
               };
               iris_set_shader_buffers(ctx, p_stage, i, 1, &buf,
                                       (shs->writable_ssbos >> i) & 1);
            }
         }
      }

      if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) {
         uint32_t bound_sampler_views = shs->bound_sampler_views;
         while (bound_sampler_views) {
            const int i = u_bit_scan(&bound_sampler_views);
            struct iris_sampler_view *isv = shs->textures[i];
            struct iris_bo *bo = isv->res->bo;

            if (update_surface_state_addrs(ice->state.surface_uploader,
                                           &isv->surface_state, bo)) {
               ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << s;
            }
         }
      }

      if (res->bind_history & PIPE_BIND_SHADER_IMAGE) {
         uint32_t bound_image_views = shs->bound_image_views;
         while (bound_image_views) {
            const int i = u_bit_scan(&bound_image_views);
            struct iris_image_view *iv = &shs->image[i];
            struct iris_bo *bo = iris_resource_bo(iv->base.resource);

            if (update_surface_state_addrs(ice->state.surface_uploader,
                                           &iv->surface_state, bo)) {
               ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << s;
            }
         }
      }
   }
}

/* ------------------------------------------------------------------- */

/**
 * Introduce a batch synchronization boundary, and update its cache coherency
 * status to reflect the execution of a PIPE_CONTROL command with the
 * specified flags.
 */
static void
batch_mark_sync_for_pipe_control(struct iris_batch *batch, uint32_t flags)
{
   iris_batch_sync_boundary(batch);

   /* Flush marks only take effect when a CS stall is also requested. */
   if ((flags & PIPE_CONTROL_CS_STALL)) {
      if ((flags & PIPE_CONTROL_RENDER_TARGET_FLUSH))
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_RENDER_WRITE);

      if ((flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH))
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_DEPTH_WRITE);

      if ((flags & PIPE_CONTROL_DATA_CACHE_FLUSH))
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_DATA_WRITE);

      if ((flags & PIPE_CONTROL_FLUSH_ENABLE))
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_OTHER_WRITE);

      if ((flags & (PIPE_CONTROL_CACHE_FLUSH_BITS |
                    PIPE_CONTROL_STALL_AT_SCOREBOARD))) {
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_VF_READ);
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_OTHER_READ);
      }
   }

   if ((flags & PIPE_CONTROL_RENDER_TARGET_FLUSH))
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_RENDER_WRITE);

   if ((flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH))
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_DEPTH_WRITE);

   if ((flags & PIPE_CONTROL_DATA_CACHE_FLUSH))
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_DATA_WRITE);

   if ((flags & PIPE_CONTROL_FLUSH_ENABLE))
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_OTHER_WRITE);

   if ((flags & PIPE_CONTROL_VF_CACHE_INVALIDATE))
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_VF_READ);

   /* OTHER_READ covers both texture and constant caches, so both must be
    * invalidated before the domain is considered synchronized.
    */
   if ((flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) &&
       (flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE))
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_OTHER_READ);
}

/**
 * Translate PIPE_CONTROL_WRITE_* flags into the hardware "Post Sync
 * Operation" field value (0 means no post-sync write).
 */
static unsigned
flags_to_post_sync_op(uint32_t flags)
{
   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
      return WriteImmediateData;

   if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
      return WritePSDepthCount;

   if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
      return WriteTimestamp;

   return 0;
}

/**
 * Do the given flags have a Post Sync or LRI Post Sync operation?
 */
static enum pipe_control_flags
get_post_sync_flags(enum pipe_control_flags flags)
{
   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
            PIPE_CONTROL_WRITE_DEPTH_COUNT |
            PIPE_CONTROL_WRITE_TIMESTAMP |
            PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
    * "LRI Post Sync Operation".  So more than one bit set would be illegal.
    */
   assert(util_bitcount(flags) <= 1);

   return flags;
}

#define IS_COMPUTE_PIPELINE(batch) (batch->name == IRIS_BATCH_COMPUTE)

/**
 * Emit a series of PIPE_CONTROL commands, taking into account any
 * workarounds necessary to actually accomplish the caller's request.
 *
 * Unless otherwise noted, spec quotations in this function come from:
 *
 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
 * Restrictions for PIPE_CONTROL.
 *
 * You should not use this function directly. Use the helpers in
 * iris_pipe_control.c instead, which may split the pipe control further.
 *
 * \param reason  Human-readable description, printed with INTEL_DEBUG pipe
 *                control logging.
 * \param flags   Requested PIPE_CONTROL_* bits; workarounds below may add
 *                more bits before the command is actually emitted.
 * \param bo      Optional buffer for the post-sync write operation (may be
 *                NULL; some workarounds substitute the screen's scratch
 *                workaround address).
 * \param offset  Byte offset into \p bo for the post-sync write.
 * \param imm     Immediate datum for PIPE_CONTROL_WRITE_IMMEDIATE.
 */
static void
iris_emit_raw_pipe_control(struct iris_batch *batch,
                           const char *reason,
                           uint32_t flags,
                           struct iris_bo *bo,
                           uint32_t offset,
                           uint64_t imm)
{
   UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
   enum pipe_control_flags non_lri_post_sync_flags =
      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Recursive PIPE_CONTROL workarounds --------------------------------
    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
    *
    * We do these first because we want to look at the original operation,
    * rather than any workarounds we set.
    */
   if (GFX_VER == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
       * lists several workarounds:
       *
       * "Project: SKL, KBL, BXT
       *
       *  If the VF Cache Invalidation Enable is set to a 1 in a
       *  PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
       *  sets to 0, with the VF Cache Invalidation Enable set to 0
       *  needs to be sent prior to the PIPE_CONTROL with VF Cache
       *  Invalidation Enable set to a 1."
       */
      iris_emit_raw_pipe_control(batch,
                                 "workaround: recursive VF cache invalidate",
                                 0, NULL, 0, 0);
   }

   /* Wa_1409226450, Wait for EU to be idle before pipe control which
    * invalidates the instruction cache
    */
   if (GFX_VER == 12 && (flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE)) {
      iris_emit_raw_pipe_control(batch,
                                 "workaround: CS stall before instruction "
                                 "cache invalidate",
                                 PIPE_CONTROL_CS_STALL |
                                 PIPE_CONTROL_STALL_AT_SCOREBOARD, bo, offset,
                                 imm);
   }

   if ((GFX_VER == 9 || (GFX_VER == 12 && devinfo->revision == 0 /* A0*/)) &&
       IS_COMPUTE_PIPELINE(batch) && post_sync_flags) {
      /* Project: SKL / Argument: LRI Post Sync Operation [23]
       *
       * "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *  programmed prior to programming a PIPECONTROL command with "LRI
       *  Post Sync Operation" in GPGPU mode of operation (i.e when
       *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
       *
       * The same text exists a few rows below for Post Sync Op.
       *
       * On Gfx12 this is Wa_1607156449.
       */
      iris_emit_raw_pipe_control(batch,
                                 "workaround: CS stall before gpgpu post-sync",
                                 PIPE_CONTROL_CS_STALL, bo, offset, imm);
   }

   /* "Flush Types" workarounds ---------------------------------------------
    * We do these now because they may add post-sync operations or CS stalls.
    */

   if (GFX_VER < 11 && flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
       *
       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
       *  'Write PS Depth Count' or 'Write Timestamp'."
       */
      if (!bo) {
         /* No caller-provided post-sync target: write a dummy immediate to
          * the screen's scratch workaround address to satisfy the rule.
          */
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         bo = batch->screen->workaround_address.bo;
         offset = batch->screen->workaround_address.offset;
      }
   }

   if (flags & PIPE_CONTROL_DEPTH_STALL) {
      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
       *
       * "This bit must be DISABLED for operations other than writing
       *  PS_DEPTH_COUNT."
       *
       * This seems like nonsense.  An Ivybridge workaround requires us to
       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
       * operation.  Gfx8+ requires us to emit depth stalls and depth cache
       * flushes together.  So, it's hard to imagine this means anything other
       * than "we originally intended this to be used for PS_DEPTH_COUNT".
       *
       * We ignore the supposed restriction and do nothing.
       */
   }

   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
       *
       * "This bit must be DISABLED for End-of-pipe (Read) fences,
       *  PS_DEPTH_COUNT or TIMESTAMP queries."
       *
       * TODO: Implement end-of-pipe checking.
       */
      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
   }

   if (GFX_VER < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 1:
       *
       * "This bit is ignored if Depth Stall Enable is set.
       *  Further, the render cache is not flushed even if Write Cache
       *  Flush Enable bit is set."
       *
       * We assert that the caller doesn't do this combination, to try and
       * prevent mistakes.  It shouldn't hurt the GPU, though.
       *
       * We skip this check on Gfx11+ as the "Stall at Pixel Scoreboard"
       * and "Render Target Flush" combo is explicitly required for BTI
       * update workarounds.
       */
      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
   }

   /* PIPE_CONTROL page workarounds ------------------------------------- */

   if (GFX_VER <= 8 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
      /* From the PIPE_CONTROL page itself:
       *
       * "IVB, HSW, BDW
       *  Restriction: Pipe_control with CS-stall bit set must be issued
       *  before a pipe-control command that has the State Cache
       *  Invalidate bit set."
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_FLUSH_LLC) {
      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
       *
       * "Project: ALL
       *  SW must always program Post-Sync Operation to "Write Immediate
       *  Data" when Flush LLC is set."
       *
       * For now, we just require the caller to do it.
       */
      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
   }

   /* "Post-Sync Operation" workarounds -------------------------------- */

   /* Project: All / Argument: Global Snapshot Count Reset [19]
    *
    * "This bit must not be exercised on any product.
    *  Requires stall bit ([20] of DW1) set."
    *
    * We don't use this, so we just assert that it isn't used.  The
    * PIPE_CONTROL instruction page indicates that they intended this
    * as a debug feature and don't think it is useful in production,
    * but it may actually be usable, should we ever want to.
    */
   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);

   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
      /* Project: All / Arguments:
       *
       * - Generic Media State Clear [16]
       * - Indirect State Pointers Disable [16]
       *
       * "Requires stall bit ([20] of DW1) set."
       *
       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
       * State Clear) says:
       *
       * "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *  programmed prior to programming a PIPECONTROL command with "Media
       *  State Clear" set in GPGPU mode of operation"
       *
       * This is a subset of the earlier rule, so there's nothing to do.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
      /* Project: All / Argument: Store Data Index
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0'."
       *
       * For now, we just assert that the caller does this.  We might want to
       * automatically add a write to the workaround BO...
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_SYNC_GFDT) {
      /* Project: All / Argument: Sync GFDT
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0' or 0x2520[13] must be set."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_TLB_INVALIDATE) {
      /* Project: IVB+ / Argument: TLB inv
       *
       * "Requires stall bit ([20] of DW1) set."
       *
       * Also, from the PIPE_CONTROL instruction table:
       *
       * "Project: SKL+
       *  Post Sync Operation or CS stall must be set to ensure a TLB
       *  invalidation occurs.  Otherwise no cycle will occur to the TLB
       *  cache to invalidate."
       *
       * This is not a subset of the earlier rule, so there's nothing to do.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (GFX_VER == 9 && devinfo->gt == 4) {
      /* TODO: The big Skylake GT4 post sync op workaround */
   }

   /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */

   if (IS_COMPUTE_PIPELINE(batch)) {
      if (GFX_VER >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
         /* Project: SKL+ / Argument: Tex Invalidate
          * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;
      }

      if (GFX_VER == 8 && (post_sync_flags ||
                           (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
                                     PIPE_CONTROL_DEPTH_STALL |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
         /* Project: BDW / Arguments:
          *
          * - LRI Post Sync Operation   [23]
          * - Post Sync Op              [15:14]
          * - Notify En                 [8]
          * - Depth Stall               [13]
          * - Render Target Cache Flush [12]
          * - Depth Cache Flush         [0]
          * - DC Flush Enable           [5]
          *
          * "Requires stall bit ([20] of DW) set for all GPGPU and Media
          *  Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;

         /* Also, from the PIPE_CONTROL instruction table, bit 20:
          *
          * "Project: BDW
          *  This bit must be always set when PIPE_CONTROL command is
          *  programmed by GPGPU and MEDIA workloads, except for the cases
          *  when only Read Only Cache Invalidation bits are set (State
          *  Cache Invalidation Enable, Instruction cache Invalidation
          *  Enable, Texture Cache Invalidation Enable, Constant Cache
          *  Invalidation Enable). This is to WA FFDOP CG issue, this WA
          *  need not implemented when FF_DOP_CG is disable via "Fixed
          *  Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
          *
          * It sounds like we could avoid CS stalls in some cases, but we
          * don't currently bother.  This list isn't exactly the list above,
          * either...
          */
      }
   }

   /* "Stall" workarounds ----------------------------------------------
    * These have to come after the earlier ones because we may have added
    * some additional CS stalls above.
    */

   if (GFX_VER < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
      /* Project: PRE-SKL, VLV, CHV
       *
       * "[All Stepping][All SKUs]:
       *
       *  One of the following must also be set:
       *
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)
       *  - Stall at Pixel Scoreboard ([1] of DW1)
       *  - Depth Stall ([13] of DW1)
       *  - Post-Sync Operation ([13] of DW1)
       *  - DC Flush Enable ([5] of DW1)"
       *
       * If we don't already have one of those bits set, we choose to add
       * "Stall at Pixel Scoreboard".  Some of the other bits require a
       * CS stall as a workaround (see above), which would send us into
       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
       * appears to be safe, so we choose that.
       */
      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                               PIPE_CONTROL_WRITE_IMMEDIATE |
                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
                               PIPE_CONTROL_WRITE_TIMESTAMP |
                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
                               PIPE_CONTROL_DEPTH_STALL |
                               PIPE_CONTROL_DATA_CACHE_FLUSH;
      if (!(flags & wa_bits))
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
   }

   if (GFX_VER >= 12 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
      /* Wa_1409600907:
       *
       * "PIPE_CONTROL with Depth Stall Enable bit must be set
       *  with any PIPE_CONTROL with Depth Flush Enable bit set."
       */
      flags |= PIPE_CONTROL_DEPTH_STALL;
   }

   /* Emit ---------------------------------------------------------------
    * Note: \p flags may now include workaround bits added above, in
    * addition to what the caller requested.
    */

   if (INTEL_DEBUG(DEBUG_PIPE_CONTROL)) {
      fprintf(stderr,
              "  PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n",
              (flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "",
              (flags & PIPE_CONTROL_CS_STALL) ? "CS " : "",
              (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "",
              (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) ? "VF " : "",
              (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) ? "RT " : "",
              (flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE) ? "Const " : "",
              (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "",
              (flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "",
              (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "",
              (flags & PIPE_CONTROL_TILE_CACHE_FLUSH) ? "Tile " : "",
              (flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "",
              (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? "State " : "",
              (flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "",
              (flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE) ? "Inst " : "",
              (flags & PIPE_CONTROL_MEDIA_STATE_CLEAR) ? "MediaClear " : "",
              (flags & PIPE_CONTROL_NOTIFY_ENABLE) ? "Notify " : "",
              (flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) ?
                 "SnapRes" : "",
              (flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE) ?
                 "ISPDis" : "",
              (flags & PIPE_CONTROL_WRITE_IMMEDIATE) ? "WriteImm " : "",
              (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) ? "WriteZCount " : "",
              (flags & PIPE_CONTROL_WRITE_TIMESTAMP) ? "WriteTimestamp " : "",
              (flags & PIPE_CONTROL_FLUSH_HDC) ? "HDC " : "",
              imm, reason);
   }

   /* Record the coherency effects of the final flag set before emitting. */
   batch_mark_sync_for_pipe_control(batch, flags);
   iris_batch_sync_region_start(batch);

   iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
#if GFX_VER >= 12
      pc.TileCacheFlushEnable = flags & PIPE_CONTROL_TILE_CACHE_FLUSH;
#endif
#if GFX_VER >= 11
      pc.HDCPipelineFlushEnable = flags & PIPE_CONTROL_FLUSH_HDC;
#endif
      pc.LRIPostSyncOperation = NoLRIOperation;
      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
      pc.StoreDataIndex = 0;
      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
      pc.GlobalSnapshotCountReset =
         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
      pc.RenderTargetCacheFlushEnable =
         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
      pc.StateCacheInvalidationEnable =
         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
#if GFX_VER >= 12
      /* Invalidates the L3 cache part in which index & vertex data is loaded
       * when VERTEX_BUFFER_STATE::L3BypassDisable is set.
       */
      pc.L3ReadOnlyCacheInvalidationEnable =
         flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
#endif
      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
      pc.ConstantCacheInvalidationEnable =
         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
      pc.PostSyncOperation = flags_to_post_sync_op(flags);
      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
      pc.InstructionCacheInvalidateEnable =
         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
      pc.IndirectStatePointersDisable =
         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
      pc.TextureCacheInvalidationEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
      pc.Address = rw_bo(bo, offset, IRIS_DOMAIN_OTHER_WRITE);
      pc.ImmediateData = imm;
   }

   iris_batch_sync_region_end(batch);
}

#if GFX_VER == 9
/**
 *
 * Preemption on Gfx9 has to be enabled or disabled in various cases.
 *
 * See these workarounds for preemption:
 *  - WaDisableMidObjectPreemptionForGSLineStripAdj
 *  - WaDisableMidObjectPreemptionForTrifanOrPolygon
 *  - WaDisableMidObjectPreemptionForLineLoop
 *  - WA#0798
 *
 * We don't put this in the vtable because it's only used on Gfx9.
 */
void
gfx9_toggle_preemption(struct iris_context *ice,
                       struct iris_batch *batch,
                       const struct pipe_draw_info *draw)
{
   struct iris_genx_state *genx = ice->state.genx;

   /* Start from "enabled" and let each workaround below veto it. */
   bool object_preemption = true;

   /* WaDisableMidObjectPreemptionForGSLineStripAdj
    *
    *    "WA: Disable mid-draw preemption when draw-call is a linestrip_adj
    *     and GS is enabled."
    */
   if (draw->mode == PIPE_PRIM_LINE_STRIP_ADJACENCY &&
       ice->shaders.prog[MESA_SHADER_GEOMETRY])
      object_preemption = false;

   /* WaDisableMidObjectPreemptionForTrifanOrPolygon
    *
    *    "TriFan miscompare in Execlist Preemption test. Cut index that is
    *     on a previous context. End the previous, the resume another context
    *     with a tri-fan or polygon, and the vertex count is corrupted. If we
    *     prempt again we will cause corruption.
    *
    *     WA: Disable mid-draw preemption when draw-call has a tri-fan."
    */
   if (draw->mode == PIPE_PRIM_TRIANGLE_FAN)
      object_preemption = false;

   /* WaDisableMidObjectPreemptionForLineLoop
    *
    *    "VF Stats Counters Missing a vertex when preemption enabled.
    *
    *     WA: Disable mid-draw preemption when the draw uses a lineloop
    *     topology."
    */
   if (draw->mode == PIPE_PRIM_LINE_LOOP)
      object_preemption = false;

   /* WA#0798
    *
    *    "VF is corrupting GAFS data when preempted on an instance boundary
    *     and replayed with instancing enabled.
    *
    *     WA: Disable preemption when using instanceing."
    */
   if (draw->instance_count > 1)
      object_preemption = false;

   /* Only emit the toggle command when the desired state changed. */
   if (genx->object_preemption != object_preemption) {
      iris_enable_obj_preemption(batch, object_preemption);
      genx->object_preemption = object_preemption;
   }
}
#endif

/**
 * Reset cached generation-specific state that can no longer be trusted
 * (e.g. after batch state is lost), forcing it to be re-emitted.
 */
static void
iris_lost_genx_state(struct iris_context *ice, struct iris_batch *batch)
{
   struct iris_genx_state *genx = ice->state.genx;

#if GFX_VERx10 == 120
   /* Gfx12.0 tracks a depth register mode; mark it unknown so it gets
    * reprogrammed.
    */
   genx->depth_reg_mode = IRIS_DEPTH_REG_MODE_UNKNOWN;
#endif

   /* Clear the cached 3DSTATE_INDEX_BUFFER packet so the next draw
    * re-emits it instead of assuming it still matches.
    */
   memset(genx->last_index_buffer, 0, sizeof(genx->last_index_buffer));
}

/**
 * Emit MI_REPORT_PERF_COUNT, writing a performance counter snapshot
 * (tagged with \p report_id) to \p bo at \p offset_in_bytes.
 */
static void
iris_emit_mi_report_perf_count(struct iris_batch *batch,
                               struct iris_bo *bo,
                               uint32_t offset_in_bytes,
                               uint32_t report_id)
{
   iris_batch_sync_region_start(batch);
   iris_emit_cmd(batch, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {
      /* The report destination is a GPU write, tracked in the "other
       * write" domain.
       */
      mi_rpc.MemoryAddress = rw_bo(bo, offset_in_bytes,
                                   IRIS_DOMAIN_OTHER_WRITE);
      mi_rpc.ReportID = report_id;
   }
   iris_batch_sync_region_end(batch);
}

/**
 * Update the pixel hashing modes that determine the balancing of PS threads
 * across subslices and slices.
 *
 * \param width Width bound of the rendering area (already scaled down if \p
 *              scale is greater than 1).
 * \param height Height bound of the rendering area (already scaled down if \p
 *               scale is greater than 1).
 * \param scale The number of framebuffer samples that could potentially be
 *              affected by an individual channel of the PS thread.  This is
 *              typically one for single-sampled rendering, but for operations
 *              like CCS resolves and fast clears a single PS invocation may
 *              update a huge number of pixels, in which case a finer
 *              balancing is desirable in order to maximally utilize the
 *              bandwidth available.  UINT_MAX can be used as shorthand for
 *              "finest hashing mode available".
 */
void
genX(emit_hashing_mode)(struct iris_context *ice, struct iris_batch *batch,
                        unsigned width, unsigned height, unsigned scale)
{
#if GFX_VER == 9
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   const unsigned slice_hashing[] = {
      /* Because all Gfx9 platforms with more than one slice require
       * three-way subslice hashing, a single "normal" 16x16 slice hashing
       * block is guaranteed to suffer from substantial imbalance, with one
       * subslice receiving twice as much work as the other two in the
       * slice.
       *
       * The performance impact of that would be particularly severe when
       * three-way hashing is also in use for slice balancing (which is the
       * case for all Gfx9 GT4 platforms), because one of the slices
       * receives one every three 16x16 blocks in either direction, which
       * is roughly the periodicity of the underlying subslice imbalance
       * pattern ("roughly" because in reality the hardware's
       * implementation of three-way hashing doesn't do exact modulo 3
       * arithmetic, which somewhat decreases the magnitude of this effect
       * in practice).  This leads to a systematic subslice imbalance
       * within that slice regardless of the size of the primitive.  The
       * 32x32 hashing mode guarantees that the subslice imbalance within a
       * single slice hashing block is minimal, largely eliminating this
       * effect.
       */
      _32x32,
      /* Finest slice hashing mode available. */
      NORMAL
   };
   const unsigned subslice_hashing[] = {
      /* 16x16 would provide a slight cache locality benefit especially
       * visible in the sampler L1 cache efficiency of low-bandwidth
       * non-LLC platforms, but it comes at the cost of greater subslice
       * imbalance for primitives of dimensions approximately intermediate
       * between 16x4 and 16x16.
       */
      _16x4,
      /* Finest subslice hashing mode available. */
      _8x4
   };
   /* Dimensions of the smallest hashing block of a given hashing mode.  If
    * the rendering area is smaller than this there can't possibly be any
    * benefit from switching to this mode, so we optimize out the
    * transition.
    */
   const unsigned min_size[][2] = {
      { 16, 4 },
      {  8, 4 }
   };
   /* Index 0 = coarse modes (scale <= 1), index 1 = finest modes. */
   const unsigned idx = scale > 1;

   if (width > min_size[idx][0] || height > min_size[idx][1]) {
      /* GT_MODE is programmed via LRI; stall first so the reprogramming
       * doesn't race in-flight pixel work.
       */
      iris_emit_raw_pipe_control(batch,
                                 "workaround: CS stall before GT_MODE LRI",
                                 PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                 PIPE_CONTROL_CS_STALL,
                                 NULL, 0, 0);

      iris_emit_reg(batch, GENX(GT_MODE), reg) {
         reg.SliceHashing = (devinfo->num_slices > 1 ? slice_hashing[idx] : 0);
         reg.SliceHashingMask = (devinfo->num_slices > 1 ? -1 : 0);
         reg.SubsliceHashing = subslice_hashing[idx];
         reg.SubsliceHashingMask = -1;
      };

      ice->state.current_hash_scale = scale;
   }
#endif
}

/**
 * Put both the render and compute batches into (or take them out of)
 * front-end no-op mode, and mark all state dirty so everything is
 * re-emitted when no-op mode is left.
 */
static void
iris_set_frontend_noop(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (struct iris_context *) ctx;

   if (iris_batch_prepare_noop(&ice->batches[IRIS_BATCH_RENDER], enable)) {
      ice->state.dirty |= IRIS_ALL_DIRTY_FOR_RENDER;
      ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_FOR_RENDER;
   }

   if (iris_batch_prepare_noop(&ice->batches[IRIS_BATCH_COMPUTE], enable)) {
      ice->state.dirty |= IRIS_ALL_DIRTY_FOR_COMPUTE;
      ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE;
   }
}

/**
 * Fill in the screen's generation-specific function table with the
 * implementations compiled for this GFX_VERx10.
 */
void
genX(init_screen_state)(struct iris_screen *screen)
{
   /* This file is compiled once per generation; make sure the screen we
    * are wiring up actually matches the generation we were built for.
    */
   assert(screen->devinfo.verx10 == GFX_VERx10);
   screen->vtbl.destroy_state = iris_destroy_state;
   screen->vtbl.init_render_context = iris_init_render_context;
   screen->vtbl.init_compute_context = iris_init_compute_context;
   screen->vtbl.upload_render_state = iris_upload_render_state;
   screen->vtbl.update_surface_base_address = iris_update_surface_base_address;
   screen->vtbl.upload_compute_state = iris_upload_compute_state;
   screen->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
   screen->vtbl.emit_mi_report_perf_count = iris_emit_mi_report_perf_count;
   screen->vtbl.rebind_buffer = iris_rebind_buffer;
   screen->vtbl.load_register_reg32 = iris_load_register_reg32;
   screen->vtbl.load_register_reg64 = iris_load_register_reg64;
   screen->vtbl.load_register_imm32 = iris_load_register_imm32;
   screen->vtbl.load_register_imm64 = iris_load_register_imm64;
   screen->vtbl.load_register_mem32 = iris_load_register_mem32;
   screen->vtbl.load_register_mem64 = iris_load_register_mem64;
   screen->vtbl.store_register_mem32 = iris_store_register_mem32;
   screen->vtbl.store_register_mem64 = iris_store_register_mem64;
   screen->vtbl.store_data_imm32 = iris_store_data_imm32;
   screen->vtbl.store_data_imm64 = iris_store_data_imm64;
   screen->vtbl.copy_mem_mem = iris_copy_mem_mem;
   screen->vtbl.derived_program_state_size = iris_derived_program_state_size;
   screen->vtbl.store_derived_program_state = iris_store_derived_program_state;
   screen->vtbl.create_so_decl_list = iris_create_so_decl_list;
   screen->vtbl.populate_vs_key = iris_populate_vs_key;
   screen->vtbl.populate_tcs_key = iris_populate_tcs_key;
   screen->vtbl.populate_tes_key = iris_populate_tes_key;
   screen->vtbl.populate_gs_key = iris_populate_gs_key;
   screen->vtbl.populate_fs_key = iris_populate_fs_key;
   screen->vtbl.populate_cs_key = iris_populate_cs_key;
   screen->vtbl.lost_genx_state = iris_lost_genx_state;
}

void
genX(init_state)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;

   ctx->create_blend_state = iris_create_blend_state;
   ctx->create_depth_stencil_alpha_state = iris_create_zsa_state;
81359f464c52Smaya ctx->create_rasterizer_state = iris_create_rasterizer_state; 81369f464c52Smaya ctx->create_sampler_state = iris_create_sampler_state; 81379f464c52Smaya ctx->create_sampler_view = iris_create_sampler_view; 81389f464c52Smaya ctx->create_surface = iris_create_surface; 81399f464c52Smaya ctx->create_vertex_elements_state = iris_create_vertex_elements; 81409f464c52Smaya ctx->bind_blend_state = iris_bind_blend_state; 81419f464c52Smaya ctx->bind_depth_stencil_alpha_state = iris_bind_zsa_state; 81429f464c52Smaya ctx->bind_sampler_states = iris_bind_sampler_states; 81439f464c52Smaya ctx->bind_rasterizer_state = iris_bind_rasterizer_state; 81449f464c52Smaya ctx->bind_vertex_elements_state = iris_bind_vertex_elements_state; 81459f464c52Smaya ctx->delete_blend_state = iris_delete_state; 81469f464c52Smaya ctx->delete_depth_stencil_alpha_state = iris_delete_state; 81479f464c52Smaya ctx->delete_rasterizer_state = iris_delete_state; 81489f464c52Smaya ctx->delete_sampler_state = iris_delete_state; 81499f464c52Smaya ctx->delete_vertex_elements_state = iris_delete_state; 81509f464c52Smaya ctx->set_blend_color = iris_set_blend_color; 81519f464c52Smaya ctx->set_clip_state = iris_set_clip_state; 81529f464c52Smaya ctx->set_constant_buffer = iris_set_constant_buffer; 81539f464c52Smaya ctx->set_shader_buffers = iris_set_shader_buffers; 81549f464c52Smaya ctx->set_shader_images = iris_set_shader_images; 81559f464c52Smaya ctx->set_sampler_views = iris_set_sampler_views; 81567ec681f3Smrg ctx->set_compute_resources = iris_set_compute_resources; 81577ec681f3Smrg ctx->set_global_binding = iris_set_global_binding; 81589f464c52Smaya ctx->set_tess_state = iris_set_tess_state; 81597ec681f3Smrg ctx->set_patch_vertices = iris_set_patch_vertices; 81609f464c52Smaya ctx->set_framebuffer_state = iris_set_framebuffer_state; 81619f464c52Smaya ctx->set_polygon_stipple = iris_set_polygon_stipple; 81629f464c52Smaya ctx->set_sample_mask = iris_set_sample_mask; 81639f464c52Smaya 
ctx->set_scissor_states = iris_set_scissor_states; 81649f464c52Smaya ctx->set_stencil_ref = iris_set_stencil_ref; 81659f464c52Smaya ctx->set_vertex_buffers = iris_set_vertex_buffers; 81669f464c52Smaya ctx->set_viewport_states = iris_set_viewport_states; 81679f464c52Smaya ctx->sampler_view_destroy = iris_sampler_view_destroy; 81689f464c52Smaya ctx->surface_destroy = iris_surface_destroy; 81699f464c52Smaya ctx->draw_vbo = iris_draw_vbo; 81709f464c52Smaya ctx->launch_grid = iris_launch_grid; 81719f464c52Smaya ctx->create_stream_output_target = iris_create_stream_output_target; 81729f464c52Smaya ctx->stream_output_target_destroy = iris_stream_output_target_destroy; 81739f464c52Smaya ctx->set_stream_output_targets = iris_set_stream_output_targets; 81747ec681f3Smrg ctx->set_frontend_noop = iris_set_frontend_noop; 81759f464c52Smaya 81769f464c52Smaya ice->state.dirty = ~0ull; 81777ec681f3Smrg ice->state.stage_dirty = ~0ull; 81789f464c52Smaya 81799f464c52Smaya ice->state.statistics_counters_enabled = true; 81809f464c52Smaya 81819f464c52Smaya ice->state.sample_mask = 0xffff; 81829f464c52Smaya ice->state.num_viewports = 1; 81837ec681f3Smrg ice->state.prim_mode = PIPE_PRIM_MAX; 81849f464c52Smaya ice->state.genx = calloc(1, sizeof(struct iris_genx_state)); 81857ec681f3Smrg ice->draw.derived_params.drawid = -1; 81869f464c52Smaya 81879f464c52Smaya /* Make a 1x1x1 null surface for unbound textures */ 81889f464c52Smaya void *null_surf_map = 81899f464c52Smaya upload_state(ice->state.surface_uploader, &ice->state.unbound_tex, 81909f464c52Smaya 4 * GENX(RENDER_SURFACE_STATE_length), 64); 81917ec681f3Smrg isl_null_fill_state(&screen->isl_dev, null_surf_map, 81927ec681f3Smrg .size = isl_extent3d(1, 1, 1)); 81939f464c52Smaya ice->state.unbound_tex.offset += 81949f464c52Smaya iris_bo_offset_from_base_address(iris_resource_bo(ice->state.unbound_tex.res)); 81959f464c52Smaya 81969f464c52Smaya /* Default all scissor rectangles to be empty regions. 
*/ 81979f464c52Smaya for (int i = 0; i < IRIS_MAX_VIEWPORTS; i++) { 81989f464c52Smaya ice->state.scissors[i] = (struct pipe_scissor_state) { 81999f464c52Smaya .minx = 1, .maxx = 0, .miny = 1, .maxy = 0, 82009f464c52Smaya }; 82019f464c52Smaya } 82029f464c52Smaya} 8203