1/* 2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#ifndef FREEDRENO_UTIL_H_ 28#define FREEDRENO_UTIL_H_ 29 30#include "drm/freedreno_drmif.h" 31#include "drm/freedreno_ringbuffer.h" 32 33#include "pipe/p_format.h" 34#include "pipe/p_state.h" 35#include "util/u_debug.h" 36#include "util/u_math.h" 37#include "util/u_half.h" 38#include "util/u_dynarray.h" 39#include "util/u_pack_color.h" 40 41#include "disasm.h" 42#include "adreno_common.xml.h" 43#include "adreno_pm4.xml.h" 44 45enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format); 46enum pc_di_index_size fd_pipe2index(enum pipe_format format); 47enum pipe_format fd_gmem_restore_format(enum pipe_format format); 48enum adreno_rb_blend_factor fd_blend_factor(unsigned factor); 49enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode); 50enum adreno_stencil_op fd_stencil_op(unsigned op); 51 52#define A3XX_MAX_MIP_LEVELS 14 53/* TBD if it is same on a2xx, but for now: */ 54#define MAX_MIP_LEVELS A3XX_MAX_MIP_LEVELS 55 56#define A2XX_MAX_RENDER_TARGETS 1 57#define A3XX_MAX_RENDER_TARGETS 4 58#define A4XX_MAX_RENDER_TARGETS 8 59#define A5XX_MAX_RENDER_TARGETS 8 60#define A6XX_MAX_RENDER_TARGETS 8 61 62#define MAX_RENDER_TARGETS A6XX_MAX_RENDER_TARGETS 63 64#define FD_DBG_MSGS 0x0001 65#define FD_DBG_DISASM 0x0002 66#define FD_DBG_DCLEAR 0x0004 67#define FD_DBG_DDRAW 0x0008 68#define FD_DBG_NOSCIS 0x0010 69#define FD_DBG_DIRECT 0x0020 70#define FD_DBG_NOBYPASS 0x0040 71#define FD_DBG_FRAGHALF 0x0080 72#define FD_DBG_NOBIN 0x0100 73/* unused 0x0200 */ 74#define FD_DBG_GLSL120 0x0400 75#define FD_DBG_SHADERDB 0x0800 76#define FD_DBG_FLUSH 0x1000 77#define FD_DBG_DEQP 0x2000 78#define FD_DBG_INORDER 0x4000 79#define FD_DBG_BSTAT 0x8000 80#define FD_DBG_NOGROW 0x10000 81#define FD_DBG_LRZ 0x20000 82#define FD_DBG_NOINDR 0x40000 83#define FD_DBG_NOBLIT 0x80000 84#define FD_DBG_HIPRIO 0x100000 85#define FD_DBG_TTILE 0x200000 86#define FD_DBG_PERFC 0x400000 87#define FD_DBG_SOFTPIN 0x800000 88#define FD_DBG_UBWC 0x1000000 89extern int fd_mesa_debug; 90extern bool fd_binning_enabled; 91 92#define DBG(fmt, ...) \ 93 do { if (fd_mesa_debug & FD_DBG_MSGS) \ 94 debug_printf("%s:%d: "fmt "\n", \ 95 __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) 96 97/* for conditionally setting boolean flag(s): */ 98#define COND(bool, val) ((bool) ? (val) : 0) 99 100#define CP_REG(reg) ((0x4 << 16) | ((unsigned int)((reg) - (0x2000)))) 101 102static inline uint32_t DRAW(enum pc_di_primtype prim_type, 103 enum pc_di_src_sel source_select, enum pc_di_index_size index_size, 104 enum pc_di_vis_cull_mode vis_cull_mode, 105 uint8_t instances) 106{ 107 return (prim_type << 0) | 108 (source_select << 6) | 109 ((index_size & 1) << 11) | 110 ((index_size >> 1) << 13) | 111 (vis_cull_mode << 9) | 112 (1 << 14) | 113 (instances << 24); 114} 115 116static inline uint32_t DRAW_A20X(enum pc_di_primtype prim_type, 117 enum pc_di_face_cull_sel faceness_cull_select, 118 enum pc_di_src_sel source_select, enum pc_di_index_size index_size, 119 bool pre_fetch_cull_enable, 120 bool grp_cull_enable, 121 uint16_t count) 122{ 123 return (prim_type << 0) | 124 (source_select << 6) | 125 (faceness_cull_select << 8) | 126 ((index_size & 1) << 11) | 127 ((index_size >> 1) << 13) | 128 (pre_fetch_cull_enable << 14) | 129 (grp_cull_enable << 15) | 130 (count << 16); 131} 132 133/* for tracking cmdstream positions that need to be patched: */ 134struct fd_cs_patch { 135 uint32_t *cs; 136 uint32_t val; 137}; 138#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch)) 139#define fd_patch_element(buf, i) util_dynarray_element(buf, struct fd_cs_patch, i) 140 141static inline enum pipe_format 142pipe_surface_format(struct pipe_surface *psurf) 143{ 144 if (!psurf) 145 return PIPE_FORMAT_NONE; 146 return psurf->format; 147} 148 149static inline bool 150fd_surface_half_precision(const struct pipe_surface *psurf) 151{ 152 enum pipe_format format; 153 154 if (!psurf) 155 return true; 156 157 format = psurf->format; 158 159 /* colors are provided in consts, which go through cov.f32f16, which will 160 * break these values 161 */ 162 if (util_format_is_pure_integer(format)) 163 return false; 164 165 /* avoid losing precision on 32-bit float formats */ 166 if (util_format_is_float(format) && 167 util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32) 168 return false; 169 170 return true; 171} 172 173static inline unsigned 174fd_sampler_first_level(const struct pipe_sampler_view *view) 175{ 176 if (view->target == PIPE_BUFFER) 177 return 0; 178 return view->u.tex.first_level; 179} 180 181static inline unsigned 182fd_sampler_last_level(const struct pipe_sampler_view *view) 183{ 184 if (view->target == PIPE_BUFFER) 185 return 0; 186 return view->u.tex.last_level; 187} 188 189static inline bool 190fd_half_precision(struct pipe_framebuffer_state *pfb) 191{ 192 unsigned i; 193 194 for (i = 0; i < pfb->nr_cbufs; i++) 195 if (!fd_surface_half_precision(pfb->cbufs[i])) 196 return false; 197 198 return true; 199} 200 201/* Note sure if this is same on all gens, but seems to be same on the later 202 * gen's 203 */ 204static inline unsigned 205fd_calc_guardband(unsigned x) 206{ 207 float l = log2(x); 208 if (l <= 8) 209 return 511; 210 return 511 - ((l - 8) * 65); 211} 212 213#define LOG_DWORDS 0 214 215static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx); 216 217static inline void 218OUT_RING(struct fd_ringbuffer *ring, uint32_t data) 219{ 220 if (LOG_DWORDS) { 221 DBG("ring[%p]: OUT_RING %04x: %08x", ring, 222 (uint32_t)(ring->cur - ring->start), data); 223 } 224 fd_ringbuffer_emit(ring, data); 225} 226 227/* like OUT_RING() but appends a cmdstream patch point to 'buf' */ 228static inline void 229OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, 230 struct util_dynarray *buf) 231{ 232 if (LOG_DWORDS) { 233 DBG("ring[%p]: OUT_RINGP %04x: %08x", ring, 234 (uint32_t)(ring->cur - ring->start), data); 235 } 236 util_dynarray_append(buf, struct fd_cs_patch, ((struct fd_cs_patch){ 237 .cs = ring->cur++, 238 .val = data, 239 })); 240} 241 242/* 243 * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+ 244 */ 245 246static inline void 247__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo, 248 uint32_t offset, uint64_t or, int32_t shift, uint32_t flags) 249{ 250 if (LOG_DWORDS) { 251 DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, 252 (uint32_t)(ring->cur - ring->start), bo, offset, shift); 253 } 254 debug_assert(offset < fd_bo_size(bo)); 255 fd_ringbuffer_reloc(ring, &(struct fd_reloc){ 256 .bo = bo, 257 .flags = flags, 258 .offset = offset, 259 .or = or, 260 .shift = shift, 261 .orhi = or >> 32, 262 }); 263} 264 265static inline void 266OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, 267 uint32_t offset, uint64_t or, int32_t shift) 268{ 269 __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ); 270} 271 272static inline void 273OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, 274 uint32_t offset, uint64_t or, int32_t shift) 275{ 276 __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE); 277} 278 279static inline void 280OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo, 281 uint32_t offset, uint64_t or, int32_t shift) 282{ 283 __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP); 284} 285 286static inline void 287OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) 288{ 289 fd_ringbuffer_emit_reloc_ring_full(ring, target, 0); 290} 291 292static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) 293{ 294 if (ring->cur + ndwords > ring->end) 295 fd_ringbuffer_grow(ring, ndwords); 296} 297 298static inline void 299OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) 300{ 301 BEGIN_RING(ring, cnt+1); 302 OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); 303} 304 305static inline void 306OUT_PKT2(struct fd_ringbuffer *ring) 307{ 308 BEGIN_RING(ring, 1); 309 OUT_RING(ring, CP_TYPE2_PKT); 310} 311 312static inline void 313OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) 314{ 315 BEGIN_RING(ring, cnt+1); 316 OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); 317} 318 319/* 320 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3 321 */ 322 323static inline unsigned 324_odd_parity_bit(unsigned val) 325{ 326 /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel 327 * note that we want odd parity so 0x6996 is inverted. 328 */ 329 val ^= val >> 16; 330 val ^= val >> 8; 331 val ^= val >> 4; 332 val &= 0xf; 333 return (~0x6996 >> val) & 1; 334} 335 336static inline void 337OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) 338{ 339 BEGIN_RING(ring, cnt+1); 340 OUT_RING(ring, CP_TYPE4_PKT | cnt | 341 (_odd_parity_bit(cnt) << 7) | 342 ((regindx & 0x3ffff) << 8) | 343 ((_odd_parity_bit(regindx) << 27))); 344} 345 346static inline void 347OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) 348{ 349 BEGIN_RING(ring, cnt+1); 350 OUT_RING(ring, CP_TYPE7_PKT | cnt | 351 (_odd_parity_bit(cnt) << 15) | 352 ((opcode & 0x7f) << 16) | 353 ((_odd_parity_bit(opcode) << 23))); 354} 355 356static inline void 357OUT_WFI(struct fd_ringbuffer *ring) 358{ 359 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); 360 OUT_RING(ring, 0x00000000); 361} 362 363static inline void 364OUT_WFI5(struct fd_ringbuffer *ring) 365{ 366 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); 367} 368 369static inline void 370__OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target) 371{ 372 if (target->cur == target->start) 373 return; 374 375 unsigned count = fd_ringbuffer_cmd_count(target); 376 377 /* for debug after a lock up, write a unique counter value 378 * to scratch6 for each IB, to make it easier to match up 379 * register dumps to cmdstream. The combination of IB and 380 * DRAW (scratch7) is enough to "triangulate" the particular 381 * draw that caused lockup. 382 */ 383 emit_marker(ring, 6); 384 385 for (unsigned i = 0; i < count; i++) { 386 uint32_t dwords; 387 OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); 388 dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; 389 assert(dwords > 0); 390 OUT_RING(ring, dwords); 391 OUT_PKT2(ring); 392 } 393 394 emit_marker(ring, 6); 395} 396 397static inline void 398__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) 399{ 400 if (target->cur == target->start) 401 return; 402 403 unsigned count = fd_ringbuffer_cmd_count(target); 404 405 for (unsigned i = 0; i < count; i++) { 406 uint32_t dwords; 407 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); 408 dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; 409 assert(dwords > 0); 410 OUT_RING(ring, dwords); 411 } 412} 413 414/* CP_SCRATCH_REG4 is used to hold base address for query results: */ 415// XXX annoyingly scratch regs move on a5xx.. and additionally different 416// packet types.. so freedreno_query_hw is going to need a bit of 417// rework.. 418#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4 419 420static inline void 421emit_marker(struct fd_ringbuffer *ring, int scratch_idx) 422{ 423 extern unsigned marker_cnt; 424 unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx; 425 assert(reg != HW_QUERY_BASE_REG); 426 if (reg == HW_QUERY_BASE_REG) 427 return; 428 OUT_PKT0(ring, reg, 1); 429 OUT_RING(ring, ++marker_cnt); 430} 431 432static inline uint32_t 433pack_rgba(enum pipe_format format, const float *rgba) 434{ 435 union util_color uc; 436 util_pack_color(rgba, format, &uc); 437 return uc.ui[0]; 438} 439 440/* 441 * swap - swap value of @a and @b 442 */ 443#define swap(a, b) \ 444 do { __typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) 445 446#define foreach_bit(b, mask) \ 447 for (uint32_t _m = (mask); _m && ({(b) = u_bit_scan(&_m); 1;});) 448 449 450#define BIT(bit) (1u << bit) 451 452/* 453 * a3xx+ helpers: 454 */ 455 456static inline enum a3xx_msaa_samples 457fd_msaa_samples(unsigned samples) 458{ 459 switch (samples) { 460 default: 461 debug_assert(0); 462 case 0: 463 case 1: return MSAA_ONE; 464 case 2: return MSAA_TWO; 465 case 4: return MSAA_FOUR; 466 case 8: return MSAA_EIGHT; 467 } 468} 469 470/* 471 * a4xx+ helpers: 472 */ 473 474static inline enum a4xx_state_block 475fd4_stage2shadersb(gl_shader_stage type) 476{ 477 switch (type) { 478 case MESA_SHADER_VERTEX: 479 return SB4_VS_SHADER; 480 case MESA_SHADER_FRAGMENT: 481 return SB4_FS_SHADER; 482 case MESA_SHADER_COMPUTE: 483 case MESA_SHADER_KERNEL: 484 return SB4_CS_SHADER; 485 default: 486 unreachable("bad shader type"); 487 return ~0; 488 } 489} 490 491#endif /* FREEDRENO_UTIL_H_ */ 492