freedreno_util.h revision 7ec681f3
1/* 2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#ifndef FREEDRENO_UTIL_H_ 28#define FREEDRENO_UTIL_H_ 29 30#include "drm/freedreno_drmif.h" 31#include "drm/freedreno_ringbuffer.h" 32 33#include "pipe/p_format.h" 34#include "pipe/p_state.h" 35#include "util/compiler.h" 36#include "util/half_float.h" 37#include "util/log.h" 38#include "util/u_debug.h" 39#include "util/u_dynarray.h" 40#include "util/u_math.h" 41#include "util/u_pack_color.h" 42 43#include "adreno_common.xml.h" 44#include "adreno_pm4.xml.h" 45#include "disasm.h" 46 47#ifdef __cplusplus 48extern "C" { 49#endif 50 51enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format); 52enum pc_di_index_size fd_pipe2index(enum pipe_format format); 53enum pipe_format fd_gmem_restore_format(enum pipe_format format); 54enum adreno_rb_blend_factor fd_blend_factor(unsigned factor); 55enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode); 56enum adreno_stencil_op fd_stencil_op(unsigned op); 57 58#define A3XX_MAX_MIP_LEVELS 14 59 60#define A2XX_MAX_RENDER_TARGETS 1 61#define A3XX_MAX_RENDER_TARGETS 4 62#define A4XX_MAX_RENDER_TARGETS 8 63#define A5XX_MAX_RENDER_TARGETS 8 64#define A6XX_MAX_RENDER_TARGETS 8 65 66#define MAX_RENDER_TARGETS A6XX_MAX_RENDER_TARGETS 67 68/* clang-format off */ 69enum fd_debug_flag { 70 FD_DBG_MSGS = BITFIELD_BIT(0), 71 FD_DBG_DISASM = BITFIELD_BIT(1), 72 FD_DBG_DCLEAR = BITFIELD_BIT(2), 73 FD_DBG_DDRAW = BITFIELD_BIT(3), 74 FD_DBG_NOSCIS = BITFIELD_BIT(4), 75 FD_DBG_DIRECT = BITFIELD_BIT(5), 76 FD_DBG_NOBYPASS = BITFIELD_BIT(6), 77 FD_DBG_PERF = BITFIELD_BIT(7), 78 FD_DBG_NOBIN = BITFIELD_BIT(8), 79 FD_DBG_NOGMEM = BITFIELD_BIT(9), 80 FD_DBG_SERIALC = BITFIELD_BIT(10), 81 FD_DBG_SHADERDB = BITFIELD_BIT(11), 82 FD_DBG_FLUSH = BITFIELD_BIT(12), 83 FD_DBG_DEQP = BITFIELD_BIT(13), 84 FD_DBG_INORDER = BITFIELD_BIT(14), 85 FD_DBG_BSTAT = BITFIELD_BIT(15), 86 FD_DBG_NOGROW = BITFIELD_BIT(16), 87 FD_DBG_LRZ = BITFIELD_BIT(17), 88 FD_DBG_NOINDR = BITFIELD_BIT(18), 89 FD_DBG_NOBLIT = BITFIELD_BIT(19), 90 FD_DBG_HIPRIO = BITFIELD_BIT(20), 91 FD_DBG_TTILE = BITFIELD_BIT(21), 92 FD_DBG_PERFC = BITFIELD_BIT(22), 93 FD_DBG_NOUBWC = BITFIELD_BIT(23), 94 FD_DBG_NOLRZ = BITFIELD_BIT(24), 95 FD_DBG_NOTILE = BITFIELD_BIT(25), 96 FD_DBG_LAYOUT = BITFIELD_BIT(26), 97 FD_DBG_NOFP16 = BITFIELD_BIT(27), 98 FD_DBG_NOHW = BITFIELD_BIT(28), 99}; 100/* clang-format on */ 101 102extern int fd_mesa_debug; 103extern bool fd_binning_enabled; 104 105#define FD_DBG(category) unlikely(fd_mesa_debug &FD_DBG_##category) 106 107#include <unistd.h> 108#include <sys/types.h> 109#include <sys/syscall.h> 110 111#define DBG(fmt, ...) \ 112 do { \ 113 if (FD_DBG(MSGS)) \ 114 mesa_logi("%5d: %s:%d: " fmt, ((pid_t)syscall(SYS_gettid)), \ 115 __FUNCTION__, __LINE__, \ 116 ##__VA_ARGS__); \ 117 } while (0) 118 119#define perf_debug_message(debug, type, ...) \ 120 do { \ 121 if (FD_DBG(PERF)) \ 122 mesa_logw(__VA_ARGS__); \ 123 struct pipe_debug_callback *__d = (debug); \ 124 if (__d) \ 125 pipe_debug_message(__d, type, __VA_ARGS__); \ 126 } while (0) 127 128#define perf_debug_ctx(ctx, ...) \ 129 do { \ 130 struct fd_context *__c = (ctx); \ 131 perf_debug_message(__c ? &__c->debug : NULL, PERF_INFO, __VA_ARGS__); \ 132 } while (0) 133 134#define perf_debug(...) perf_debug_ctx(NULL, __VA_ARGS__) 135 136#define perf_time_ctx(ctx, limit_ns, fmt, ...) \ 137 for (struct __perf_time_state __s = \ 138 { \ 139 .t = -__perf_get_time(ctx), \ 140 }; \ 141 !__s.done; ({ \ 142 __s.t += __perf_get_time(ctx); \ 143 __s.done = true; \ 144 if (__s.t > (limit_ns)) { \ 145 perf_debug_ctx(ctx, fmt " (%.03f ms)", ##__VA_ARGS__, \ 146 (double)__s.t / 1000000.0); \ 147 } \ 148 })) 149 150#define perf_time(limit_ns, fmt, ...) \ 151 perf_time_ctx(NULL, limit_ns, fmt, ##__VA_ARGS__) 152 153struct __perf_time_state { 154 int64_t t; 155 bool done; 156}; 157 158/* static inline would be nice here, except 'struct fd_context' is not 159 * defined yet: 160 */ 161#define __perf_get_time(ctx) \ 162 ((FD_DBG(PERF) || ({ \ 163 struct fd_context *__c = (ctx); \ 164 unlikely(__c && __c->debug.debug_message); \ 165 })) \ 166 ? os_time_get_nano() \ 167 : 0) 168 169struct fd_context; 170 171/** 172 * A psuedo-variable for defining where various parts of the fd_context 173 * can be safely accessed. 174 * 175 * With threaded_context, certain pctx funcs are called from gallium 176 * front-end/state-tracker (eg. CSO creation), while others are called 177 * from the driver thread. Things called from driver thread can safely 178 * access anything in the ctx, while things called from the fe/st thread 179 * must limit themselves to "safe" things (ie. ctx->screen is safe as it 180 * is immutable, but the blitter_context is not). 181 */ 182extern lock_cap_t fd_context_access_cap; 183 184/** 185 * Make the annotation a bit less verbose.. mark fields which should only 186 * be accessed by driver-thread with 'dt' 187 */ 188#define dt guarded_by(fd_context_access_cap) 189 190/** 191 * Annotation for entry-point functions only called in driver thread. 192 * 193 * For static functions, apply the annotation to the function declaration. 194 * Otherwise apply to the function prototype. 195 */ 196#define in_dt assert_cap(fd_context_access_cap) 197 198/** 199 * Annotation for internal functions which are only called from entry- 200 * point functions (with 'in_dt' annotation) or other internal functions 201 * with the 'assert_dt' annotation. 202 * 203 * For static functions, apply the annotation to the function declaration. 204 * Otherwise apply to the function prototype. 205 */ 206#define assert_dt requires_cap(fd_context_access_cap) 207 208/** 209 * Special helpers for context access outside of driver thread. For ex, 210 * pctx->get_query_result() is not called on driver thread, but the 211 * query is guaranteed to be flushed, or the driver thread queue is 212 * guaranteed to be flushed. 213 * 214 * Use with caution! 215 */ 216static inline void 217fd_context_access_begin(struct fd_context *ctx) 218 acquire_cap(fd_context_access_cap) 219{ 220} 221 222static inline void 223fd_context_access_end(struct fd_context *ctx) release_cap(fd_context_access_cap) 224{ 225} 226 227/* for conditionally setting boolean flag(s): */ 228#define COND(bool, val) ((bool) ? (val) : 0) 229 230#define CP_REG(reg) ((0x4 << 16) | ((unsigned int)((reg) - (0x2000)))) 231 232static inline uint32_t 233DRAW(enum pc_di_primtype prim_type, enum pc_di_src_sel source_select, 234 enum pc_di_index_size index_size, enum pc_di_vis_cull_mode vis_cull_mode, 235 uint8_t instances) 236{ 237 return (prim_type << 0) | (source_select << 6) | ((index_size & 1) << 11) | 238 ((index_size >> 1) << 13) | (vis_cull_mode << 9) | (1 << 14) | 239 (instances << 24); 240} 241 242static inline uint32_t 243DRAW_A20X(enum pc_di_primtype prim_type, 244 enum pc_di_face_cull_sel faceness_cull_select, 245 enum pc_di_src_sel source_select, enum pc_di_index_size index_size, 246 bool pre_fetch_cull_enable, bool grp_cull_enable, uint16_t count) 247{ 248 return (prim_type << 0) | (source_select << 6) | 249 (faceness_cull_select << 8) | ((index_size & 1) << 11) | 250 ((index_size >> 1) << 13) | (pre_fetch_cull_enable << 14) | 251 (grp_cull_enable << 15) | (count << 16); 252} 253 254/* for tracking cmdstream positions that need to be patched: */ 255struct fd_cs_patch { 256 uint32_t *cs; 257 uint32_t val; 258}; 259#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch)) 260#define fd_patch_element(buf, i) \ 261 util_dynarray_element(buf, struct fd_cs_patch, i) 262 263static inline enum pipe_format 264pipe_surface_format(struct pipe_surface *psurf) 265{ 266 if (!psurf) 267 return PIPE_FORMAT_NONE; 268 return psurf->format; 269} 270 271static inline bool 272fd_surface_half_precision(const struct pipe_surface *psurf) 273{ 274 enum pipe_format format; 275 276 if (!psurf) 277 return true; 278 279 format = psurf->format; 280 281 /* colors are provided in consts, which go through cov.f32f16, which will 282 * break these values 283 */ 284 if (util_format_is_pure_integer(format)) 285 return false; 286 287 /* avoid losing precision on 32-bit float formats */ 288 if (util_format_is_float(format) && 289 util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 290 32) 291 return false; 292 293 return true; 294} 295 296static inline unsigned 297fd_sampler_first_level(const struct pipe_sampler_view *view) 298{ 299 if (view->target == PIPE_BUFFER) 300 return 0; 301 return view->u.tex.first_level; 302} 303 304static inline unsigned 305fd_sampler_last_level(const struct pipe_sampler_view *view) 306{ 307 if (view->target == PIPE_BUFFER) 308 return 0; 309 return view->u.tex.last_level; 310} 311 312static inline bool 313fd_half_precision(struct pipe_framebuffer_state *pfb) 314{ 315 unsigned i; 316 317 for (i = 0; i < pfb->nr_cbufs; i++) 318 if (!fd_surface_half_precision(pfb->cbufs[i])) 319 return false; 320 321 return true; 322} 323 324static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx); 325 326/* like OUT_RING() but appends a cmdstream patch point to 'buf' */ 327static inline void 328OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, struct util_dynarray *buf) 329{ 330 if (LOG_DWORDS) { 331 DBG("ring[%p]: OUT_RINGP %04x: %08x", ring, 332 (uint32_t)(ring->cur - ring->start), data); 333 } 334 util_dynarray_append(buf, struct fd_cs_patch, 335 ((struct fd_cs_patch){ 336 .cs = ring->cur++, 337 .val = data, 338 })); 339} 340 341static inline void 342__OUT_IB(struct fd_ringbuffer *ring, bool prefetch, 343 struct fd_ringbuffer *target) 344{ 345 if (target->cur == target->start) 346 return; 347 348 unsigned count = fd_ringbuffer_cmd_count(target); 349 350 /* for debug after a lock up, write a unique counter value 351 * to scratch6 for each IB, to make it easier to match up 352 * register dumps to cmdstream. The combination of IB and 353 * DRAW (scratch7) is enough to "triangulate" the particular 354 * draw that caused lockup. 355 */ 356 emit_marker(ring, 6); 357 358 for (unsigned i = 0; i < count; i++) { 359 uint32_t dwords; 360 OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 361 2); 362 dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; 363 assert(dwords > 0); 364 OUT_RING(ring, dwords); 365 OUT_PKT2(ring); 366 } 367 368 emit_marker(ring, 6); 369} 370 371static inline void 372__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) 373{ 374 if (target->cur == target->start) 375 return; 376 377 unsigned count = fd_ringbuffer_cmd_count(target); 378 379 for (unsigned i = 0; i < count; i++) { 380 uint32_t dwords; 381 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); 382 dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; 383 assert(dwords > 0); 384 OUT_RING(ring, dwords); 385 } 386} 387 388/* CP_SCRATCH_REG4 is used to hold base address for query results: */ 389// XXX annoyingly scratch regs move on a5xx.. and additionally different 390// packet types.. so freedreno_query_hw is going to need a bit of 391// rework.. 392#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4 393 394#ifdef DEBUG 395#define __EMIT_MARKER 1 396#else 397#define __EMIT_MARKER 0 398#endif 399 400static inline void 401emit_marker(struct fd_ringbuffer *ring, int scratch_idx) 402{ 403 extern int32_t marker_cnt; 404 unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx; 405 assert(reg != HW_QUERY_BASE_REG); 406 if (reg == HW_QUERY_BASE_REG) 407 return; 408 if (__EMIT_MARKER) { 409 OUT_WFI(ring); 410 OUT_PKT0(ring, reg, 1); 411 OUT_RING(ring, p_atomic_inc_return(&marker_cnt)); 412 } 413} 414 415static inline uint32_t 416pack_rgba(enum pipe_format format, const float *rgba) 417{ 418 union util_color uc; 419 util_pack_color(rgba, format, &uc); 420 return uc.ui[0]; 421} 422 423/* 424 * swap - swap value of @a and @b 425 */ 426#define swap(a, b) \ 427 do { \ 428 __typeof(a) __tmp = (a); \ 429 (a) = (b); \ 430 (b) = __tmp; \ 431 } while (0) 432 433#define BIT(bit) (1u << bit) 434 435/* 436 * a3xx+ helpers: 437 */ 438 439static inline enum a3xx_msaa_samples 440fd_msaa_samples(unsigned samples) 441{ 442 switch (samples) { 443 default: 444 debug_assert(0); 445#if defined(NDEBUG) || defined(DEBUG) 446 FALLTHROUGH; 447#endif 448 case 0: 449 case 1: 450 return MSAA_ONE; 451 case 2: 452 return MSAA_TWO; 453 case 4: 454 return MSAA_FOUR; 455 case 8: 456 return MSAA_EIGHT; 457 } 458} 459 460/* 461 * a4xx+ helpers: 462 */ 463 464static inline enum a4xx_state_block 465fd4_stage2shadersb(gl_shader_stage type) 466{ 467 switch (type) { 468 case MESA_SHADER_VERTEX: 469 return SB4_VS_SHADER; 470 case MESA_SHADER_FRAGMENT: 471 return SB4_FS_SHADER; 472 case MESA_SHADER_COMPUTE: 473 case MESA_SHADER_KERNEL: 474 return SB4_CS_SHADER; 475 default: 476 unreachable("bad shader type"); 477 return (enum a4xx_state_block) ~0; 478 } 479} 480 481static inline enum a4xx_index_size 482fd4_size2indextype(unsigned index_size) 483{ 484 switch (index_size) { 485 case 1: 486 return INDEX4_SIZE_8_BIT; 487 case 2: 488 return INDEX4_SIZE_16_BIT; 489 case 4: 490 return INDEX4_SIZE_32_BIT; 491 } 492 DBG("unsupported index size: %d", index_size); 493 assert(0); 494 return INDEX4_SIZE_32_BIT; 495} 496 497#ifdef __cplusplus 498} 499#endif 500 501#endif /* FREEDRENO_UTIL_H_ */ 502