1/*
2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#ifndef FREEDRENO_CONTEXT_H_
28#define FREEDRENO_CONTEXT_H_
29
30#include "pipe/p_context.h"
31#include "util/libsync.h"
32#include "util/list.h"
33#include "util/slab.h"
34#include "util/u_blitter.h"
35#include "util/u_string.h"
36#include "util/u_threaded_context.h"
37#include "util/perf/u_trace.h"
38
39#include "freedreno_autotune.h"
40#include "freedreno_gmem.h"
41#include "freedreno_perfetto.h"
42#include "freedreno_screen.h"
43#include "freedreno_util.h"
44
45#ifdef __cplusplus
46extern "C" {
47#endif
48
49#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)
50
51struct fd_vertex_stateobj;
52struct fd_batch;
53
/* Bound sampler-views and sampler-states for one shader stage. */
struct fd_texture_stateobj {
   struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
   unsigned num_textures;   /* one past the highest used texture slot */
   unsigned valid_textures; /* bitmask of non-NULL entries in textures[] — NOTE(review): inferred from name */
   struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
   unsigned num_samplers;   /* one past the highest used sampler slot */
   unsigned valid_samplers; /* bitmask of non-NULL entries in samplers[] — NOTE(review): inferred from name */
};
62
/* Opaque CSO handles for each shader stage of a program
 * (vertex/hull/domain/geometry/fragment):
 */
struct fd_program_stateobj {
   void *vs, *hs, *ds, *gs, *fs;
};
66
/* Bound constant buffers for one shader stage. */
struct fd_constbuf_stateobj {
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t enabled_mask; /* bitmask of bound slots in cb[] */
};
71
/* Bound SSBOs for one shader stage. */
struct fd_shaderbuf_stateobj {
   struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
   uint32_t enabled_mask;  /* bitmask of bound slots in sb[] */
   uint32_t writable_mask; /* bitmask of slots the shader may write */
};
77
/* Bound shader images for one shader stage. */
struct fd_shaderimg_stateobj {
   struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
   uint32_t enabled_mask; /* bitmask of bound slots in si[] */
};
82
/* Bound vertex buffers. */
struct fd_vertexbuf_stateobj {
   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   unsigned count;        /* number of bound vertex buffers */
   uint32_t enabled_mask; /* bitmask of bound slots in vb[] */
};
88
/* Vertex-element (attribute layout) CSO. */
struct fd_vertex_stateobj {
   struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
   unsigned num_elements;
};
93
/* Driver wrapper around a gallium stream-output (transform feedback) target. */
struct fd_stream_output_target {
   struct pipe_stream_output_target base;
   /* buffer tracking the write offset — NOTE(review): presumably read back
    * for DrawTransformFeedback-style draws; confirm against users:
    */
   struct pipe_resource *offset_buf;
   /* stride of the last stream out recorded to this target, for
    * glDrawTransformFeedback(). */
   uint32_t stride;
};
101
/* Aggregate stream-output (transform feedback) binding state. */
struct fd_streamout_stateobj {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   /* Bitmask of streams that should be reset. */
   unsigned reset;

   /* number of bound targets[] entries: */
   unsigned num_targets;
   /* Track offset from vtxcnt for streamout data.  This counter
    * is just incremented by # of vertices on each draw until
    * reset or new streamout buffer bound.
    *
    * When we eventually have GS, the CPU won't actually know the
    * number of vertices per draw, so I think we'll have to do
    * something more clever.
    */
   unsigned offsets[PIPE_MAX_SO_BUFFERS];

   /* Pre-a6xx, the maximum number of vertices that could be recorded to this
    * set of targets with the current vertex shader.  a6xx and newer, hardware
    * queries are used.
    */
   unsigned max_tf_vtx;

   /* Pre-a6xx, the number of verts written to the buffers since the last
    * Begin.  Used for overflow checking for SW queries.
    */
   unsigned verts_written;
};
129
/* Global buffer bindings (set_global_binding, used for compute): */
#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
   struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
   uint32_t enabled_mask; /* bitmask of bound slots in buf[] */
};
135
/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
 */
struct fd_vertex_state {
   struct fd_vertex_stateobj *vtx;     /* vertex-element CSO */
   struct fd_vertexbuf_stateobj vertexbuf; /* bound vertex buffers */
};
144
/* global 3d pipeline dirty state: */
enum fd_dirty_3d_state {
   FD_DIRTY_BLEND = BIT(0),
   FD_DIRTY_RASTERIZER = BIT(1),
   FD_DIRTY_ZSA = BIT(2),
   FD_DIRTY_BLEND_COLOR = BIT(3),
   FD_DIRTY_STENCIL_REF = BIT(4),
   FD_DIRTY_SAMPLE_MASK = BIT(5),
   FD_DIRTY_FRAMEBUFFER = BIT(6),
   FD_DIRTY_STIPPLE = BIT(7),
   FD_DIRTY_VIEWPORT = BIT(8),
   FD_DIRTY_VTXSTATE = BIT(9),
   FD_DIRTY_VTXBUF = BIT(10),
   FD_DIRTY_MIN_SAMPLES = BIT(11),
   FD_DIRTY_SCISSOR = BIT(12),
   FD_DIRTY_STREAMOUT = BIT(13),
   FD_DIRTY_UCP = BIT(14),
   FD_DIRTY_PROG = BIT(15),
   FD_DIRTY_CONST = BIT(16),
   FD_DIRTY_TEX = BIT(17),
   FD_DIRTY_IMAGE = BIT(18),
   FD_DIRTY_SSBO = BIT(19),

   /* only used by a2xx.. possibly can be removed.. */
   FD_DIRTY_TEXSTATE = BIT(20),

   /* NOTE: bits 21..23 are currently unused */

   /* fine grained state changes, for cases where state is not orthogonal
    * from hw perspective:
    */
   FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
   FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE = BIT(25),
   FD_DIRTY_BLEND_DUAL = BIT(26),
/* one past the highest bit that participates in gen_dirty_map[] lookup: */
#define NUM_DIRTY_BITS 27

   /* additional flag for state requires updated resource tracking: */
   FD_DIRTY_RESOURCE = BIT(31),
};
182
/* per shader-stage dirty state: (each maps onto a corresponding
 * fd_dirty_3d_state bit, see fd_context_dirty_shader())
 */
enum fd_dirty_shader_state {
   FD_DIRTY_SHADER_PROG = BIT(0),
   FD_DIRTY_SHADER_CONST = BIT(1),
   FD_DIRTY_SHADER_TEX = BIT(2),
   FD_DIRTY_SHADER_SSBO = BIT(3),
   FD_DIRTY_SHADER_IMAGE = BIT(4),
#define NUM_DIRTY_SHADER_BITS 5
};
192
193#define MAX_HW_SAMPLE_PROVIDERS 7
194struct fd_hw_sample_provider;
195struct fd_hw_sample;
196
197struct ir3_shader_key;
198
/* Per-context driver state.  Embeds pipe_context as first member so
 * fd_context() can down-cast.  Fields marked "dt" are only accessed
 * from the driver thread (see assert_dt annotations).
 */
struct fd_context {
   struct pipe_context base;

   /* threaded-context state, when u_threaded_context is in use
    * (see fd_context_init_tc()):
    */
   struct threaded_context *tc;

   struct list_head node; /* node in screen->context_list */

   /* We currently need to serialize emitting GMEM batches, because of
    * VSC state access in the context.
    *
    * In practice this lock should not be contended, since pipe_context
    * use should be single threaded.  But it is needed to protect the
    * case, with batch reordering where a ctxB batch triggers flushing
    * a ctxA batch
    */
   simple_mtx_t gmem_lock;

   struct fd_device *dev;
   struct fd_screen *screen;
   struct fd_pipe *pipe;

   /* u_blitter fallback state and pre-baked rasterizer CSOs for clears: */
   struct blitter_context *blitter dt;
   void *clear_rs_state[2] dt;

   /* slab for pipe_transfer allocations: */
   struct slab_child_pool transfer_pool dt;
   struct slab_child_pool transfer_pool_unsync; /* for threaded_context */

   struct fd_autotune autotune dt;

   /**
    * query related state:
    */
   /*@{*/
   /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
   struct slab_mempool sample_pool dt;
   struct slab_mempool sample_period_pool dt;

   /* sample-providers for hw queries: */
   const struct fd_hw_sample_provider
      *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active queries: */
   struct list_head hw_active_queries dt;

   /* sample-providers for accumulating hw queries: */
   const struct fd_acc_sample_provider
      *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active accumulating queries: */
   struct list_head acc_active_queries dt;
   /*@}*/

   /* # of vertices per tessellation patch (set_patch_vertices state): */
   uint8_t patch_vertices;

   /* Whether we need to recheck the active_queries list next
    * fd_batch_update_queries().
    */
   bool update_active_queries dt;

   /* Current state of pctx->set_active_query_state() (i.e. "should drawing
    * be counted against non-perfcounter queries")
    */
   bool active_queries dt;

   /* shaders used by clear, and gmem->mem blits: */
   struct fd_program_stateobj solid_prog; // TODO move to screen?
   struct fd_program_stateobj solid_layered_prog;

   /* shaders used by mem->gmem blits: */
   struct fd_program_stateobj
      blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
   struct fd_program_stateobj blit_z, blit_zs;

   /* Stats/counters:
    */
   struct {
      uint64_t prims_emitted;
      uint64_t prims_generated;
      uint64_t draw_calls;
      uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw,
         batch_restore;
      uint64_t staging_uploads, shadow_uploads;
      uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
   } stats dt;

   /* Counter for number of users who need sw counters (so we can
    * skip collecting them when not needed)
    */
   unsigned stats_users;

   /* Current batch.. the rule here is that you can deref ctx->batch
    * in codepaths from pipe_context entrypoints.  But not in code-
    * paths from fd_batch_flush() (basically, the stuff that gets
    * called from GMEM code), since in those code-paths the batch
    * you care about is not necessarily the same as ctx->batch.
    */
   struct fd_batch *batch dt;

   /* NULL if there has been rendering since last flush.  Otherwise
    * keeps a reference to the last fence so we can re-use it rather
    * than having to flush no-op batch.
    */
   struct pipe_fence_handle *last_fence dt;

   /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
    * if none).  The in-fence is transferred over to the batch on the
    * next draw/blit/grid.
    *
    * The reason for this extra complexity is that apps will typically
    * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
    * first draw.  But mesa/st doesn't flush down framebuffer state
    * change until we hit a draw, so at ->fence_server_sync() time, we
    * don't yet have the correct batch.  If we created a batch at that
    * point, it would be the wrong one, and we'd have to flush it pre-
    * maturely, causing us to stall early in the frame where we could
    * be building up cmdstream.
    */
   int in_fence_fd dt;

   /* track last known reset status globally and per-context to
    * determine if more resets occurred since then.  If global reset
    * count increases, it means some other context crashed.  If
    * per-context reset count increases, it means we crashed the
    * gpu.
    *
    * Only accessed by front-end thread, never accessed by TC driver
    * thread.
    */
   uint32_t context_reset_count;
   uint32_t global_reset_count;

   /* Context sequence #, used for batch-cache key: */
   uint16_t seqno;

   /* Cost per draw, used in conjunction with samples-passed history to
    * estimate whether GMEM or bypass is the better option.
    */
   uint8_t draw_cost;

   /* Are we in process of shadowing a resource? Used to detect recursion
    * in transfer_map, and skip unneeded synchronization.
    */
   bool in_shadow : 1 dt;

   /* For catching recursion problems with blit fallback: */
   bool in_blit : 1 dt;

   /* points to either scissor or disabled_scissor depending on rast state: */
   struct pipe_scissor_state *current_scissor dt;

   struct pipe_scissor_state scissor dt;

   /* we don't have a disable/enable bit for scissor, so instead we keep
    * a disabled-scissor state which matches the entire bound framebuffer
    * and use that when scissor is not enabled.
    */
   struct pipe_scissor_state disabled_scissor dt;

   /* Per vsc pipe bo's (a2xx-a5xx): */
   struct fd_bo *vsc_pipe_bo[32] dt;

   /* Maps generic gallium oriented fd_dirty_3d_state bits to generation
    * specific bitmask of state "groups".
    */
   uint32_t gen_dirty_map[NUM_DIRTY_BITS];
   uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];

   /* Bitmask of all possible gen_dirty bits: */
   uint32_t gen_all_dirty;

   /* Generation specific bitmask of dirty state groups: */
   uint32_t gen_dirty;

   /* which state objects need to be re-emit'd: */
   enum fd_dirty_3d_state dirty dt;

   /* per shader-stage dirty status: */
   enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES] dt;

   /* currently bound CSOs / pipeline state: */
   void *compute dt;
   struct pipe_blend_state *blend dt;
   struct pipe_rasterizer_state *rasterizer dt;
   struct pipe_depth_stencil_alpha_state *zsa dt;

   struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;

   struct fd_program_stateobj prog dt;
   /* bitmask of shader stages with a shader bound — NOTE(review):
    * inferred from name; verify bit layout against users:
    */
   uint32_t bound_shader_stages dt;

   struct fd_vertex_state vtx dt;

   struct pipe_blend_color blend_color dt;
   struct pipe_stencil_ref stencil_ref dt;
   unsigned sample_mask dt;
   unsigned min_samples dt;
   /* local context fb state, for when ctx->batch is null: */
   struct pipe_framebuffer_state framebuffer dt;
   struct pipe_poly_stipple stipple dt;
   struct pipe_viewport_state viewport dt;
   struct pipe_scissor_state viewport_scissor dt;
   struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
   struct fd_streamout_stateobj streamout dt;
   struct fd_global_bindings_stateobj global_bindings dt;
   struct pipe_clip_state ucp dt;

   /* conditional rendering state (render_condition): */
   struct pipe_query *cond_query dt;
   bool cond_cond dt; /* inverted rendering condition */
   uint cond_mode dt;

   /* Private memory is a memory space where each fiber gets its own piece of
    * memory, in addition to registers. It is backed by a buffer which needs
    * to be large enough to hold the contents of every possible wavefront in
    * every core of the GPU. Because it allocates space via the internal
    * wavefront ID which is shared between all currently executing shaders,
    * the same buffer can be reused by all shaders, as long as all shaders
    * sharing the same buffer use the exact same configuration. There are two
    * inputs to the configuration, the amount of per-fiber space and whether
    * to use the newer per-wave or older per-fiber layout. We only ever
    * increase the size, and shaders with a smaller size requirement simply
    * use the larger existing buffer, so that we only need to keep track of
    * one buffer and its size, but we still need to keep track of per-fiber
    * and per-wave buffers separately so that we never use the same buffer
    * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
    * per-wave.
    */
   struct {
      struct fd_bo *bo;
      uint32_t per_fiber_size;
   } pvtmem[2] dt;

   /* maps per-shader-stage state plus variant key to hw
    * program stateobj:
    */
   struct ir3_cache *shader_cache;

   struct pipe_debug_callback debug;

   struct u_trace_context trace_context dt;

#ifdef HAVE_PERFETTO
   struct fd_perfetto_state perfetto;
#endif

   /*
    * Counter to generate submit-ids
    */
   uint32_t submit_count;

   /* Called on rebind_resource() for any per-gen cleanup required: */
   void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;

   /* GMEM/tile handling fxns: */
   void (*emit_tile_init)(struct fd_batch *batch) dt;
   void (*emit_tile_prep)(struct fd_batch *batch,
                          const struct fd_tile *tile) dt;
   void (*emit_tile_mem2gmem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_renderprep)(struct fd_batch *batch,
                                const struct fd_tile *tile) dt;
   void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
   void (*emit_tile_gmem2mem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */

   /* optional, for GMEM bypass: */
   void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
   void (*emit_sysmem_fini)(struct fd_batch *batch) dt;

   /* draw: */
   bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info,
			unsigned drawid_offset,
                    const struct pipe_draw_indirect_info *indirect,
			const struct pipe_draw_start_count_bias *draw,
                    unsigned index_offset) dt;
   bool (*clear)(struct fd_context *ctx, unsigned buffers,
                 const union pipe_color_union *color, double depth,
                 unsigned stencil) dt;

   /* compute: */
   void (*launch_grid)(struct fd_context *ctx,
                       const struct pipe_grid_info *info) dt;

   /* query: */
   struct fd_query *(*create_query)(struct fd_context *ctx, unsigned query_type,
                                    unsigned index);
   void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
   void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
                              struct fd_ringbuffer *ring) dt;
   void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;

   /* blitter: */
   bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
   void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;

   /* uncompress resource, if necessary, to use as the specified format: */
   void (*validate_format)(struct fd_context *ctx, struct fd_resource *rsc,
                           enum pipe_format format) dt;

   /* handling for barriers: */
   void (*framebuffer_barrier)(struct fd_context *ctx) dt;

   /* logger: */
   void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo,
                            unsigned offset);
   uint64_t (*ts_to_ns)(uint64_t ts);

   /*
    * Common pre-cooked VBO state (used for a3xx and later):
    */

   /* for clear/gmem->mem vertices, and mem->gmem */
   struct pipe_resource *solid_vbuf;

   /* for mem->gmem tex coords: */
   struct pipe_resource *blit_texcoord_vbuf;

   /* vertex state for solid_vbuf:
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state solid_vbuf_state;

   /* vertex state for blit_prog:
    *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state blit_vbuf_state;

   /*
    * Info about state of previous draw, for state that comes from
    * pipe_draw_info (ie. not part of a CSO).  This allows us to
    * skip some register emit when the state doesn't change from
    * draw-to-draw
    */
   struct {
      bool dirty; /* last draw state unknown */
      bool primitive_restart;
      uint32_t index_start;
      uint32_t instance_start;
      uint32_t restart_index;
      uint32_t streamout_mask;

      /* some state changes require a different shader variant.  Keep
       * track of this so we know when we need to re-emit shader state
       * due to variant change.  See ir3_fixup_shader_state()
       *
       * (used for a3xx+, NULL otherwise)
       */
      struct ir3_shader_key *key;

   } last dt;
};
553
/**
 * Down-cast a pipe_context to the wrapping fd_context.  Valid because
 * fd_context embeds pipe_context as its first member.
 */
static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
   struct fd_context *ctx = (struct fd_context *)pctx;
   return ctx;
}
559
/**
 * Down-cast a pipe_stream_output_target to the wrapping
 * fd_stream_output_target (base is the first member).
 */
static inline struct fd_stream_output_target *
fd_stream_output_target(struct pipe_stream_output_target *target)
{
   struct fd_stream_output_target *so_target =
      (struct fd_stream_output_target *)target;
   return so_target;
}
565
566/**
567 * Does the dirty state require resource tracking, ie. in general
568 * does it reference some resource.  There are some special cases:
569 *
570 * - FD_DIRTY_CONST can reference a resource, but cb0 is handled
571 *   specially as if it is not a user-buffer, we expect it to be
572 *   coming from const_uploader, so we can make some assumptions
 *   that future transfer_map will be UNSYNCHRONIZED
574 * - FD_DIRTY_ZSA controls how the framebuffer is accessed
575 * - FD_DIRTY_BLEND needs to update GMEM reason
576 *
577 * TODO if we can make assumptions that framebuffer state is bound
578 * first, before blend/zsa/etc state we can move some of the ZSA/
579 * BLEND state handling from draw time to bind time.  I think this
580 * is true of mesa/st, perhaps we can just document it to be a
581 * frontend requirement?
582 */
583static inline bool
584fd_context_dirty_resource(enum fd_dirty_3d_state dirty)
585{
586   return dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA | FD_DIRTY_BLEND |
587                   FD_DIRTY_SSBO | FD_DIRTY_IMAGE | FD_DIRTY_VTXBUF |
588                   FD_DIRTY_TEX | FD_DIRTY_STREAMOUT);
589}
590
/* OR bits into an enum-typed dirty bitmask.  In C++ an int does not
 * implicitly convert back to the enum type, so the result must be cast
 * through decltype; plain C can simply use |=.
 */
#ifdef __cplusplus
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      decltype(mask) _d = (d);                                                 \
      d = (decltype(mask))(_d | (mask));                                       \
   } while (0)
#else
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      d |= (mask);                                                             \
   } while (0)
#endif
603
604/* Mark specified non-shader-stage related state as dirty: */
605static inline void
606fd_context_dirty(struct fd_context *ctx, enum fd_dirty_3d_state dirty) assert_dt
607{
608   assert(util_is_power_of_two_nonzero(dirty));
609   STATIC_ASSERT(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));
610
611   ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];
612
613   if (fd_context_dirty_resource(dirty))
614      or_dirty(dirty, FD_DIRTY_RESOURCE);
615
616   or_dirty(ctx->dirty, dirty);
617}
618
619static inline void
620fd_context_dirty_shader(struct fd_context *ctx, enum pipe_shader_type shader,
621                        enum fd_dirty_shader_state dirty) assert_dt
622{
623   const enum fd_dirty_3d_state map[] = {
624      FD_DIRTY_PROG, FD_DIRTY_CONST, FD_DIRTY_TEX,
625      FD_DIRTY_SSBO, FD_DIRTY_IMAGE,
626   };
627
628   /* Need to update the table above if these shift: */
629   STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
630   STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
631   STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
632   STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
633   STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));
634
635   assert(util_is_power_of_two_nonzero(dirty));
636   assert(ffs(dirty) <= ARRAY_SIZE(map));
637
638   ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];
639
640   or_dirty(ctx->dirty_shader[shader], dirty);
641   fd_context_dirty(ctx, map[ffs(dirty) - 1]);
642}
643
644/* mark all state dirty: */
645static inline void
646fd_context_all_dirty(struct fd_context *ctx) assert_dt
647{
648   ctx->last.dirty = true;
649   ctx->dirty = (enum fd_dirty_3d_state) ~0;
650
651   /* NOTE: don't use ~0 for gen_dirty, because the gen specific
652    * emit code will loop over all the bits:
653    */
654   ctx->gen_dirty = ctx->gen_all_dirty;
655
656   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
657      ctx->dirty_shader[i] = (enum fd_dirty_shader_state) ~0;
658}
659
660static inline void
661fd_context_all_clean(struct fd_context *ctx) assert_dt
662{
663   ctx->last.dirty = false;
664   ctx->dirty = (enum fd_dirty_3d_state)0;
665   ctx->gen_dirty = 0;
666   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
667      /* don't mark compute state as clean, since it is not emitted
668       * during normal draw call.  The places that call _all_dirty(),
669       * it is safe to mark compute state dirty as well, but the
670       * inverse is not true.
671       */
672      if (i == PIPE_SHADER_COMPUTE)
673         continue;
674      ctx->dirty_shader[i] = (enum fd_dirty_shader_state)0;
675   }
676}
677
678/**
679 * Add mapping between global dirty bit and generation specific dirty
680 * bit.
681 */
682static inline void
683fd_context_add_map(struct fd_context *ctx, enum fd_dirty_3d_state dirty,
684                   uint32_t gen_dirty)
685{
686   u_foreach_bit (b, dirty) {
687      ctx->gen_dirty_map[b] |= gen_dirty;
688   }
689   ctx->gen_all_dirty |= gen_dirty;
690}
691
692/**
693 * Add mapping between shader stage specific dirty bit and generation
694 * specific dirty bit
695 */
696static inline void
697fd_context_add_shader_map(struct fd_context *ctx, enum pipe_shader_type shader,
698                          enum fd_dirty_shader_state dirty, uint32_t gen_dirty)
699{
700   u_foreach_bit (b, dirty) {
701      ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
702   }
703   ctx->gen_all_dirty |= gen_dirty;
704}
705
706static inline struct pipe_scissor_state *
707fd_context_get_scissor(struct fd_context *ctx) assert_dt
708{
709   return ctx->current_scissor;
710}
711
/* batch / context switching: */
void fd_context_switch_from(struct fd_context *ctx) assert_dt;
void fd_context_switch_to(struct fd_context *ctx,
                          struct fd_batch *batch) assert_dt;
struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;

/* common pre-cooked VBO state setup/teardown (see solid_vbuf etc): */
void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);
/* emit debug/marker string into cmdstream: */
void fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len);
void fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len);

/* context creation/destruction: */
struct pipe_context *fd_context_init(struct fd_context *ctx,
                                     struct pipe_screen *pscreen,
                                     void *priv, unsigned flags);
struct pipe_context *fd_context_init_tc(struct pipe_context *pctx,
                                        unsigned flags);

void fd_context_destroy(struct pipe_context *pctx) assert_dt;
730
731#ifdef __cplusplus
732}
733#endif
734
735#endif /* FREEDRENO_CONTEXT_H_ */
736