1/*
2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29  * Authors:
30  *   Keith Whitwell <keithw@vmware.com>
31  */
32
33
34#include "compiler/nir/nir.h"
35#include "main/api_exec.h"
36#include "main/context.h"
37#include "main/fbobject.h"
38#include "main/extensions.h"
39#include "main/glthread.h"
40#include "main/imports.h"
41#include "main/macros.h"
42#include "main/points.h"
43#include "main/version.h"
44#include "main/vtxfmt.h"
45#include "main/texobj.h"
46#include "main/framebuffer.h"
47#include "main/stencil.h"
48#include "main/state.h"
49
50#include "vbo/vbo.h"
51
52#include "drivers/common/driverfuncs.h"
53#include "drivers/common/meta.h"
54#include "utils.h"
55
56#include "brw_context.h"
57#include "brw_defines.h"
58#include "brw_blorp.h"
59#include "brw_draw.h"
60#include "brw_state.h"
61
62#include "intel_batchbuffer.h"
63#include "intel_buffer_objects.h"
64#include "intel_buffers.h"
65#include "intel_fbo.h"
66#include "intel_mipmap_tree.h"
67#include "intel_pixel.h"
68#include "intel_image.h"
69#include "intel_tex.h"
70#include "intel_tex_obj.h"
71
72#include "swrast_setup/swrast_setup.h"
73#include "tnl/tnl.h"
74#include "tnl/t_pipeline.h"
75#include "util/ralloc.h"
76#include "util/debug.h"
77#include "util/disk_cache.h"
78#include "isl/isl.h"
79
80#include "common/gen_defines.h"
81
82#include "compiler/spirv/nir_spirv.h"
83/***************************************
84 * Mesa's Driver Functions
85 ***************************************/
86
/* String reported for GL_VENDOR queries (see intel_get_string below). */
const char *const brw_vendor_string = "Intel Open Source Technology Center";
88
89static const char *
90get_bsw_model(const struct intel_screen *screen)
91{
92   switch (screen->eu_total) {
93   case 16:
94      return "405";
95   case 12:
96      return "400";
97   default:
98      return "   ";
99   }
100}
101
102const char *
103brw_get_renderer_string(const struct intel_screen *screen)
104{
105   const char *chipset;
106   static char buffer[128];
107   char *bsw = NULL;
108
109   switch (screen->deviceID) {
110#undef CHIPSET
111#define CHIPSET(id, symbol, str) case id: chipset = str; break;
112#include "pci_ids/i965_pci_ids.h"
113   default:
114      chipset = "Unknown Intel Chipset";
115      break;
116   }
117
118   /* Braswell branding is funny, so we have to fix it up here */
119   if (screen->deviceID == 0x22B1) {
120      bsw = strdup(chipset);
121      char *needle = strstr(bsw, "XXX");
122      if (needle) {
123         memcpy(needle, get_bsw_model(screen), 3);
124         chipset = bsw;
125      }
126   }
127
128   (void) driGetRendererString(buffer, chipset, 0);
129   free(bsw);
130   return buffer;
131}
132
133static const GLubyte *
134intel_get_string(struct gl_context * ctx, GLenum name)
135{
136   const struct brw_context *const brw = brw_context(ctx);
137
138   switch (name) {
139   case GL_VENDOR:
140      return (GLubyte *) brw_vendor_string;
141
142   case GL_RENDERER:
143      return
144         (GLubyte *) brw_get_renderer_string(brw->screen);
145
146   default:
147      return NULL;
148   }
149}
150
151static void
152brw_set_background_context(struct gl_context *ctx,
153                           struct util_queue_monitoring *queue_info)
154{
155   struct brw_context *brw = brw_context(ctx);
156   __DRIcontext *driContext = brw->driContext;
157   __DRIscreen *driScreen = driContext->driScreenPriv;
158   const __DRIbackgroundCallableExtension *backgroundCallable =
159      driScreen->dri2.backgroundCallable;
160
161   /* Note: Mesa will only call this function if we've called
162    * _mesa_enable_multithreading().  We only do that if the loader exposed
163    * the __DRI_BACKGROUND_CALLABLE extension.  So we know that
164    * backgroundCallable is not NULL.
165    */
166   backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
167}
168
169static void
170intel_viewport(struct gl_context *ctx)
171{
172   struct brw_context *brw = brw_context(ctx);
173   __DRIcontext *driContext = brw->driContext;
174
175   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
176      if (driContext->driDrawablePriv)
177         dri2InvalidateDrawable(driContext->driDrawablePriv);
178      if (driContext->driReadablePriv)
179         dri2InvalidateDrawable(driContext->driReadablePriv);
180   }
181}
182
183static void
184intel_update_framebuffer(struct gl_context *ctx,
185                         struct gl_framebuffer *fb)
186{
187   struct brw_context *brw = brw_context(ctx);
188
189   /* Quantize the derived default number of samples
190    */
191   fb->DefaultGeometry._NumSamples =
192      intel_quantize_num_samples(brw->screen,
193                                 fb->DefaultGeometry.NumSamples);
194}
195
196static void
197intel_update_state(struct gl_context * ctx)
198{
199   GLuint new_state = ctx->NewState;
200   struct brw_context *brw = brw_context(ctx);
201
202   if (ctx->swrast_context)
203      _swrast_InvalidateState(ctx, new_state);
204
205   brw->NewGLState |= new_state;
206
207   if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
208      _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
209
210   if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
211      brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
212      brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
213      brw->stencil_write_enabled =
214         _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
215   }
216
217   if (new_state & _NEW_POLYGON)
218      brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
219
220   if (new_state & _NEW_BUFFERS) {
221      intel_update_framebuffer(ctx, ctx->DrawBuffer);
222      if (ctx->DrawBuffer != ctx->ReadBuffer)
223         intel_update_framebuffer(ctx, ctx->ReadBuffer);
224   }
225}
226
/* Select whichever loader interface is present for front-buffer flushing:
 * the image loader takes priority over the DRI2 loader.  Evaluates to the
 * loader's flushFrontBuffer callback (possibly NULL).
 */
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

/* Push pending fake-front-buffer rendering out to the real front buffer via
 * the loader.  Only acts when front-buffer rendering actually happened on a
 * window-system framebuffer and the loader supports the flush callback.
 */
static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const dri_screen = brw->screen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(dri_screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}
260
261static void
262brw_display_shared_buffer(struct brw_context *brw)
263{
264   __DRIcontext *dri_context = brw->driContext;
265   __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
266   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
267   int fence_fd = -1;
268
269   if (!brw->is_shared_buffer_bound)
270      return;
271
272   if (!brw->is_shared_buffer_dirty)
273      return;
274
275   if (brw->screen->has_exec_fence) {
276      /* This function is always called during a flush operation, so there is
277       * no need to flush again here. But we want to provide a fence_fd to the
278       * loader, and a redundant flush is the easiest way to acquire one.
279       */
280      if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
281         return;
282   }
283
284   dri_screen->mutableRenderBuffer.loader
285      ->displaySharedBuffer(dri_drawable, fence_fd,
286                            dri_drawable->loaderPrivate);
287   brw->is_shared_buffer_dirty = false;
288}
289
/* ctx->Driver.Flush (glFlush): submit the current batch, push any pending
 * fake-front rendering to the real front buffer, display a bound shared
 * buffer, and request frame throttling on the next batch.
 */
static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);
   brw_display_shared_buffer(brw);
   brw->need_flush_throttle = true;
}
300
/* ctx->Driver.Finish (glFinish): perform a full flush, then block until the
 * last submitted batch buffer has finished executing on the GPU.
 */
static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      brw_bo_wait_rendering(brw->batch.last_bo);
}
311
/* Populate the dd_function_table with this driver's implementations.
 *
 * Starts from Mesa's software defaults (_mesa_init_driver_functions) and
 * overrides entries, choosing generation-specific variants where the
 * hardware differs (query objects, transform feedback, sample positions).
 */
static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   /* Sub-module hooks: draw, texturing, clears, buffers, pixels, sync. */
   brw_init_draw_functions(functions);
   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   brw_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   /* Query objects: HSW/gen8+ can use MI_MATH-based paths; older gens get
    * progressively simpler implementations.
    */
   if (devinfo->gen >= 8 || devinfo->is_haswell)
      hsw_init_queryobj_functions(functions);
   else if (devinfo->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   brw_init_conditional_render_functions(functions);

   functions->GenerateMipmap = brw_generate_mipmap;

   functions->QueryInternalFormat = brw_query_internal_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   /* Transform feedback: prefer the MI_MATH/MI_LOAD_REGISTER_REG variant
    * when the kernel/hardware combination supports it, then the gen7 path,
    * else the software-counted fallback.
    */
   if (can_do_mi_math_and_lrr(brw->screen)) {
      functions->BeginTransformFeedback = hsw_begin_transform_feedback;
      functions->EndTransformFeedback = hsw_end_transform_feedback;
      functions->PauseTransformFeedback = hsw_pause_transform_feedback;
      functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
   } else if (devinfo->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
      functions->PauseTransformFeedback = brw_pause_transform_feedback;
      functions->ResumeTransformFeedback = brw_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   }

   if (devinfo->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;

   /* GL_ARB_get_program_binary */
   brw_program_binary_init(brw->screen->deviceID);
   functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
   functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
   functions->ProgramBinaryDeserializeDriverBlob =
      brw_deserialize_program_binary;

   if (brw->screen->disk_cache) {
      functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
   }

   functions->SetBackgroundContext = brw_set_background_context;
}
401
402static void
403brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
404{
405   const struct gen_device_info *devinfo = &brw->screen->devinfo;
406   struct gl_context *ctx = &brw->ctx;
407
408   /* The following SPIR-V capabilities are only supported on gen7+. In theory
409    * you should enable the extension only on gen7+, but just in case let's
410    * assert it.
411    */
412   assert(devinfo->gen >= 7);
413
414   ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
415   ctx->Const.SpirVCapabilities.draw_parameters = true;
416   ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
417   ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
418   ctx->Const.SpirVCapabilities.image_write_without_format = true;
419   ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
420   ctx->Const.SpirVCapabilities.tessellation = true;
421   ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
422   ctx->Const.SpirVCapabilities.variable_pointers = true;
423}
424
/* Initialize the bulk of ctx->Const: implementation limits, precisions, and
 * feature knobs, mostly keyed off the hardware generation.
 *
 * NOTE(review): stage_exists below reads ctx->Const.MaxComputeWorkGroupSize,
 * so this appears to assume brw_initialize_cs_context_constants() has
 * already run — confirm call order at the call site.
 */
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* Which shader stages the hardware/API combination exposes; compute
    * requires the workgroup size minimums of the respective GL/ES spec.
    */
   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
      [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (_mesa_is_desktop_gl(ctx) &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   /* The timestamp register we can read for glGetTimestamp() is
    * sometimes only 32 bits, before scaling to nanoseconds (depending
    * on kernel).
    *
    * Once scaled to nanoseconds the timestamp would roll over at a
    * non-power-of-two, so an application couldn't use
    * GL_QUERY_COUNTER_BITS to handle rollover correctly.  Instead, we
    * report 36 bits and truncate at that (rolling over 5 times as
    * often as the HW counter), and when the 32-bit counter rolls
    * over, it happens to also be at a rollover in the reported value
    * from near (1<<36) to 0.
    *
    * The low 32 bits rolls over in ~343 seconds.  Our 36-bit result
    * rolls over every ~69 seconds.
    */
   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   if (devinfo->gen >= 7) {
      ctx->Const.MaxRenderbufferSize = 16384;
      ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
      ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
   } else {
      ctx->Const.MaxRenderbufferSize = 8192;
      ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
      ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   }
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.MaxTextureLodBias = 15.0;
   ctx->Const.StripTextureBorder = true;
   if (devinfo->gen >= 7) {
      ctx->Const.MaxProgramTextureGatherComponents = 4;
      ctx->Const.MinProgramTextureGatherOffset = -32;
      ctx->Const.MaxProgramTextureGatherOffset = 31;
   } else if (devinfo->gen == 6) {
      ctx->Const.MaxProgramTextureGatherComponents = 1;
      ctx->Const.MinProgramTextureGatherOffset = -8;
      ctx->Const.MaxProgramTextureGatherOffset = 7;
   }

   ctx->Const.MaxUniformBlockSize = 65536;

   /* Per-stage program limits, applied only to stages that exist. */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   /* Combined limits scale with the number of existing stages. */
   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;


   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
      !can_do_mi_math_and_lrr(brw->screen);

   /* MSAA: take the largest supported mode, optionally clamped by the
    * "clamp_max_samples" driconf option.
    */
   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (devinfo->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (devinfo->gen >= 5 || devinfo->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.GLSLTessLevelsAsInputs = true;
   ctx->Const.PrimitiveRestartForPatches = true;

   /* ARB-program limits for the vertex stage. */
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
	   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   /* ARB-program limits for the fragment stage. */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
	   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
    * but we're not sure how it's actually done for vertex order,
    * that affect provoking vertex decision. Always use last vertex
    * convention for quad primitive which works as expected for now.
    */
   if (devinfo->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instruction's with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    *
    * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
    * restriction: the start of the buffer needs to be 32B aligned.
    */
   ctx->Const.UniformBufferOffsetAlignment = 32;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (devinfo->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
         compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->screen->compiler->glsl_compiler_options[i];
   }

   if (devinfo->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array, OES_viewport_array */
   if (devinfo->gen >= 6) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 8;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (devinfo->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;

   /* OES_primitive_bounding_box */
   ctx->Const.NoPrimitiveBoundingBoxOutput = true;

   /* TODO: We should be able to use STD430 packing by default on all hardware
    * but some piglit tests [1] currently fail on SNB when this is enabled.
    * The problem is the messages we're using for doing uniform pulls
    * in the vec4 back-end on SNB is the OWORD block load instruction, which
    * takes its offset in units of OWORDS (16 bytes).  On IVB+, we use the
    * sampler which doesn't have these restrictions.
    *
    * In the scalar back-end, we use the sampler for dynamic uniform loads and
    * pull an entire cache line at a time for constant offset loads both of
    * which support almost any alignment.
    *
    * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
    */
   if (devinfo->gen >= 7)
      ctx->Const.UseSTD430AsDefaultPacking = true;

   if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
      ctx->Const.AllowMappedBuffersDuringExecution = true;

   /* GL_ARB_get_program_binary */
   ctx->Const.NumProgramBinaryFormats = 1;
}
791
792static void
793brw_initialize_cs_context_constants(struct brw_context *brw)
794{
795   struct gl_context *ctx = &brw->ctx;
796   const struct intel_screen *screen = brw->screen;
797   struct gen_device_info *devinfo = &brw->screen->devinfo;
798
799   /* FINISHME: Do this for all platforms that the kernel supports */
800   if (devinfo->is_cherryview &&
801       screen->subslice_total > 0 && screen->eu_total > 0) {
802      /* Logical CS threads = EUs per subslice * 7 threads per EU */
803      uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
804
805      /* Fuse configurations may give more threads than expected, never less. */
806      if (max_cs_threads > devinfo->max_cs_threads)
807         devinfo->max_cs_threads = max_cs_threads;
808   }
809
810   /* Maximum number of scalar compute shader invocations that can be run in
811    * parallel in the same subslice assuming SIMD32 dispatch.
812    *
813    * We don't advertise more than 64 threads, because we are limited to 64 by
814    * our usage of thread_width_max in the gpgpu walker command. This only
815    * currently impacts Haswell, which otherwise might be able to advertise 70
816    * threads. With SIMD32 and 64 threads, Haswell still provides twice the
817    * required the number of invocation needed for ARB_compute_shader.
818    */
819   const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
820   const uint32_t max_invocations = 32 * max_threads;
821   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
822   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
823   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
824   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
825   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
826}
827
828/**
829 * Process driconf (drirc) options, setting appropriate context flags.
830 *
831 * intelInitExtensions still pokes at optionCache directly, in order to
832 * avoid advertising various extensions.  No flags are set, so it makes
833 * sense to continue doing that there.
834 */
835static void
836brw_process_driconf_options(struct brw_context *brw)
837{
838   const struct gen_device_info *devinfo = &brw->screen->devinfo;
839   struct gl_context *ctx = &brw->ctx;
840
841   driOptionCache *options = &brw->optionCache;
842   driParseConfigFiles(options, &brw->screen->optionCache,
843                       brw->driContext->driScreenPriv->myNum,
844                       "i965", NULL);
845
846   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
847   switch (bo_reuse_mode) {
848   case DRI_CONF_BO_REUSE_DISABLED:
849      break;
850   case DRI_CONF_BO_REUSE_ALL:
851      brw_bufmgr_enable_reuse(brw->bufmgr);
852      break;
853   }
854
855   if (INTEL_DEBUG & DEBUG_NO_HIZ) {
856       brw->has_hiz = false;
857       /* On gen6, you can only do separate stencil with HIZ. */
858       if (devinfo->gen == 6)
859          brw->has_separate_stencil = false;
860   }
861
862   if (driQueryOptionb(options, "mesa_no_error"))
863      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
864
865   if (driQueryOptionb(options, "always_flush_batch")) {
866      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
867      brw->always_flush_batch = true;
868   }
869
870   if (driQueryOptionb(options, "always_flush_cache")) {
871      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
872      brw->always_flush_cache = true;
873   }
874
875   if (driQueryOptionb(options, "disable_throttling")) {
876      fprintf(stderr, "disabling flush throttling\n");
877      brw->disable_throttling = true;
878   }
879
880   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
881
882   if (driQueryOptionb(&brw->optionCache, "precise_trig"))
883      brw->screen->compiler->precise_trig = true;
884
885   ctx->Const.ForceGLSLExtensionsWarn =
886      driQueryOptionb(options, "force_glsl_extensions_warn");
887
888   ctx->Const.ForceGLSLVersion =
889      driQueryOptioni(options, "force_glsl_version");
890
891   ctx->Const.DisableGLSLLineContinuations =
892      driQueryOptionb(options, "disable_glsl_line_continuations");
893
894   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
895      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
896
897   ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
898      driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
899
900   ctx->Const.AllowHigherCompatVersion =
901      driQueryOptionb(options, "allow_higher_compat_version");
902
903   ctx->Const.ForceGLSLAbsSqrt =
904      driQueryOptionb(options, "force_glsl_abs_sqrt");
905
906   ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
907
908   brw->dual_color_blend_by_location =
909      driQueryOptionb(options, "dual_color_blend_by_location");
910
911   ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
912      driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
913
914   ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
915   driComputeOptionsSha1(&brw->screen->optionCache,
916                         ctx->Const.dri_config_options_sha1);
917}
918
919GLboolean
920brwCreateContext(gl_api api,
921                 const struct gl_config *mesaVis,
922                 __DRIcontext *driContextPriv,
923                 const struct __DriverContextConfig *ctx_config,
924                 unsigned *dri_ctx_error,
925                 void *sharedContextPrivate)
926{
927   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
928   struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
929   const struct gen_device_info *devinfo = &screen->devinfo;
930   struct dd_function_table functions;
931
932   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
933    * provides us with context reset notifications.
934    */
935   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
936                            __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
937                            __DRI_CTX_FLAG_NO_ERROR;
938
939   if (screen->has_context_reset_notification)
940      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
941
942   if (ctx_config->flags & ~allowed_flags) {
943      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
944      return false;
945   }
946
947   if (ctx_config->attribute_mask &
948       ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
949         __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
950      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
951      return false;
952   }
953
954   bool notify_reset =
955      ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
956       ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
957
958   struct brw_context *brw = rzalloc(NULL, struct brw_context);
959   if (!brw) {
960      fprintf(stderr, "%s: failed to alloc context\n", __func__);
961      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
962      return false;
963   }
964
965   driContextPriv->driverPrivate = brw;
966   brw->driContext = driContextPriv;
967   brw->screen = screen;
968   brw->bufmgr = screen->bufmgr;
969
970   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
971   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
972
973   brw->has_swizzling = screen->hw_has_swizzling;
974
975   brw->isl_dev = screen->isl_dev;
976
977   brw->vs.base.stage = MESA_SHADER_VERTEX;
978   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
979   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
980   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
981   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
982   brw->cs.base.stage = MESA_SHADER_COMPUTE;
983
984   brw_init_driver_functions(brw, &functions);
985
986   if (notify_reset)
987      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
988
989   struct gl_context *ctx = &brw->ctx;
990
991   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
992      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
993      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
994      intelDestroyContext(driContextPriv);
995      return false;
996   }
997
998   driContextSetFlags(ctx, ctx_config->flags);
999
1000   /* Initialize the software rasterizer and helper modules.
1001    *
1002    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1003    * software fallbacks (which we have to support on legacy GL to do weird
1004    * glDrawPixels(), glBitmap(), and other functions).
1005    */
1006   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1007      _swrast_CreateContext(ctx);
1008   }
1009
1010   _vbo_CreateContext(ctx);
1011   if (ctx->swrast_context) {
1012      _tnl_CreateContext(ctx);
1013      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1014      _swsetup_CreateContext(ctx);
1015
1016      /* Configure swrast to match hardware characteristics: */
1017      _swrast_allow_pixel_fog(ctx, false);
1018      _swrast_allow_vertex_fog(ctx, true);
1019   }
1020
1021   _mesa_meta_init(ctx);
1022
1023   brw_process_driconf_options(brw);
1024
1025   if (INTEL_DEBUG & DEBUG_PERF)
1026      brw->perf_debug = true;
1027
1028   brw_initialize_cs_context_constants(brw);
1029   brw_initialize_context_constants(brw);
1030
1031   ctx->Const.ResetStrategy = notify_reset
1032      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1033
1034   /* Reinitialize the context point state.  It depends on ctx->Const values. */
1035   _mesa_init_point(ctx);
1036
1037   intel_fbo_init(brw);
1038
1039   intel_batchbuffer_init(brw);
1040
1041   /* Create a new hardware context.  Using a hardware context means that
1042    * our GPU state will be saved/restored on context switch, allowing us
1043    * to assume that the GPU is in the same state we left it in.
1044    *
1045    * This is required for transform feedback buffer offsets, query objects,
1046    * and also allows us to reduce how much state we have to emit.
1047    */
1048   brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1049   if (!brw->hw_ctx && devinfo->gen >= 6) {
1050      fprintf(stderr, "Failed to create hardware context.\n");
1051      intelDestroyContext(driContextPriv);
1052      return false;
1053   }
1054
1055   if (brw->hw_ctx) {
1056      int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1057      if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1058         switch (ctx_config->priority) {
1059         case __DRI_CTX_PRIORITY_LOW:
1060            hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1061            break;
1062         case __DRI_CTX_PRIORITY_HIGH:
1063            hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1064            break;
1065         }
1066      }
1067      if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1068          brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1069         fprintf(stderr,
1070		 "Failed to set priority [%d:%d] for hardware context.\n",
1071                 ctx_config->priority, hw_priority);
1072         intelDestroyContext(driContextPriv);
1073         return false;
1074      }
1075   }
1076
1077   if (brw_init_pipe_control(brw, devinfo)) {
1078      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1079      intelDestroyContext(driContextPriv);
1080      return false;
1081   }
1082
1083   brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1084
1085   brw_init_state(brw);
1086
1087   intelInitExtensions(ctx);
1088
1089   brw_init_surface_formats(brw);
1090
1091   brw_blorp_init(brw);
1092
1093   brw->urb.size = devinfo->urb.size;
1094
1095   if (devinfo->gen == 6)
1096      brw->urb.gs_present = false;
1097
1098   brw->prim_restart.in_progress = false;
1099   brw->prim_restart.enable_cut_index = false;
1100   brw->gs.enabled = false;
1101   brw->clip.viewport_count = 1;
1102
1103   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1104
1105   brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1106
1107   ctx->VertexProgram._MaintainTnlProgram = true;
1108   ctx->FragmentProgram._MaintainTexEnvProgram = true;
1109
1110   brw_draw_init( brw );
1111
1112   if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1113      /* Turn on some extra GL_ARB_debug_output generation. */
1114      brw->perf_debug = true;
1115   }
1116
1117   if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1118      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1119      ctx->Const.RobustAccess = GL_TRUE;
1120   }
1121
1122   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1123      brw_init_shader_time(brw);
1124
1125   _mesa_override_extensions(ctx);
1126   _mesa_compute_version(ctx);
1127
1128   /* GL_ARB_gl_spirv */
1129   if (ctx->Extensions.ARB_gl_spirv)
1130      brw_initialize_spirv_supported_capabilities(brw);
1131
1132   _mesa_initialize_dispatch_tables(ctx);
1133   _mesa_initialize_vbo_vtxfmt(ctx);
1134
1135   if (ctx->Extensions.INTEL_performance_query)
1136      brw_init_performance_queries(brw);
1137
1138   vbo_use_buffer_objects(ctx);
1139   vbo_always_unmap_buffers(ctx);
1140
1141   brw->ctx.Cache = brw->screen->disk_cache;
1142
1143   if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1144       driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1145      /* Loader supports multithreading, and so do we. */
1146      _mesa_glthread_init(ctx);
1147   }
1148
1149   return true;
1150}
1151
/**
 * Destroy a brw_context and everything it owns.
 *
 * Counterpart to brwCreateContext(); also called on partially-constructed
 * contexts from brwCreateContext() failure paths, so each teardown step
 * must tolerate state that was never fully initialized.
 */
void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   GET_CURRENT_CONTEXT(curctx);

   if (curctx == NULL) {
      /* No current context, but we need one to release
       * renderbuffer surface when we release framebuffer.
       * So temporarily bind the context.
       */
      _mesa_make_current(ctx, NULL, NULL);
   }

   _mesa_glthread_destroy(&brw->ctx);

   _mesa_meta_free(&brw->ctx);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   blorp_finish(&brw->blorp);

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   brw_bo_unreference(brw->curbe.curbe_bo);

   /* Drop per-stage scratch buffers (no compute entry here; presumably the
    * CS scratch BO is owned elsewhere -- TODO confirm).
    */
   brw_bo_unreference(brw->vs.base.scratch_bo);
   brw_bo_unreference(brw->tcs.base.scratch_bo);
   brw_bo_unreference(brw->tes.base.scratch_bo);
   brw_bo_unreference(brw->gs.base.scratch_bo);
   brw_bo_unreference(brw->wm.base.scratch_bo);

   /* Drop per-stage push-constant buffers. */
   brw_bo_unreference(brw->vs.base.push_const_bo);
   brw_bo_unreference(brw->tcs.base.push_const_bo);
   brw_bo_unreference(brw->tes.base.push_const_bo);
   brw_bo_unreference(brw->gs.base.push_const_bo);
   brw_bo_unreference(brw->wm.base.push_const_bo);

   brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);

   /* swrast/tnl/swsetup only exist when brwCreateContext set them up for
    * legacy-GL APIs; mirror that conditional here.
    */
   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(&brw->batch);

   brw_bo_unreference(brw->throttle_batch[1]);
   brw_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx, true);

   /* brw was rzalloc'd with a NULL parent, so this frees the whole tree. */
   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}
1227
/**
 * Release the context from the calling thread.
 *
 * Drains any queued glthread work first so no worker keeps using the
 * context after it stops being current.
 */
GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   struct gl_context *ctx = driContextPriv->driverPrivate;
   _mesa_glthread_finish(ctx);

   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}
1239
1240/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1242 * on window system framebuffers.
1243 *
1244 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1245 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1246 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
1247 * for a visual where you're guaranteed to be capable, but it turns out that
1248 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1249 * incapable ones, because there's no difference between the two in resources
1250 * used.  Applications thus get built that accidentally rely on the default
1251 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
1252 * great...
1253 *
1254 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1255 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1256 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1257 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
1258 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1259 * and get no sRGB encode (assuming that both kinds of visual are available).
1260 * Thus our choice to support sRGB by default on our visuals for desktop would
1261 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1262 *
1263 * Unfortunately, renderbuffer setup happens before a context is created.  So
1264 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1265 * context (without an sRGB visual), we go turn that back off before anyone
1266 * finds out.
1267 */
1268static void
1269intel_gles3_srgb_workaround(struct brw_context *brw,
1270                            struct gl_framebuffer *fb)
1271{
1272   struct gl_context *ctx = &brw->ctx;
1273
1274   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1275      return;
1276
1277   for (int i = 0; i < BUFFER_COUNT; i++) {
1278      struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1279
1280      /* Check if sRGB was specifically asked for. */
1281      struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1282      if (irb && irb->need_srgb)
1283         return;
1284
1285      if (rb)
1286         rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1287   }
1288   /* Disable sRGB from framebuffers that are not compatible. */
1289   fb->Visual.sRGBCapable = false;
1290}
1291
1292GLboolean
1293intelMakeCurrent(__DRIcontext * driContextPriv,
1294                 __DRIdrawable * driDrawPriv,
1295                 __DRIdrawable * driReadPriv)
1296{
1297   struct brw_context *brw;
1298
1299   if (driContextPriv)
1300      brw = (struct brw_context *) driContextPriv->driverPrivate;
1301   else
1302      brw = NULL;
1303
1304   if (driContextPriv) {
1305      struct gl_context *ctx = &brw->ctx;
1306      struct gl_framebuffer *fb, *readFb;
1307
1308      if (driDrawPriv == NULL) {
1309         fb = _mesa_get_incomplete_framebuffer();
1310      } else {
1311         fb = driDrawPriv->driverPrivate;
1312         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1313      }
1314
1315      if (driReadPriv == NULL) {
1316         readFb = _mesa_get_incomplete_framebuffer();
1317      } else {
1318         readFb = driReadPriv->driverPrivate;
1319         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1320      }
1321
1322      /* The sRGB workaround changes the renderbuffer's format. We must change
1323       * the format before the renderbuffer's miptree get's allocated, otherwise
1324       * the formats of the renderbuffer and its miptree will differ.
1325       */
1326      intel_gles3_srgb_workaround(brw, fb);
1327      intel_gles3_srgb_workaround(brw, readFb);
1328
1329      /* If the context viewport hasn't been initialized, force a call out to
1330       * the loader to get buffers so we have a drawable size for the initial
1331       * viewport. */
1332      if (!brw->ctx.ViewportInitialized)
1333         intel_prepare_render(brw);
1334
1335      _mesa_make_current(ctx, fb, readFb);
1336   } else {
1337      GET_CURRENT_CONTEXT(ctx);
1338      _mesa_glthread_finish(ctx);
1339      _mesa_make_current(NULL, NULL, NULL);
1340   }
1341
1342   return true;
1343}
1344
1345void
1346intel_resolve_for_dri2_flush(struct brw_context *brw,
1347                             __DRIdrawable *drawable)
1348{
1349   const struct gen_device_info *devinfo = &brw->screen->devinfo;
1350
1351   if (devinfo->gen < 6) {
1352      /* MSAA and fast color clear are not supported, so don't waste time
1353       * checking whether a resolve is needed.
1354       */
1355      return;
1356   }
1357
1358   struct gl_framebuffer *fb = drawable->driverPrivate;
1359   struct intel_renderbuffer *rb;
1360
1361   /* Usually, only the back buffer will need to be downsampled. However,
1362    * the front buffer will also need it if the user has rendered into it.
1363    */
1364   static const gl_buffer_index buffers[2] = {
1365         BUFFER_BACK_LEFT,
1366         BUFFER_FRONT_LEFT,
1367   };
1368
1369   for (int i = 0; i < 2; ++i) {
1370      rb = intel_get_renderbuffer(fb, buffers[i]);
1371      if (rb == NULL || rb->mt == NULL)
1372         continue;
1373      if (rb->mt->surf.samples == 1) {
1374         assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1375                rb->layer_count == 1);
1376         intel_miptree_prepare_external(brw, rb->mt);
1377      } else {
1378         intel_renderbuffer_downsample(brw, rb);
1379
1380         /* Call prepare_external on the single-sample miptree to do any
1381          * needed resolves prior to handing it off to the window system.
1382          * This is needed in the case that rb->singlesample_mt is Y-tiled
1383          * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E.  In
1384          * this case, the MSAA resolve above will write compressed data into
1385          * rb->singlesample_mt.
1386          *
1387          * TODO: Some day, if we decide to care about the tiny performance
1388          * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1389          * we could detect this case and just allocate the single-sampled
1390          * miptree without aux.  However, that would be a lot of plumbing and
1391          * this is a rather exotic case so it's not really worth it.
1392          */
1393         intel_miptree_prepare_external(brw, rb->singlesample_mt);
1394      }
1395   }
1396}
1397
/* Bits per pixel of the renderbuffer's pixel format. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   const unsigned bytes = _mesa_get_format_bytes(intel_rb_format(rb));
   return bytes * 8;
}
1403
1404static void
1405intel_query_dri2_buffers(struct brw_context *brw,
1406                         __DRIdrawable *drawable,
1407                         __DRIbuffer **buffers,
1408                         int *count);
1409
1410static void
1411intel_process_dri2_buffer(struct brw_context *brw,
1412                          __DRIdrawable *drawable,
1413                          __DRIbuffer *buffer,
1414                          struct intel_renderbuffer *rb,
1415                          const char *buffer_name);
1416
1417static void
1418intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1419
/**
 * Refresh the framebuffer's renderbuffers from the classic DRI2 loader.
 *
 * Queries the loader for the current set of color buffers and attaches
 * each returned buffer to the matching renderbuffer.  Depth/stencil-class
 * attachments are not expected from the loader and abort the update.
 */
static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (int i = 0; i < count; i++) {
       /* Map each loader attachment to our renderbuffer; both real and
        * fake front buffers land on BUFFER_FRONT_LEFT.
        */
       switch (buffers[i].attachment) {
       case __DRI_BUFFER_FRONT_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 front buffer";
           break;

       case __DRI_BUFFER_FAKE_FRONT_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 fake front buffer";
           break;

       case __DRI_BUFFER_BACK_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
           region_name = "dri2 back buffer";
           break;

       case __DRI_BUFFER_DEPTH:
       case __DRI_BUFFER_HIZ:
       case __DRI_BUFFER_DEPTH_STENCIL:
       case __DRI_BUFFER_STENCIL:
       case __DRI_BUFFER_ACCUM:
       default:
           /* Unexpected attachment: log and stop processing this batch. */
           fprintf(stderr,
                   "unhandled buffer attach event, attachment type %d\n",
                   buffers[i].attachment);
           return;
       }

       intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}
1475
1476void
1477intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1478{
1479   struct brw_context *brw = context->driverPrivate;
1480   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1481
1482   /* Set this up front, so that in case our buffers get invalidated
1483    * while we're getting new buffers, we don't clobber the stamp and
1484    * thus ignore the invalidate. */
1485   drawable->lastStamp = drawable->dri2.stamp;
1486
1487   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1488      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1489
1490   if (dri_screen->image.loader)
1491      intel_update_image_buffers(brw, drawable);
1492   else
1493      intel_update_dri2_buffers(brw, drawable);
1494
1495   driUpdateFramebufferSize(&brw->ctx, drawable);
1496}
1497
1498/**
1499 * intel_prepare_render should be called anywhere that curent read/drawbuffer
1500 * state is required.
1501 */
1502void
1503intel_prepare_render(struct brw_context *brw)
1504{
1505   struct gl_context *ctx = &brw->ctx;
1506   __DRIcontext *driContext = brw->driContext;
1507   __DRIdrawable *drawable;
1508
1509   drawable = driContext->driDrawablePriv;
1510   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1511      if (drawable->lastStamp != drawable->dri2.stamp)
1512         intel_update_renderbuffers(driContext, drawable);
1513      driContext->dri2.draw_stamp = drawable->dri2.stamp;
1514   }
1515
1516   drawable = driContext->driReadablePriv;
1517   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1518      if (drawable->lastStamp != drawable->dri2.stamp)
1519         intel_update_renderbuffers(driContext, drawable);
1520      driContext->dri2.read_stamp = drawable->dri2.stamp;
1521   }
1522
1523   /* If we're currently rendering to the front buffer, the rendering
1524    * that will happen next will probably dirty the front buffer.  So
1525    * mark it as dirty here.
1526    */
1527   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1528      brw->front_buffer_dirty = true;
1529
1530   if (brw->is_shared_buffer_bound) {
1531      /* Subsequent rendering will probably dirty the shared buffer. */
1532      brw->is_shared_buffer_dirty = true;
1533   }
1534}
1535
1536/**
1537 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1538 *
1539 * To determine which DRI buffers to request, examine the renderbuffers
1540 * attached to the drawable's framebuffer. Then request the buffers with
1541 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1542 *
1543 * This is called from intel_update_renderbuffers().
1544 *
1545 * \param drawable      Drawable whose buffers are queried.
1546 * \param buffers       [out] List of buffers returned by DRI2 query.
1547 * \param buffer_count  [out] Number of buffers returned.
1548 *
1549 * \see intel_update_renderbuffers()
1550 * \see DRI2GetBuffers()
1551 * \see DRI2GetBuffersWithFormat()
1552 */
1553static void
1554intel_query_dri2_buffers(struct brw_context *brw,
1555                         __DRIdrawable *drawable,
1556                         __DRIbuffer **buffers,
1557                         int *buffer_count)
1558{
1559   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1560   struct gl_framebuffer *fb = drawable->driverPrivate;
1561   int i = 0;
1562   unsigned attachments[8];
1563
1564   struct intel_renderbuffer *front_rb;
1565   struct intel_renderbuffer *back_rb;
1566
1567   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1568   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1569
1570   memset(attachments, 0, sizeof(attachments));
1571   if ((_mesa_is_front_buffer_drawing(fb) ||
1572        _mesa_is_front_buffer_reading(fb) ||
1573        !back_rb) && front_rb) {
1574      /* If a fake front buffer is in use, then querying for
1575       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1576       * the real front buffer to the fake front buffer.  So before doing the
1577       * query, we need to make sure all the pending drawing has landed in the
1578       * real front buffer.
1579       */
1580      intel_batchbuffer_flush(brw);
1581      intel_flush_front(&brw->ctx);
1582
1583      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1584      attachments[i++] = intel_bits_per_pixel(front_rb);
1585   } else if (front_rb && brw->front_buffer_dirty) {
1586      /* We have pending front buffer rendering, but we aren't querying for a
1587       * front buffer.  If the front buffer we have is a fake front buffer,
1588       * the X server is going to throw it away when it processes the query.
1589       * So before doing the query, make sure all the pending drawing has
1590       * landed in the real front buffer.
1591       */
1592      intel_batchbuffer_flush(brw);
1593      intel_flush_front(&brw->ctx);
1594   }
1595
1596   if (back_rb) {
1597      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1598      attachments[i++] = intel_bits_per_pixel(back_rb);
1599   }
1600
1601   assert(i <= ARRAY_SIZE(attachments));
1602
1603   *buffers =
1604      dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1605                                                    &drawable->w,
1606                                                    &drawable->h,
1607                                                    attachments, i / 2,
1608                                                    buffer_count,
1609                                                    drawable->loaderPrivate);
1610}
1611
1612/**
1613 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1614 *
1615 * This is called from intel_update_renderbuffers().
1616 *
1617 * \par Note:
1618 *    DRI buffers whose attachment point is DRI2BufferStencil or
1619 *    DRI2BufferDepthStencil are handled as special cases.
1620 *
1621 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1622 *        that is passed to brw_bo_gem_create_from_name().
1623 *
1624 * \see intel_update_renderbuffers()
1625 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct brw_bo *bo;

   /* The framebuffer may not have a renderbuffer for this attachment. */
   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer(). If a bo already has a
       * name, then brw_bo_flink() is a low-cost getter.  It does not
       * create a new name.
       */
      brw_bo_flink(last_mt->bo, &old_name);
   }

   /* Same flink name as the currently bound BO: nothing to update. */
   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   /* Import the loader's buffer by its global (flink) name. */
   bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   uint32_t tiling, swizzle;
   brw_bo_get_tiling(bo, &tiling, &swizzle);

   /* Wrap the imported BO in a single-level, single-layer miptree sized to
    * the drawable.
    */
   struct intel_mipmap_tree *mt =
      intel_miptree_create_for_bo(brw,
                                  bo,
                                  intel_rb_format(rb),
                                  0,
                                  drawable->w,
                                  drawable->h,
                                  1,
                                  buffer->pitch,
                                  isl_tiling_from_i915_tiling(tiling),
                                  MIPTREE_CREATE_DEFAULT);
   if (!mt) {
      brw_bo_unreference(bo);
      return;
   }

   /* We got this BO from X11.  We can't assume that we have coherent texture
    * access because X may suddenly decide to use it for scan-out which would
    * destroy coherency.
    */
   bo->cache_coherent = false;

   if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
                                                 drawable->w, drawable->h,
                                                 buffer->pitch)) {
      brw_bo_unreference(bo);
      intel_miptree_release(&mt);
      return;
   }

   /* Front-buffer drawing into a multisampled winsys buffer: propagate the
    * newly attached single-sampled contents via an upsample pass.
    */
   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   /* The miptree holds its own reference now; drop ours. */
   brw_bo_unreference(bo);
}
1727
1728/**
1729 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1730 *
1731 * To determine which DRI buffers to request, examine the renderbuffers
1732 * attached to the drawable's framebuffer. Then request the buffers from
1733 * the image loader
1734 *
1735 * This is called from intel_update_renderbuffers().
1736 *
1737 * \param drawable      Drawable whose buffers are queried.
1738 * \param buffers       [out] List of buffers returned by DRI2 query.
1739 * \param buffer_count  [out] Number of buffers returned.
1740 *
1741 * \see intel_update_renderbuffers()
1742 */
1743
static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   /* Nothing to do without a renderbuffer or without a backing BO. */
   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo) {
      /* Already bound to this BO.  For a shared buffer, still call
       * intel_miptree_make_shareable() so the miptree stays safe for
       * concurrent external access (see the comment below).
       */
      if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
         intel_miptree_make_shareable(intel, last_mt);
      }
      return;
   }

   /* Only allow internal compression if samples == 0.  For multisampled
    * window system buffers, the only thing the single-sampled buffer is used
    * for is as a resolve target.  If we do any compression beyond what is
    * supported by the window system, we will just have to resolve so it's
    * probably better to just not bother.
    */
   const bool allow_internal_aux = (num_samples == 0);

   struct intel_mipmap_tree *mt =
      intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
                                         intel_rb_format(rb),
                                         allow_internal_aux);
   if (!mt)
      return;

   if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
                                                 buffer->width, buffer->height,
                                                 buffer->pitch)) {
      intel_miptree_release(&mt);
      return;
   }

   /* Front-buffer drawing into a multisampled winsys buffer: propagate the
    * newly attached single-sampled contents via an upsample pass.
    */
   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }

   if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
      /* The compositor and the application may access this image
       * concurrently. The display hardware may even scanout the image while
       * the GPU is rendering to it.  Aux surfaces cause difficulty with
       * concurrent access, so permanently disable aux for this miptree.
       *
       * Perhaps we could improve overall application performance by
       * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
       * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
       * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
       * approach to be highly dependent on the application's GL usage.
       *
       * I [chadv] expect clever disabling/reenabling to be counterproductive
       * in the use cases I care about: applications that render nearly
       * realtime handwriting to the surface while possibly undergoing
       * simultaneous scanout as a display plane. The app requires low
       * render latency. Even though the app spends most of its time in
       * shared-buffer mode, it also frequently transitions between
       * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
       * mode.  Visual stutter during the transitions should be avoided.
       *
       * In this case, I [chadv] believe reducing the GPU workload at
       * shared-buffer/double-buffer transitions would offer a smoother app
       * experience than any savings due to aux compression. But I've
       * collected no data to prove my theory.
       */
      intel_miptree_make_shareable(intel, mt);
   }
}
1831
1832static void
1833intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1834{
1835   struct gl_framebuffer *fb = drawable->driverPrivate;
1836   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1837   struct intel_renderbuffer *front_rb;
1838   struct intel_renderbuffer *back_rb;
1839   struct __DRIimageList images;
1840   mesa_format format;
1841   uint32_t buffer_mask = 0;
1842   int ret;
1843
1844   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1845   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1846
1847   if (back_rb)
1848      format = intel_rb_format(back_rb);
1849   else if (front_rb)
1850      format = intel_rb_format(front_rb);
1851   else
1852      return;
1853
1854   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1855                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1856      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1857   }
1858
1859   if (back_rb)
1860      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1861
1862   ret = dri_screen->image.loader->getBuffers(drawable,
1863                                              driGLFormatToImageFormat(format),
1864                                              &drawable->dri2.stamp,
1865                                              drawable->loaderPrivate,
1866                                              buffer_mask,
1867                                              &images);
1868   if (!ret)
1869      return;
1870
1871   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1872      drawable->w = images.front->width;
1873      drawable->h = images.front->height;
1874      intel_update_image_buffer(brw,
1875                                drawable,
1876                                front_rb,
1877                                images.front,
1878                                __DRI_IMAGE_BUFFER_FRONT);
1879   }
1880
1881   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1882      drawable->w = images.back->width;
1883      drawable->h = images.back->height;
1884      intel_update_image_buffer(brw,
1885                                drawable,
1886                                back_rb,
1887                                images.back,
1888                                __DRI_IMAGE_BUFFER_BACK);
1889   }
1890
1891   if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
1892      assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
1893      drawable->w = images.back->width;
1894      drawable->h = images.back->height;
1895      intel_update_image_buffer(brw,
1896                                drawable,
1897                                back_rb,
1898                                images.back,
1899                                __DRI_IMAGE_BUFFER_SHARED);
1900      brw->is_shared_buffer_bound = true;
1901   } else {
1902      brw->is_shared_buffer_bound = false;
1903      brw->is_shared_buffer_dirty = false;
1904   }
1905}
1906