1/****************************************************************************
2 * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24#include "swr_context.h"
25#include "swr_public.h"
26#include "swr_screen.h"
27#include "swr_resource.h"
28#include "swr_fence.h"
29#include "gen_knobs.h"
30
31#include "pipe/p_screen.h"
32#include "pipe/p_defines.h"
33#include "util/u_memory.h"
34#include "util/format/u_format.h"
35#include "util/u_inlines.h"
36#include "util/u_cpu_detect.h"
37#include "util/format/u_format_s3tc.h"
38#include "util/u_string.h"
39#include "util/u_screen.h"
40
41#include "frontend/sw_winsys.h"
42
43#include "jit_api.h"
44
45#include "memory/TilingFunctions.h"
46
47#include <stdio.h>
48#include <map>
49
50/*
51 * Max texture sizes
52 * XXX Check max texture size values against core and sampler.
53 */
54#define SWR_MAX_TEXTURE_SIZE (2 * 1024 * 1024 * 1024ULL) /* 2GB */
55/* Not all texture formats can fit into 2GB limit, but we have to
56   live with that. See lp_limits.h for more details */
57#define SWR_MAX_TEXTURE_2D_SIZE 16384
58#define SWR_MAX_TEXTURE_3D_LEVELS 12  /* 2K x 2K x 2K for now */
59#define SWR_MAX_TEXTURE_CUBE_LEVELS 14  /* 8K x 8K for now */
60#define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
61
62/* Default max client_copy_limit */
63#define SWR_CLIENT_COPY_LIMIT 8192
64
65/* Flag indicates creation of alternate surface, to prevent recursive loop
66 * in resource creation when msaa_force_enable is set. */
67#define SWR_RESOURCE_FLAG_ALT_SURFACE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
68
69
70static const char *
71swr_get_name(struct pipe_screen *screen)
72{
73   static char buf[100];
74   snprintf(buf, sizeof(buf), "SWR (LLVM " MESA_LLVM_VERSION_STRING ", %u bits)",
75            lp_native_vector_width);
76   return buf;
77}
78
/* Return the vendor string reported for this screen. The screen argument
 * is not consulted. */
static const char *
swr_get_vendor(struct pipe_screen *screen)
{
   static const char vendor[] = "Intel Corporation";

   return vendor;
}
84
85static bool
86swr_is_format_supported(struct pipe_screen *_screen,
87                        enum pipe_format format,
88                        enum pipe_texture_target target,
89                        unsigned sample_count,
90                        unsigned storage_sample_count,
91                        unsigned bind)
92{
93   struct swr_screen *screen = swr_screen(_screen);
94   struct sw_winsys *winsys = screen->winsys;
95   const struct util_format_description *format_desc;
96
97   assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D
98          || target == PIPE_TEXTURE_1D_ARRAY
99          || target == PIPE_TEXTURE_2D
100          || target == PIPE_TEXTURE_2D_ARRAY
101          || target == PIPE_TEXTURE_RECT
102          || target == PIPE_TEXTURE_3D
103          || target == PIPE_TEXTURE_CUBE
104          || target == PIPE_TEXTURE_CUBE_ARRAY);
105
106   if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
107      return false;
108
109   format_desc = util_format_description(format);
110   if (!format_desc)
111      return false;
112
113   if ((sample_count > screen->msaa_max_count)
114      || !util_is_power_of_two_or_zero(sample_count))
115      return false;
116
117   if (bind & PIPE_BIND_DISPLAY_TARGET) {
118      if (!winsys->is_displaytarget_format_supported(winsys, bind, format))
119         return false;
120   }
121
122   if (bind & PIPE_BIND_RENDER_TARGET) {
123      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
124         return false;
125
126      if (mesa_to_swr_format(format) == (SWR_FORMAT)-1)
127         return false;
128
129      /*
130       * Although possible, it is unnatural to render into compressed or YUV
131       * surfaces. So disable these here to avoid going into weird paths
132       * inside gallium frontends.
133       */
134      if (format_desc->block.width != 1 || format_desc->block.height != 1)
135         return false;
136   }
137
138   if (bind & PIPE_BIND_DEPTH_STENCIL) {
139      if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
140         return false;
141
142      if (mesa_to_swr_format(format) == (SWR_FORMAT)-1)
143         return false;
144   }
145
146   if (bind & PIPE_BIND_VERTEX_BUFFER) {
147      if (mesa_to_swr_format(format) == (SWR_FORMAT)-1) {
148         return false;
149      }
150   }
151
152   if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC ||
153       format_desc->layout == UTIL_FORMAT_LAYOUT_FXT1)
154   {
155      return false;
156   }
157
158   if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
159       format != PIPE_FORMAT_ETC1_RGB8) {
160      return false;
161   }
162
163   if ((bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) &&
164       ((bind & PIPE_BIND_DISPLAY_TARGET) == 0)) {
165      /* Disable all 3-channel formats, where channel size != 32 bits.
166       * In some cases we run into crashes (in generate_unswizzled_blend()),
167       * for 3-channel RGB16 variants, there was an apparent LLVM bug.
168       * In any case, disabling the shallower 3-channel formats avoids a
169       * number of issues with GL_ARB_copy_image support.
170       */
171      if (format_desc->is_array &&
172          format_desc->nr_channels == 3 &&
173          format_desc->block.bits != 96) {
174         return false;
175      }
176   }
177
178   return TRUE;
179}
180
181static int
182swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
183{
184   switch (param) {
185      /* limits */
186   case PIPE_CAP_MAX_RENDER_TARGETS:
187      return PIPE_MAX_COLOR_BUFS;
188   case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
189      return SWR_MAX_TEXTURE_2D_SIZE;
190   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
191      return SWR_MAX_TEXTURE_3D_LEVELS;
192   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
193      return SWR_MAX_TEXTURE_CUBE_LEVELS;
194   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
195      return MAX_SO_STREAMS;
196   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
197   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
198      return MAX_ATTRIBUTES * 4;
199   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
200   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
201      return 1024;
202   case PIPE_CAP_MAX_VERTEX_STREAMS:
203      return 4;
204   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
205      return 2048;
206   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
207      return SWR_MAX_TEXTURE_ARRAY_LAYERS;
208   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
209   case PIPE_CAP_MIN_TEXEL_OFFSET:
210      return -8;
211   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
212   case PIPE_CAP_MAX_TEXEL_OFFSET:
213      return 7;
214   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
215      return 4;
216   case PIPE_CAP_GLSL_FEATURE_LEVEL:
217      return 330;
218   case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
219      return 140;
220   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
221      return 16;
222   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
223      return 64;
224   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
225      return 65536;
226   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
227      return 1;
228   case PIPE_CAP_MAX_VIEWPORTS:
229      return KNOB_NUM_VIEWPORTS_SCISSORS;
230   case PIPE_CAP_ENDIANNESS:
231      return PIPE_ENDIAN_NATIVE;
232
233      /* supported features */
234   case PIPE_CAP_NPOT_TEXTURES:
235   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
236   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
237   case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
238   case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
239   case PIPE_CAP_VERTEX_SHADER_SATURATE:
240   case PIPE_CAP_POINT_SPRITE:
241   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
242   case PIPE_CAP_OCCLUSION_QUERY:
243   case PIPE_CAP_QUERY_TIME_ELAPSED:
244   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
245   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
246   case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
247   case PIPE_CAP_TEXTURE_SWIZZLE:
248   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
249   case PIPE_CAP_INDEP_BLEND_ENABLE:
250   case PIPE_CAP_INDEP_BLEND_FUNC:
251   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
252   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
253   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
254   case PIPE_CAP_DEPTH_CLIP_DISABLE:
255   case PIPE_CAP_PRIMITIVE_RESTART:
256   case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
257   case PIPE_CAP_TGSI_INSTANCEID:
258   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
259   case PIPE_CAP_START_INSTANCE:
260   case PIPE_CAP_SEAMLESS_CUBE_MAP:
261   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
262   case PIPE_CAP_CONDITIONAL_RENDER:
263   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
264   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
265   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
266   case PIPE_CAP_USER_VERTEX_BUFFERS:
267   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
268   case PIPE_CAP_QUERY_TIMESTAMP:
269   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
270   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
271   case PIPE_CAP_DRAW_INDIRECT:
272   case PIPE_CAP_UMA:
273   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
274   case PIPE_CAP_CLIP_HALFZ:
275   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
276   case PIPE_CAP_DEPTH_BOUNDS_TEST:
277   case PIPE_CAP_CLEAR_TEXTURE:
278   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
279   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
280   case PIPE_CAP_CULL_DISTANCE:
281   case PIPE_CAP_CUBE_MAP_ARRAY:
282   case PIPE_CAP_DOUBLES:
283   case PIPE_CAP_TEXTURE_QUERY_LOD:
284   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
285   case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
286   case PIPE_CAP_QUERY_SO_OVERFLOW:
287   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
288      return 1;
289
290   case PIPE_CAP_SHAREABLE_SHADERS:
291      return 0;
292
293   /* MSAA support
294    * If user has explicitly set max_sample_count = 1 (via SWR_MSAA_MAX_COUNT)
295    * then disable all MSAA support and go back to old (FAKE_SW_MSAA) caps. */
296   case PIPE_CAP_TEXTURE_MULTISAMPLE:
297   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
298      return (swr_screen(screen)->msaa_max_count > 1) ? 1 : 0;
299   case PIPE_CAP_FAKE_SW_MSAA:
300      return (swr_screen(screen)->msaa_max_count > 1) ? 0 : 1;
301
302   /* fetch jit change for 2-4GB buffers requires alignment */
303   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
304   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
305   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
306      return 1;
307
308      /* unsupported features */
309   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
310   case PIPE_CAP_PCI_GROUP:
311   case PIPE_CAP_PCI_BUS:
312   case PIPE_CAP_PCI_DEVICE:
313   case PIPE_CAP_PCI_FUNCTION:
314   case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
315      return 0;
316   case PIPE_CAP_MAX_GS_INVOCATIONS:
317      return 32;
318   case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
319      return 1 << 27;
320   case PIPE_CAP_MAX_VARYINGS:
321      return 32;
322
323   case PIPE_CAP_VENDOR_ID:
324      return 0xFFFFFFFF;
325   case PIPE_CAP_DEVICE_ID:
326      return 0xFFFFFFFF;
327   case PIPE_CAP_ACCELERATED:
328      return 0;
329   case PIPE_CAP_VIDEO_MEMORY: {
330      /* XXX: Do we want to return the full amount of system memory ? */
331      uint64_t system_memory;
332
333      if (!os_get_total_physical_memory(&system_memory))
334         return 0;
335
336      return (int)(system_memory >> 20);
337   }
338   default:
339      return u_pipe_screen_get_param_defaults(screen, param);
340   }
341}
342
343static int
344swr_get_shader_param(struct pipe_screen *screen,
345                     enum pipe_shader_type shader,
346                     enum pipe_shader_cap param)
347{
348   if (shader != PIPE_SHADER_VERTEX &&
349       shader != PIPE_SHADER_FRAGMENT &&
350       shader != PIPE_SHADER_GEOMETRY &&
351       shader != PIPE_SHADER_TESS_CTRL &&
352       shader != PIPE_SHADER_TESS_EVAL)
353      return 0;
354
355   if (param == PIPE_SHADER_CAP_MAX_SHADER_BUFFERS ||
356       param == PIPE_SHADER_CAP_MAX_SHADER_IMAGES) {
357      return 0;
358   }
359
360   return gallivm_get_shader_param(param);
361}
362
363
364static float
365swr_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
366{
367   switch (param) {
368   case PIPE_CAPF_MAX_LINE_WIDTH:
369   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
370   case PIPE_CAPF_MAX_POINT_WIDTH:
371      return 255.0; /* arbitrary */
372   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
373      return 0.0;
374   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
375      return 0.0;
376   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
377      return 16.0; /* arbitrary */
378   case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
379   case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
380   case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
381      return 0.0f;
382   }
383   /* should only get here on unhandled cases */
384   debug_printf("Unexpected PIPE_CAPF %d query\n", param);
385   return 0.0;
386}
387
388SWR_FORMAT
389mesa_to_swr_format(enum pipe_format format)
390{
391   static const std::map<pipe_format,SWR_FORMAT> mesa2swr = {
392      /* depth / stencil */
393      {PIPE_FORMAT_Z16_UNORM,              R16_UNORM}, // z
394      {PIPE_FORMAT_Z32_FLOAT,              R32_FLOAT}, // z
395      {PIPE_FORMAT_Z24_UNORM_S8_UINT,      R24_UNORM_X8_TYPELESS}, // z
396      {PIPE_FORMAT_Z24X8_UNORM,            R24_UNORM_X8_TYPELESS}, // z
397      {PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,   R32_FLOAT_X8X24_TYPELESS}, // z
398
399      /* alpha */
400      {PIPE_FORMAT_A8_UNORM,               A8_UNORM},
401      {PIPE_FORMAT_A16_UNORM,              A16_UNORM},
402      {PIPE_FORMAT_A16_FLOAT,              A16_FLOAT},
403      {PIPE_FORMAT_A32_FLOAT,              A32_FLOAT},
404
405      /* odd sizes, bgr */
406      {PIPE_FORMAT_B5G6R5_UNORM,           B5G6R5_UNORM},
407      {PIPE_FORMAT_B5G6R5_SRGB,            B5G6R5_UNORM_SRGB},
408      {PIPE_FORMAT_B5G5R5A1_UNORM,         B5G5R5A1_UNORM},
409      {PIPE_FORMAT_B5G5R5X1_UNORM,         B5G5R5X1_UNORM},
410      {PIPE_FORMAT_B4G4R4A4_UNORM,         B4G4R4A4_UNORM},
411      {PIPE_FORMAT_B8G8R8A8_UNORM,         B8G8R8A8_UNORM},
412      {PIPE_FORMAT_B8G8R8A8_SRGB,          B8G8R8A8_UNORM_SRGB},
413      {PIPE_FORMAT_B8G8R8X8_UNORM,         B8G8R8X8_UNORM},
414      {PIPE_FORMAT_B8G8R8X8_SRGB,          B8G8R8X8_UNORM_SRGB},
415
416      /* rgb10a2 */
417      {PIPE_FORMAT_R10G10B10A2_UNORM,      R10G10B10A2_UNORM},
418      {PIPE_FORMAT_R10G10B10A2_SNORM,      R10G10B10A2_SNORM},
419      {PIPE_FORMAT_R10G10B10A2_USCALED,    R10G10B10A2_USCALED},
420      {PIPE_FORMAT_R10G10B10A2_SSCALED,    R10G10B10A2_SSCALED},
421      {PIPE_FORMAT_R10G10B10A2_UINT,       R10G10B10A2_UINT},
422
423      /* rgb10x2 */
424      {PIPE_FORMAT_R10G10B10X2_USCALED,    R10G10B10X2_USCALED},
425
426      /* bgr10a2 */
427      {PIPE_FORMAT_B10G10R10A2_UNORM,      B10G10R10A2_UNORM},
428      {PIPE_FORMAT_B10G10R10A2_SNORM,      B10G10R10A2_SNORM},
429      {PIPE_FORMAT_B10G10R10A2_USCALED,    B10G10R10A2_USCALED},
430      {PIPE_FORMAT_B10G10R10A2_SSCALED,    B10G10R10A2_SSCALED},
431      {PIPE_FORMAT_B10G10R10A2_UINT,       B10G10R10A2_UINT},
432
433      /* bgr10x2 */
434      {PIPE_FORMAT_B10G10R10X2_UNORM,      B10G10R10X2_UNORM},
435
436      /* r11g11b10 */
437      {PIPE_FORMAT_R11G11B10_FLOAT,        R11G11B10_FLOAT},
438
439      /* 32 bits per component */
440      {PIPE_FORMAT_R32_FLOAT,              R32_FLOAT},
441      {PIPE_FORMAT_R32G32_FLOAT,           R32G32_FLOAT},
442      {PIPE_FORMAT_R32G32B32_FLOAT,        R32G32B32_FLOAT},
443      {PIPE_FORMAT_R32G32B32A32_FLOAT,     R32G32B32A32_FLOAT},
444      {PIPE_FORMAT_R32G32B32X32_FLOAT,     R32G32B32X32_FLOAT},
445
446      {PIPE_FORMAT_R32_USCALED,            R32_USCALED},
447      {PIPE_FORMAT_R32G32_USCALED,         R32G32_USCALED},
448      {PIPE_FORMAT_R32G32B32_USCALED,      R32G32B32_USCALED},
449      {PIPE_FORMAT_R32G32B32A32_USCALED,   R32G32B32A32_USCALED},
450
451      {PIPE_FORMAT_R32_SSCALED,            R32_SSCALED},
452      {PIPE_FORMAT_R32G32_SSCALED,         R32G32_SSCALED},
453      {PIPE_FORMAT_R32G32B32_SSCALED,      R32G32B32_SSCALED},
454      {PIPE_FORMAT_R32G32B32A32_SSCALED,   R32G32B32A32_SSCALED},
455
456      {PIPE_FORMAT_R32_UINT,               R32_UINT},
457      {PIPE_FORMAT_R32G32_UINT,            R32G32_UINT},
458      {PIPE_FORMAT_R32G32B32_UINT,         R32G32B32_UINT},
459      {PIPE_FORMAT_R32G32B32A32_UINT,      R32G32B32A32_UINT},
460
461      {PIPE_FORMAT_R32_SINT,               R32_SINT},
462      {PIPE_FORMAT_R32G32_SINT,            R32G32_SINT},
463      {PIPE_FORMAT_R32G32B32_SINT,         R32G32B32_SINT},
464      {PIPE_FORMAT_R32G32B32A32_SINT,      R32G32B32A32_SINT},
465
466      /* 16 bits per component */
467      {PIPE_FORMAT_R16_UNORM,              R16_UNORM},
468      {PIPE_FORMAT_R16G16_UNORM,           R16G16_UNORM},
469      {PIPE_FORMAT_R16G16B16_UNORM,        R16G16B16_UNORM},
470      {PIPE_FORMAT_R16G16B16A16_UNORM,     R16G16B16A16_UNORM},
471      {PIPE_FORMAT_R16G16B16X16_UNORM,     R16G16B16X16_UNORM},
472
473      {PIPE_FORMAT_R16_USCALED,            R16_USCALED},
474      {PIPE_FORMAT_R16G16_USCALED,         R16G16_USCALED},
475      {PIPE_FORMAT_R16G16B16_USCALED,      R16G16B16_USCALED},
476      {PIPE_FORMAT_R16G16B16A16_USCALED,   R16G16B16A16_USCALED},
477
478      {PIPE_FORMAT_R16_SNORM,              R16_SNORM},
479      {PIPE_FORMAT_R16G16_SNORM,           R16G16_SNORM},
480      {PIPE_FORMAT_R16G16B16_SNORM,        R16G16B16_SNORM},
481      {PIPE_FORMAT_R16G16B16A16_SNORM,     R16G16B16A16_SNORM},
482
483      {PIPE_FORMAT_R16_SSCALED,            R16_SSCALED},
484      {PIPE_FORMAT_R16G16_SSCALED,         R16G16_SSCALED},
485      {PIPE_FORMAT_R16G16B16_SSCALED,      R16G16B16_SSCALED},
486      {PIPE_FORMAT_R16G16B16A16_SSCALED,   R16G16B16A16_SSCALED},
487
488      {PIPE_FORMAT_R16_UINT,               R16_UINT},
489      {PIPE_FORMAT_R16G16_UINT,            R16G16_UINT},
490      {PIPE_FORMAT_R16G16B16_UINT,         R16G16B16_UINT},
491      {PIPE_FORMAT_R16G16B16A16_UINT,      R16G16B16A16_UINT},
492
493      {PIPE_FORMAT_R16_SINT,               R16_SINT},
494      {PIPE_FORMAT_R16G16_SINT,            R16G16_SINT},
495      {PIPE_FORMAT_R16G16B16_SINT,         R16G16B16_SINT},
496      {PIPE_FORMAT_R16G16B16A16_SINT,      R16G16B16A16_SINT},
497
498      {PIPE_FORMAT_R16_FLOAT,              R16_FLOAT},
499      {PIPE_FORMAT_R16G16_FLOAT,           R16G16_FLOAT},
500      {PIPE_FORMAT_R16G16B16_FLOAT,        R16G16B16_FLOAT},
501      {PIPE_FORMAT_R16G16B16A16_FLOAT,     R16G16B16A16_FLOAT},
502      {PIPE_FORMAT_R16G16B16X16_FLOAT,     R16G16B16X16_FLOAT},
503
504      /* 8 bits per component */
505      {PIPE_FORMAT_R8_UNORM,               R8_UNORM},
506      {PIPE_FORMAT_R8G8_UNORM,             R8G8_UNORM},
507      {PIPE_FORMAT_R8G8B8_UNORM,           R8G8B8_UNORM},
508      {PIPE_FORMAT_R8G8B8_SRGB,            R8G8B8_UNORM_SRGB},
509      {PIPE_FORMAT_R8G8B8A8_UNORM,         R8G8B8A8_UNORM},
510      {PIPE_FORMAT_R8G8B8A8_SRGB,          R8G8B8A8_UNORM_SRGB},
511      {PIPE_FORMAT_R8G8B8X8_UNORM,         R8G8B8X8_UNORM},
512      {PIPE_FORMAT_R8G8B8X8_SRGB,          R8G8B8X8_UNORM_SRGB},
513
514      {PIPE_FORMAT_R8_USCALED,             R8_USCALED},
515      {PIPE_FORMAT_R8G8_USCALED,           R8G8_USCALED},
516      {PIPE_FORMAT_R8G8B8_USCALED,         R8G8B8_USCALED},
517      {PIPE_FORMAT_R8G8B8A8_USCALED,       R8G8B8A8_USCALED},
518
519      {PIPE_FORMAT_R8_SNORM,               R8_SNORM},
520      {PIPE_FORMAT_R8G8_SNORM,             R8G8_SNORM},
521      {PIPE_FORMAT_R8G8B8_SNORM,           R8G8B8_SNORM},
522      {PIPE_FORMAT_R8G8B8A8_SNORM,         R8G8B8A8_SNORM},
523
524      {PIPE_FORMAT_R8_SSCALED,             R8_SSCALED},
525      {PIPE_FORMAT_R8G8_SSCALED,           R8G8_SSCALED},
526      {PIPE_FORMAT_R8G8B8_SSCALED,         R8G8B8_SSCALED},
527      {PIPE_FORMAT_R8G8B8A8_SSCALED,       R8G8B8A8_SSCALED},
528
529      {PIPE_FORMAT_R8_UINT,                R8_UINT},
530      {PIPE_FORMAT_R8G8_UINT,              R8G8_UINT},
531      {PIPE_FORMAT_R8G8B8_UINT,            R8G8B8_UINT},
532      {PIPE_FORMAT_R8G8B8A8_UINT,          R8G8B8A8_UINT},
533
534      {PIPE_FORMAT_R8_SINT,                R8_SINT},
535      {PIPE_FORMAT_R8G8_SINT,              R8G8_SINT},
536      {PIPE_FORMAT_R8G8B8_SINT,            R8G8B8_SINT},
537      {PIPE_FORMAT_R8G8B8A8_SINT,          R8G8B8A8_SINT},
538
539      /* These formats are valid for vertex data, but should not be used
540       * for render targets.
541       */
542
543      {PIPE_FORMAT_R32_FIXED,              R32_SFIXED},
544      {PIPE_FORMAT_R32G32_FIXED,           R32G32_SFIXED},
545      {PIPE_FORMAT_R32G32B32_FIXED,        R32G32B32_SFIXED},
546      {PIPE_FORMAT_R32G32B32A32_FIXED,     R32G32B32A32_SFIXED},
547
548      {PIPE_FORMAT_R64_FLOAT,              R64_FLOAT},
549      {PIPE_FORMAT_R64G64_FLOAT,           R64G64_FLOAT},
550      {PIPE_FORMAT_R64G64B64_FLOAT,        R64G64B64_FLOAT},
551      {PIPE_FORMAT_R64G64B64A64_FLOAT,     R64G64B64A64_FLOAT},
552
553      /* These formats have entries in SWR but don't have Load/StoreTile
554       * implementations. That means these aren't renderable, and thus having
555       * a mapping entry here is detrimental.
556       */
557      /*
558
559      {PIPE_FORMAT_L8_UNORM,               L8_UNORM},
560      {PIPE_FORMAT_I8_UNORM,               I8_UNORM},
561      {PIPE_FORMAT_L8A8_UNORM,             L8A8_UNORM},
562      {PIPE_FORMAT_L16_UNORM,              L16_UNORM},
563      {PIPE_FORMAT_UYVY,                   YCRCB_SWAPUVY},
564
565      {PIPE_FORMAT_L8_SRGB,                L8_UNORM_SRGB},
566      {PIPE_FORMAT_L8A8_SRGB,              L8A8_UNORM_SRGB},
567
568      {PIPE_FORMAT_DXT1_RGBA,              BC1_UNORM},
569      {PIPE_FORMAT_DXT3_RGBA,              BC2_UNORM},
570      {PIPE_FORMAT_DXT5_RGBA,              BC3_UNORM},
571
572      {PIPE_FORMAT_DXT1_SRGBA,             BC1_UNORM_SRGB},
573      {PIPE_FORMAT_DXT3_SRGBA,             BC2_UNORM_SRGB},
574      {PIPE_FORMAT_DXT5_SRGBA,             BC3_UNORM_SRGB},
575
576      {PIPE_FORMAT_RGTC1_UNORM,            BC4_UNORM},
577      {PIPE_FORMAT_RGTC1_SNORM,            BC4_SNORM},
578      {PIPE_FORMAT_RGTC2_UNORM,            BC5_UNORM},
579      {PIPE_FORMAT_RGTC2_SNORM,            BC5_SNORM},
580
581      {PIPE_FORMAT_L16A16_UNORM,           L16A16_UNORM},
582      {PIPE_FORMAT_I16_UNORM,              I16_UNORM},
583      {PIPE_FORMAT_L16_FLOAT,              L16_FLOAT},
584      {PIPE_FORMAT_L16A16_FLOAT,           L16A16_FLOAT},
585      {PIPE_FORMAT_I16_FLOAT,              I16_FLOAT},
586      {PIPE_FORMAT_L32_FLOAT,              L32_FLOAT},
587      {PIPE_FORMAT_L32A32_FLOAT,           L32A32_FLOAT},
588      {PIPE_FORMAT_I32_FLOAT,              I32_FLOAT},
589
590      {PIPE_FORMAT_I8_UINT,                I8_UINT},
591      {PIPE_FORMAT_L8_UINT,                L8_UINT},
592      {PIPE_FORMAT_L8A8_UINT,              L8A8_UINT},
593
594      {PIPE_FORMAT_I8_SINT,                I8_SINT},
595      {PIPE_FORMAT_L8_SINT,                L8_SINT},
596      {PIPE_FORMAT_L8A8_SINT,              L8A8_SINT},
597
598      */
599   };
600
601   auto it = mesa2swr.find(format);
602   if (it == mesa2swr.end())
603      return (SWR_FORMAT)-1;
604   else
605      return it->second;
606}
607
608static bool
609swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res)
610{
611   struct sw_winsys *winsys = screen->winsys;
612   struct sw_displaytarget *dt;
613
614   const unsigned width = align(res->swr.width, res->swr.halign);
615   const unsigned height = align(res->swr.height, res->swr.valign);
616
617   UINT stride;
618   dt = winsys->displaytarget_create(winsys,
619                                     res->base.bind,
620                                     res->base.format,
621                                     width, height,
622                                     64, NULL,
623                                     &stride);
624
625   if (dt == NULL)
626      return false;
627
628   void *map = winsys->displaytarget_map(winsys, dt, 0);
629
630   res->display_target = dt;
631   res->swr.xpBaseAddress = (gfxptr_t)map;
632
633   /* Clear the display target surface */
634   if (map)
635      memset(map, 0, height * stride);
636
637   winsys->displaytarget_unmap(winsys, dt);
638
639   return true;
640}
641
642static bool
643swr_texture_layout(struct swr_screen *screen,
644                   struct swr_resource *res,
645                   bool allocate)
646{
647   struct pipe_resource *pt = &res->base;
648
649   pipe_format fmt = pt->format;
650   const struct util_format_description *desc = util_format_description(fmt);
651
652   res->has_depth = util_format_has_depth(desc);
653   res->has_stencil = util_format_has_stencil(desc);
654
655   if (res->has_stencil && !res->has_depth)
656      fmt = PIPE_FORMAT_R8_UINT;
657
658   /* We always use the SWR layout. For 2D and 3D textures this looks like:
659    *
660    * |<------- pitch ------->|
661    * +=======================+-------
662    * |Array 0                |   ^
663    * |                       |   |
664    * |        Level 0        |   |
665    * |                       |   |
666    * |                       | qpitch
667    * +-----------+-----------+   |
668    * |           | L2L2L2L2  |   |
669    * |  Level 1  | L3L3      |   |
670    * |           | L4        |   v
671    * +===========+===========+-------
672    * |Array 1                |
673    * |                       |
674    * |        Level 0        |
675    * |                       |
676    * |                       |
677    * +-----------+-----------+
678    * |           | L2L2L2L2  |
679    * |  Level 1  | L3L3      |
680    * |           | L4        |
681    * +===========+===========+
682    *
683    * The overall width in bytes is known as the pitch, while the overall
684    * height in rows is the qpitch. Array slices are laid out logically below
685    * one another, qpitch rows apart. For 3D surfaces, the "level" values are
686    * just invalid for the higher array numbers (since depth is also
687    * minified). 1D and 1D array surfaces are stored effectively the same way,
688    * except that pitch never plays into it. All the levels are logically
689    * adjacent to each other on the X axis. The qpitch becomes the number of
690    * elements between array slices, while the pitch is unused.
691    *
692    * Each level's sizes are subject to the valign and halign settings of the
693    * surface. For compressed formats that swr is unaware of, we will use an
694    * appropriately-sized uncompressed format, and scale the widths/heights.
695    *
696    * This surface is stored inside res->swr. For depth/stencil textures,
697    * res->secondary will have an identically-laid-out but R8_UINT-formatted
698    * stencil tree. In the Z32F_S8 case, the primary surface still has 64-bpp
699    * texels, to simplify map/unmap logic which copies the stencil values
700    * in/out.
701    */
702
703   res->swr.width = pt->width0;
704   res->swr.height = pt->height0;
705   res->swr.type = swr_convert_target_type(pt->target);
706   res->swr.tileMode = SWR_TILE_NONE;
707   res->swr.format = mesa_to_swr_format(fmt);
708   res->swr.numSamples = std::max(1u, pt->nr_samples);
709
710   if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) {
711      res->swr.halign = KNOB_MACROTILE_X_DIM;
712      res->swr.valign = KNOB_MACROTILE_Y_DIM;
713
714      /* If SWR_MSAA_FORCE_ENABLE is set, turn on MSAA and override requested
715       * surface sample count. */
716      if (screen->msaa_force_enable) {
717         res->swr.numSamples = screen->msaa_max_count;
718         swr_print_info("swr_texture_layout: forcing sample count: %d\n",
719                 res->swr.numSamples);
720      }
721   } else {
722      res->swr.halign = 1;
723      res->swr.valign = 1;
724   }
725
726   unsigned halign = res->swr.halign * util_format_get_blockwidth(fmt);
727   unsigned width = align(pt->width0, halign);
728   if (pt->target == PIPE_TEXTURE_1D || pt->target == PIPE_TEXTURE_1D_ARRAY) {
729      for (int level = 1; level <= pt->last_level; level++)
730         width += align(u_minify(pt->width0, level), halign);
731      res->swr.pitch = util_format_get_blocksize(fmt);
732      res->swr.qpitch = util_format_get_nblocksx(fmt, width);
733   } else {
734      // The pitch is the overall width of the texture in bytes. Most of the
735      // time this is the pitch of level 0 since all the other levels fit
736      // underneath it. However in some degenerate situations, the width of
737      // level1 + level2 may be larger. In that case, we use those
738      // widths. This can happen if, e.g. halign is 32, and the width of level
739      // 0 is 32 or less. In that case, the aligned levels 1 and 2 will also
740      // be 32 each, adding up to 64.
741      unsigned valign = res->swr.valign * util_format_get_blockheight(fmt);
742      if (pt->last_level > 1) {
743         width = std::max<uint32_t>(
744               width,
745               align(u_minify(pt->width0, 1), halign) +
746               align(u_minify(pt->width0, 2), halign));
747      }
748      res->swr.pitch = util_format_get_stride(fmt, width);
749
750      // The qpitch is controlled by either the height of the second LOD, or
751      // the combination of all the later LODs.
752      unsigned height = align(pt->height0, valign);
753      if (pt->last_level == 1) {
754         height += align(u_minify(pt->height0, 1), valign);
755      } else if (pt->last_level > 1) {
756         unsigned level1 = align(u_minify(pt->height0, 1), valign);
757         unsigned level2 = 0;
758         for (int level = 2; level <= pt->last_level; level++) {
759            level2 += align(u_minify(pt->height0, level), valign);
760         }
761         height += std::max(level1, level2);
762      }
763      res->swr.qpitch = util_format_get_nblocksy(fmt, height);
764   }
765
766   if (pt->target == PIPE_TEXTURE_3D)
767      res->swr.depth = pt->depth0;
768   else
769      res->swr.depth = pt->array_size;
770
771   // Fix up swr format if necessary so that LOD offset computation works
772   if (res->swr.format == (SWR_FORMAT)-1) {
773      switch (util_format_get_blocksize(fmt)) {
774      default:
775         unreachable("Unexpected format block size");
776      case 1: res->swr.format = R8_UINT; break;
777      case 2: res->swr.format = R16_UINT; break;
778      case 4: res->swr.format = R32_UINT; break;
779      case 8:
780         if (util_format_is_compressed(fmt))
781            res->swr.format = BC4_UNORM;
782         else
783            res->swr.format = R32G32_UINT;
784         break;
785      case 16:
786         if (util_format_is_compressed(fmt))
787            res->swr.format = BC5_UNORM;
788         else
789            res->swr.format = R32G32B32A32_UINT;
790         break;
791      }
792   }
793
794   for (int level = 0; level <= pt->last_level; level++) {
795      res->mip_offsets[level] =
796         ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->swr);
797   }
798
799   size_t total_size = (uint64_t)res->swr.depth * res->swr.qpitch *
800                                 res->swr.pitch * res->swr.numSamples;
801
802   // Let non-sampled textures (e.g. buffer objects) bypass the size limit
803   if (swr_resource_is_texture(&res->base) && total_size > SWR_MAX_TEXTURE_SIZE)
804      return false;
805
806   if (allocate) {
807      res->swr.xpBaseAddress = (gfxptr_t)AlignedMalloc(total_size, 64);
808      if (!res->swr.xpBaseAddress)
809         return false;
810
811      if (res->has_depth && res->has_stencil) {
812         res->secondary = res->swr;
813         res->secondary.format = R8_UINT;
814         res->secondary.pitch = res->swr.pitch / util_format_get_blocksize(fmt);
815
816         for (int level = 0; level <= pt->last_level; level++) {
817            res->secondary_mip_offsets[level] =
818               ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->secondary);
819         }
820
821         total_size = res->secondary.depth * res->secondary.qpitch *
822                      res->secondary.pitch * res->secondary.numSamples;
823
824         res->secondary.xpBaseAddress = (gfxptr_t) AlignedMalloc(total_size, 64);
825         if (!res->secondary.xpBaseAddress) {
826            AlignedFree((void *)res->swr.xpBaseAddress);
827            return false;
828         }
829      }
830   }
831
832   return true;
833}
834
835static bool
836swr_can_create_resource(struct pipe_screen *screen,
837                        const struct pipe_resource *templat)
838{
839   struct swr_resource res;
840   memset(&res, 0, sizeof(res));
841   res.base = *templat;
842   return swr_texture_layout(swr_screen(screen), &res, false);
843}
844
845/* Helper function that conditionally creates a single-sample resolve resource
846 * and attaches it to main multisample resource. */
847static bool
848swr_create_resolve_resource(struct pipe_screen *_screen,
849                            struct swr_resource *msaa_res)
850{
851   struct swr_screen *screen = swr_screen(_screen);
852
853   /* If resource is multisample, create a single-sample resolve resource */
854   if (msaa_res->base.nr_samples > 1 || (screen->msaa_force_enable &&
855            !(msaa_res->base.flags & SWR_RESOURCE_FLAG_ALT_SURFACE))) {
856
857      /* Create a single-sample copy of the resource.  Copy the original
858       * resource parameters and set flag to prevent recursion when re-calling
859       * resource_create */
860      struct pipe_resource alt_template = msaa_res->base;
861      alt_template.nr_samples = 0;
862      alt_template.flags |= SWR_RESOURCE_FLAG_ALT_SURFACE;
863
864      /* Note: Display_target is a special single-sample resource, only the
865       * display_target has been created already. */
866      if (msaa_res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT
867               | PIPE_BIND_SHARED)) {
868         /* Allocate the multisample buffers. */
869         if (!swr_texture_layout(screen, msaa_res, true))
870            return false;
871
872         /* Alt resource will only be bound as PIPE_BIND_RENDER_TARGET
873          * remove the DISPLAY_TARGET, SCANOUT, and SHARED bindings */
874         alt_template.bind = PIPE_BIND_RENDER_TARGET;
875      }
876
877      /* Allocate single-sample resolve surface */
878      struct pipe_resource *alt;
879      alt = _screen->resource_create(_screen, &alt_template);
880      if (!alt)
881         return false;
882
883      /* Attach it to the multisample resource */
884      msaa_res->resolve_target = alt;
885
886      /* Hang resolve surface state off the multisample surface state to so
887       * StoreTiles knows where to resolve the surface. */
888      msaa_res->swr.xpAuxBaseAddress = (gfxptr_t)&swr_resource(alt)->swr;
889   }
890
891   return true; /* success */
892}
893
894static struct pipe_resource *
895swr_resource_create(struct pipe_screen *_screen,
896                    const struct pipe_resource *templat)
897{
898   struct swr_screen *screen = swr_screen(_screen);
899   struct swr_resource *res = CALLOC_STRUCT(swr_resource);
900   if (!res)
901      return NULL;
902
903   res->base = *templat;
904   pipe_reference_init(&res->base.reference, 1);
905   res->base.screen = &screen->base;
906
907   if (swr_resource_is_texture(&res->base)) {
908      if (res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT
909                            | PIPE_BIND_SHARED)) {
910         /* displayable surface
911          * first call swr_texture_layout without allocating to finish
912          * filling out the SWR_SURFACE_STATE in res */
913         swr_texture_layout(screen, res, false);
914         if (!swr_displaytarget_layout(screen, res))
915            goto fail;
916      } else {
917         /* texture map */
918         if (!swr_texture_layout(screen, res, true))
919            goto fail;
920      }
921
922      /* If resource was multisample, create resolve resource and attach
923       * it to multisample resource. */
924      if (!swr_create_resolve_resource(_screen, res))
925            goto fail;
926
927   } else {
928      /* other data (vertex buffer, const buffer, etc) */
929      assert(util_format_get_blocksize(templat->format) == 1);
930      assert(templat->height0 == 1);
931      assert(templat->depth0 == 1);
932      assert(templat->last_level == 0);
933
934      /* Easiest to just call swr_texture_layout, as it sets up
935       * SWR_SURFACE_STATE in res */
936      if (!swr_texture_layout(screen, res, true))
937         goto fail;
938   }
939
940   return &res->base;
941
942fail:
943   FREE(res);
944   return NULL;
945}
946
947static void
948swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt)
949{
950   struct swr_screen *screen = swr_screen(p_screen);
951   struct swr_resource *spr = swr_resource(pt);
952
953   if (spr->display_target) {
954      /* If resource is display target, winsys manages the buffer and will
955       * free it on displaytarget_destroy. */
956      swr_fence_finish(p_screen, NULL, screen->flush_fence, 0);
957
958      struct sw_winsys *winsys = screen->winsys;
959      winsys->displaytarget_destroy(winsys, spr->display_target);
960
961      if (spr->swr.numSamples > 1) {
962         /* Free an attached resolve resource */
963         struct swr_resource *alt = swr_resource(spr->resolve_target);
964         swr_fence_work_free(screen->flush_fence, (void*)(alt->swr.xpBaseAddress), true);
965
966         /* Free multisample buffer */
967         swr_fence_work_free(screen->flush_fence, (void*)(spr->swr.xpBaseAddress), true);
968      }
969   } else {
970      /* For regular resources, defer deletion */
971      swr_resource_unused(pt);
972
973      if (spr->swr.numSamples > 1) {
974         /* Free an attached resolve resource */
975         struct swr_resource *alt = swr_resource(spr->resolve_target);
976         swr_fence_work_free(screen->flush_fence, (void*)(alt->swr.xpBaseAddress), true);
977      }
978
979      swr_fence_work_free(screen->flush_fence, (void*)(spr->swr.xpBaseAddress), true);
980      swr_fence_work_free(screen->flush_fence,
981                          (void*)(spr->secondary.xpBaseAddress), true);
982
983      /* If work queue grows too large, submit a fence to force queue to
984       * drain.  This is mainly to decrease the amount of memory used by the
985       * piglit streaming-texture-leak test */
986      if (screen->pipe && swr_fence(screen->flush_fence)->work.count > 64)
987         swr_fence_submit(swr_context(screen->pipe), screen->flush_fence);
988   }
989
990   FREE(spr);
991}
992
993
994static void
995swr_flush_frontbuffer(struct pipe_screen *p_screen,
996                      struct pipe_context *pipe,
997                      struct pipe_resource *resource,
998                      unsigned level,
999                      unsigned layer,
1000                      void *context_private,
1001                      struct pipe_box *sub_box)
1002{
1003   struct swr_screen *screen = swr_screen(p_screen);
1004   struct sw_winsys *winsys = screen->winsys;
1005   struct swr_resource *spr = swr_resource(resource);
1006   struct swr_context *ctx = swr_context(pipe);
1007
1008   if (pipe) {
1009      swr_fence_finish(p_screen, NULL, screen->flush_fence, 0);
1010      swr_resource_unused(resource);
1011      ctx->api.pfnSwrEndFrame(ctx->swrContext);
1012   }
1013
1014   /* Multisample resolved into resolve_target at flush with store_resource */
1015   if (pipe && spr->swr.numSamples > 1) {
1016      struct pipe_resource *resolve_target = spr->resolve_target;
1017
1018      /* Once resolved, copy into display target */
1019      SWR_SURFACE_STATE *resolve = &swr_resource(resolve_target)->swr;
1020
1021      void *map = winsys->displaytarget_map(winsys, spr->display_target,
1022                                            PIPE_MAP_WRITE);
1023      memcpy(map, (void*)(resolve->xpBaseAddress), resolve->pitch * resolve->height);
1024      winsys->displaytarget_unmap(winsys, spr->display_target);
1025   }
1026
1027   debug_assert(spr->display_target);
1028   if (spr->display_target)
1029      winsys->displaytarget_display(
1030         winsys, spr->display_target, context_private, sub_box);
1031}
1032
1033
1034void
1035swr_destroy_screen_internal(struct swr_screen **screen)
1036{
1037   struct pipe_screen *p_screen = &(*screen)->base;
1038
1039   swr_fence_finish(p_screen, NULL, (*screen)->flush_fence, 0);
1040   swr_fence_reference(p_screen, &(*screen)->flush_fence, NULL);
1041
1042   JitDestroyContext((*screen)->hJitMgr);
1043
1044   if ((*screen)->pLibrary)
1045      util_dl_close((*screen)->pLibrary);
1046
1047   FREE(*screen);
1048   *screen = NULL;
1049}
1050
1051
1052static void
1053swr_destroy_screen(struct pipe_screen *p_screen)
1054{
1055   struct swr_screen *screen = swr_screen(p_screen);
1056   struct sw_winsys *winsys = screen->winsys;
1057
1058   swr_print_info("SWR destroy screen!\n");
1059
1060   if (winsys->destroy)
1061      winsys->destroy(winsys);
1062
1063   swr_destroy_screen_internal(&screen);
1064}
1065
1066
1067static void
1068swr_validate_env_options(struct swr_screen *screen)
1069{
1070   /* The client_copy_limit sets a maximum on the amount of user-buffer memory
1071    * copied to scratch space on a draw.  Past this, the draw will access
1072    * user-buffer directly and then block.  This is faster than queuing many
1073    * large client draws. */
1074   screen->client_copy_limit = SWR_CLIENT_COPY_LIMIT;
1075   int client_copy_limit =
1076      debug_get_num_option("SWR_CLIENT_COPY_LIMIT", SWR_CLIENT_COPY_LIMIT);
1077   if (client_copy_limit > 0)
1078      screen->client_copy_limit = client_copy_limit;
1079
1080   /* XXX msaa under development, disable by default for now */
1081   screen->msaa_max_count = 1; /* was SWR_MAX_NUM_MULTISAMPLES; */
1082
1083   /* validate env override values, within range and power of 2 */
1084   int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 1);
1085   if (msaa_max_count != 1) {
1086      if ((msaa_max_count < 1) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
1087            || !util_is_power_of_two_or_zero(msaa_max_count)) {
1088         fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count);
1089         fprintf(stderr, "must be power of 2 between 1 and %d" \
1090                         " (or 1 to disable msaa)\n",
1091               SWR_MAX_NUM_MULTISAMPLES);
1092         fprintf(stderr, "(msaa disabled)\n");
1093         msaa_max_count = 1;
1094      }
1095
1096      swr_print_info("SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count);
1097
1098      screen->msaa_max_count = msaa_max_count;
1099   }
1100
1101   screen->msaa_force_enable = debug_get_bool_option(
1102         "SWR_MSAA_FORCE_ENABLE", false);
1103   if (screen->msaa_force_enable)
1104      swr_print_info("SWR_MSAA_FORCE_ENABLE: true\n");
1105}
1106
1107
1108struct pipe_screen *
1109swr_create_screen_internal(struct sw_winsys *winsys)
1110{
1111   struct swr_screen *screen = CALLOC_STRUCT(swr_screen);
1112
1113   if (!screen)
1114      return NULL;
1115
1116   if (!lp_build_init()) {
1117      FREE(screen);
1118      return NULL;
1119   }
1120
1121   screen->winsys = winsys;
1122   screen->base.get_name = swr_get_name;
1123   screen->base.get_vendor = swr_get_vendor;
1124   screen->base.is_format_supported = swr_is_format_supported;
1125   screen->base.context_create = swr_create_context;
1126   screen->base.can_create_resource = swr_can_create_resource;
1127
1128   screen->base.destroy = swr_destroy_screen;
1129   screen->base.get_param = swr_get_param;
1130   screen->base.get_shader_param = swr_get_shader_param;
1131   screen->base.get_paramf = swr_get_paramf;
1132
1133   screen->base.resource_create = swr_resource_create;
1134   screen->base.resource_destroy = swr_resource_destroy;
1135
1136   screen->base.flush_frontbuffer = swr_flush_frontbuffer;
1137
1138   // Pass in "" for architecture for run-time determination
1139   screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, "", "swr");
1140
1141   swr_fence_init(&screen->base);
1142
1143   swr_validate_env_options(screen);
1144
1145   return &screen->base;
1146}
1147