101e04c3fSmrg/* 201e04c3fSmrg * Copyright 2007 VMware, Inc. 301e04c3fSmrg * Copyright 2016 Advanced Micro Devices, Inc. 401e04c3fSmrg * 501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 601e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 701e04c3fSmrg * to deal in the Software without restriction, including without limitation 801e04c3fSmrg * on the rights to use, copy, modify, merge, publish, distribute, sub 901e04c3fSmrg * license, and/or sell copies of the Software, and to permit persons to whom 1001e04c3fSmrg * the Software is furnished to do so, subject to the following conditions: 1101e04c3fSmrg * 1201e04c3fSmrg * The above copyright notice and this permission notice (including the next 1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1401e04c3fSmrg * Software. 1501e04c3fSmrg * 1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 1901e04c3fSmrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 2001e04c3fSmrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 2101e04c3fSmrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 2201e04c3fSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 2301e04c3fSmrg */ 2401e04c3fSmrg 2501e04c3fSmrg/** 2601e04c3fSmrg * \file 2701e04c3fSmrg * 2801e04c3fSmrg * Common helper functions for PBO up- and downloads. 2901e04c3fSmrg */ 3001e04c3fSmrg 3101e04c3fSmrg#include "state_tracker/st_context.h" 32b9abf16eSmaya#include "state_tracker/st_nir.h" 3301e04c3fSmrg#include "state_tracker/st_pbo.h" 3401e04c3fSmrg#include "state_tracker/st_cb_bufferobjects.h" 3501e04c3fSmrg 3601e04c3fSmrg#include "pipe/p_context.h" 3701e04c3fSmrg#include "pipe/p_defines.h" 3801e04c3fSmrg#include "pipe/p_screen.h" 3901e04c3fSmrg#include "cso_cache/cso_context.h" 4001e04c3fSmrg#include "tgsi/tgsi_ureg.h" 417ec681f3Smrg#include "util/format/u_format.h" 4201e04c3fSmrg#include "util/u_inlines.h" 4301e04c3fSmrg#include "util/u_upload_mgr.h" 4401e04c3fSmrg 45b9abf16eSmaya#include "compiler/nir/nir_builder.h" 46b9abf16eSmaya 4701e04c3fSmrg/* Conversion to apply in the fragment shader. */ 4801e04c3fSmrgenum st_pbo_conversion { 497ec681f3Smrg ST_PBO_CONVERT_FLOAT = 0, 507ec681f3Smrg ST_PBO_CONVERT_UINT, 517ec681f3Smrg ST_PBO_CONVERT_SINT, 5201e04c3fSmrg ST_PBO_CONVERT_UINT_TO_SINT, 5301e04c3fSmrg ST_PBO_CONVERT_SINT_TO_UINT, 5401e04c3fSmrg 5501e04c3fSmrg ST_NUM_PBO_CONVERSIONS 5601e04c3fSmrg}; 5701e04c3fSmrg 5801e04c3fSmrg/* Final setup of buffer addressing information. 5901e04c3fSmrg * 6001e04c3fSmrg * buf_offset is in pixels. 6101e04c3fSmrg * 6201e04c3fSmrg * Returns false if something (e.g. alignment) prevents PBO upload/download. 6301e04c3fSmrg */ 6401e04c3fSmrgbool 6501e04c3fSmrgst_pbo_addresses_setup(struct st_context *st, 6601e04c3fSmrg struct pipe_resource *buf, intptr_t buf_offset, 6701e04c3fSmrg struct st_pbo_addresses *addr) 6801e04c3fSmrg{ 6901e04c3fSmrg unsigned skip_pixels; 7001e04c3fSmrg 7101e04c3fSmrg /* Check alignment against texture buffer requirements. */ 7201e04c3fSmrg { 7301e04c3fSmrg unsigned ofs = (buf_offset * addr->bytes_per_pixel) % st->ctx->Const.TextureBufferOffsetAlignment; 7401e04c3fSmrg if (ofs != 0) { 7501e04c3fSmrg if (ofs % addr->bytes_per_pixel != 0) 7601e04c3fSmrg return false; 7701e04c3fSmrg 7801e04c3fSmrg skip_pixels = ofs / addr->bytes_per_pixel; 7901e04c3fSmrg buf_offset -= skip_pixels; 8001e04c3fSmrg } else { 8101e04c3fSmrg skip_pixels = 0; 8201e04c3fSmrg } 8301e04c3fSmrg } 8401e04c3fSmrg 8501e04c3fSmrg assert(buf_offset >= 0); 8601e04c3fSmrg 8701e04c3fSmrg addr->buffer = buf; 8801e04c3fSmrg addr->first_element = buf_offset; 8901e04c3fSmrg addr->last_element = buf_offset + skip_pixels + addr->width - 1 9001e04c3fSmrg + (addr->height - 1 + (addr->depth - 1) * addr->image_height) * addr->pixels_per_row; 9101e04c3fSmrg 9201e04c3fSmrg if (addr->last_element - addr->first_element > st->ctx->Const.MaxTextureBufferSize - 1) 9301e04c3fSmrg return false; 9401e04c3fSmrg 9501e04c3fSmrg /* This should be ensured by Mesa before calling our callbacks */ 9601e04c3fSmrg assert((addr->last_element + 1) * addr->bytes_per_pixel <= buf->width0); 9701e04c3fSmrg 9801e04c3fSmrg addr->constants.xoffset = -addr->xoffset + skip_pixels; 9901e04c3fSmrg addr->constants.yoffset = -addr->yoffset; 10001e04c3fSmrg addr->constants.stride = addr->pixels_per_row; 10101e04c3fSmrg addr->constants.image_size = addr->pixels_per_row * addr->image_height; 10201e04c3fSmrg addr->constants.layer_offset = 0; 10301e04c3fSmrg 10401e04c3fSmrg return true; 10501e04c3fSmrg} 10601e04c3fSmrg 10701e04c3fSmrg/* Validate and fill buffer addressing information based on GL pixelstore 10801e04c3fSmrg * attributes. 10901e04c3fSmrg * 11001e04c3fSmrg * Returns false if some aspect of the addressing (e.g. alignment) prevents 11101e04c3fSmrg * PBO upload/download. 11201e04c3fSmrg */ 11301e04c3fSmrgbool 11401e04c3fSmrgst_pbo_addresses_pixelstore(struct st_context *st, 11501e04c3fSmrg GLenum gl_target, bool skip_images, 11601e04c3fSmrg const struct gl_pixelstore_attrib *store, 11701e04c3fSmrg const void *pixels, 11801e04c3fSmrg struct st_pbo_addresses *addr) 11901e04c3fSmrg{ 12001e04c3fSmrg struct pipe_resource *buf = st_buffer_object(store->BufferObj)->buffer; 12101e04c3fSmrg intptr_t buf_offset = (intptr_t) pixels; 12201e04c3fSmrg 12301e04c3fSmrg if (buf_offset % addr->bytes_per_pixel) 12401e04c3fSmrg return false; 12501e04c3fSmrg 12601e04c3fSmrg /* Convert to texels */ 12701e04c3fSmrg buf_offset = buf_offset / addr->bytes_per_pixel; 12801e04c3fSmrg 12901e04c3fSmrg /* Determine image height */ 13001e04c3fSmrg if (gl_target == GL_TEXTURE_1D_ARRAY) { 13101e04c3fSmrg addr->image_height = 1; 13201e04c3fSmrg } else { 13301e04c3fSmrg addr->image_height = store->ImageHeight > 0 ? store->ImageHeight : addr->height; 13401e04c3fSmrg } 13501e04c3fSmrg 13601e04c3fSmrg /* Compute the stride, taking store->Alignment into account */ 13701e04c3fSmrg { 13801e04c3fSmrg unsigned pixels_per_row = store->RowLength > 0 ? 13901e04c3fSmrg store->RowLength : addr->width; 14001e04c3fSmrg unsigned bytes_per_row = pixels_per_row * addr->bytes_per_pixel; 14101e04c3fSmrg unsigned remainder = bytes_per_row % store->Alignment; 14201e04c3fSmrg unsigned offset_rows; 14301e04c3fSmrg 14401e04c3fSmrg if (remainder > 0) 14501e04c3fSmrg bytes_per_row += store->Alignment - remainder; 14601e04c3fSmrg 14701e04c3fSmrg if (bytes_per_row % addr->bytes_per_pixel) 14801e04c3fSmrg return false; 14901e04c3fSmrg 15001e04c3fSmrg addr->pixels_per_row = bytes_per_row / addr->bytes_per_pixel; 15101e04c3fSmrg 15201e04c3fSmrg offset_rows = store->SkipRows; 15301e04c3fSmrg if (skip_images) 15401e04c3fSmrg offset_rows += addr->image_height * store->SkipImages; 15501e04c3fSmrg 15601e04c3fSmrg buf_offset += store->SkipPixels + addr->pixels_per_row * offset_rows; 15701e04c3fSmrg } 15801e04c3fSmrg 15901e04c3fSmrg if (!st_pbo_addresses_setup(st, buf, buf_offset, addr)) 16001e04c3fSmrg return false; 16101e04c3fSmrg 16201e04c3fSmrg /* Support GL_PACK_INVERT_MESA */ 16301e04c3fSmrg if (store->Invert) { 16401e04c3fSmrg addr->constants.xoffset += (addr->height - 1) * addr->constants.stride; 16501e04c3fSmrg addr->constants.stride = -addr->constants.stride; 16601e04c3fSmrg } 16701e04c3fSmrg 16801e04c3fSmrg return true; 16901e04c3fSmrg} 17001e04c3fSmrg 17101e04c3fSmrg/* For download from a framebuffer, we may have to invert the Y axis. The 17201e04c3fSmrg * setup is as follows: 17301e04c3fSmrg * - set viewport to inverted, so that the position sysval is correct for 17401e04c3fSmrg * texel fetches 17501e04c3fSmrg * - this function adjusts the fragment shader's constant buffer to compute 17601e04c3fSmrg * the correct destination addresses. 17701e04c3fSmrg */ 17801e04c3fSmrgvoid 17901e04c3fSmrgst_pbo_addresses_invert_y(struct st_pbo_addresses *addr, 18001e04c3fSmrg unsigned viewport_height) 18101e04c3fSmrg{ 18201e04c3fSmrg addr->constants.xoffset += 18301e04c3fSmrg (viewport_height - 1 + 2 * addr->constants.yoffset) * addr->constants.stride; 18401e04c3fSmrg addr->constants.stride = -addr->constants.stride; 18501e04c3fSmrg} 18601e04c3fSmrg 18701e04c3fSmrg/* Setup all vertex pipeline state, rasterizer state, and fragment shader 18801e04c3fSmrg * constants, and issue the draw call for PBO upload/download. 18901e04c3fSmrg * 19001e04c3fSmrg * The caller is responsible for saving and restoring state, as well as for 19101e04c3fSmrg * setting other fragment shader state (fragment shader, samplers), and 19201e04c3fSmrg * framebuffer/viewport/DSA/blend state. 19301e04c3fSmrg */ 19401e04c3fSmrgbool 19501e04c3fSmrgst_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr, 19601e04c3fSmrg unsigned surface_width, unsigned surface_height) 19701e04c3fSmrg{ 19801e04c3fSmrg struct cso_context *cso = st->cso_context; 1997ec681f3Smrg struct pipe_context *pipe = st->pipe; 20001e04c3fSmrg 20101e04c3fSmrg /* Setup vertex and geometry shaders */ 20201e04c3fSmrg if (!st->pbo.vs) { 20301e04c3fSmrg st->pbo.vs = st_pbo_create_vs(st); 20401e04c3fSmrg if (!st->pbo.vs) 20501e04c3fSmrg return false; 20601e04c3fSmrg } 20701e04c3fSmrg 20801e04c3fSmrg if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) { 20901e04c3fSmrg st->pbo.gs = st_pbo_create_gs(st); 21001e04c3fSmrg if (!st->pbo.gs) 21101e04c3fSmrg return false; 21201e04c3fSmrg } 21301e04c3fSmrg 21401e04c3fSmrg cso_set_vertex_shader_handle(cso, st->pbo.vs); 21501e04c3fSmrg 21601e04c3fSmrg cso_set_geometry_shader_handle(cso, addr->depth != 1 ? st->pbo.gs : NULL); 21701e04c3fSmrg 21801e04c3fSmrg cso_set_tessctrl_shader_handle(cso, NULL); 21901e04c3fSmrg 22001e04c3fSmrg cso_set_tesseval_shader_handle(cso, NULL); 22101e04c3fSmrg 22201e04c3fSmrg /* Upload vertices */ 22301e04c3fSmrg { 22401e04c3fSmrg struct pipe_vertex_buffer vbo = {0}; 2257ec681f3Smrg struct cso_velems_state velem; 22601e04c3fSmrg 22701e04c3fSmrg float x0 = (float) addr->xoffset / surface_width * 2.0f - 1.0f; 22801e04c3fSmrg float y0 = (float) addr->yoffset / surface_height * 2.0f - 1.0f; 22901e04c3fSmrg float x1 = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f; 23001e04c3fSmrg float y1 = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f; 23101e04c3fSmrg 23201e04c3fSmrg float *verts = NULL; 23301e04c3fSmrg 23401e04c3fSmrg vbo.stride = 2 * sizeof(float); 23501e04c3fSmrg 23601e04c3fSmrg u_upload_alloc(st->pipe->stream_uploader, 0, 8 * sizeof(float), 4, 23701e04c3fSmrg &vbo.buffer_offset, &vbo.buffer.resource, (void **) &verts); 23801e04c3fSmrg if (!verts) 23901e04c3fSmrg return false; 24001e04c3fSmrg 24101e04c3fSmrg verts[0] = x0; 24201e04c3fSmrg verts[1] = y0; 24301e04c3fSmrg verts[2] = x0; 24401e04c3fSmrg verts[3] = y1; 24501e04c3fSmrg verts[4] = x1; 24601e04c3fSmrg verts[5] = y0; 24701e04c3fSmrg verts[6] = x1; 24801e04c3fSmrg verts[7] = y1; 24901e04c3fSmrg 25001e04c3fSmrg u_upload_unmap(st->pipe->stream_uploader); 25101e04c3fSmrg 2527ec681f3Smrg velem.count = 1; 2537ec681f3Smrg velem.velems[0].src_offset = 0; 2547ec681f3Smrg velem.velems[0].instance_divisor = 0; 2557ec681f3Smrg velem.velems[0].vertex_buffer_index = 0; 2567ec681f3Smrg velem.velems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; 2577ec681f3Smrg velem.velems[0].dual_slot = false; 25801e04c3fSmrg 2597ec681f3Smrg cso_set_vertex_elements(cso, &velem); 26001e04c3fSmrg 2617ec681f3Smrg cso_set_vertex_buffers(cso, 0, 1, &vbo); 2627ec681f3Smrg st->last_num_vbuffers = MAX2(st->last_num_vbuffers, 1); 26301e04c3fSmrg 26401e04c3fSmrg pipe_resource_reference(&vbo.buffer.resource, NULL); 26501e04c3fSmrg } 26601e04c3fSmrg 26701e04c3fSmrg /* Upload constants */ 26801e04c3fSmrg { 26901e04c3fSmrg struct pipe_constant_buffer cb; 27001e04c3fSmrg 27101e04c3fSmrg cb.buffer = NULL; 27201e04c3fSmrg cb.user_buffer = &addr->constants; 27301e04c3fSmrg cb.buffer_offset = 0; 27401e04c3fSmrg cb.buffer_size = sizeof(addr->constants); 27501e04c3fSmrg 2767ec681f3Smrg pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, false, &cb); 27701e04c3fSmrg 27801e04c3fSmrg pipe_resource_reference(&cb.buffer, NULL); 27901e04c3fSmrg } 28001e04c3fSmrg 28101e04c3fSmrg /* Rasterizer state */ 28201e04c3fSmrg cso_set_rasterizer(cso, &st->pbo.raster); 28301e04c3fSmrg 28401e04c3fSmrg /* Disable stream output */ 28501e04c3fSmrg cso_set_stream_outputs(cso, 0, NULL, 0); 28601e04c3fSmrg 28701e04c3fSmrg if (addr->depth == 1) { 28801e04c3fSmrg cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); 28901e04c3fSmrg } else { 29001e04c3fSmrg cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP, 29101e04c3fSmrg 0, 4, 0, addr->depth); 29201e04c3fSmrg } 29301e04c3fSmrg 29401e04c3fSmrg return true; 29501e04c3fSmrg} 29601e04c3fSmrg 29701e04c3fSmrgvoid * 29801e04c3fSmrgst_pbo_create_vs(struct st_context *st) 29901e04c3fSmrg{ 3007ec681f3Smrg const struct glsl_type *vec4 = glsl_vec4_type(); 3017ec681f3Smrg const nir_shader_compiler_options *options = 3027ec681f3Smrg st_get_nir_compiler_options(st, MESA_SHADER_VERTEX); 303b9abf16eSmaya 3047ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options, 3057ec681f3Smrg "st/pbo VS"); 30601e04c3fSmrg 3077ec681f3Smrg nir_variable *in_pos = nir_variable_create(b.shader, nir_var_shader_in, 3087ec681f3Smrg vec4, "in_pos"); 3097ec681f3Smrg in_pos->data.location = VERT_ATTRIB_POS; 31001e04c3fSmrg 3117ec681f3Smrg nir_variable *out_pos = nir_variable_create(b.shader, nir_var_shader_out, 3127ec681f3Smrg vec4, "out_pos"); 3137ec681f3Smrg out_pos->data.location = VARYING_SLOT_POS; 3147ec681f3Smrg out_pos->data.interpolation = INTERP_MODE_NONE; 31501e04c3fSmrg 3167ec681f3Smrg nir_copy_var(&b, out_pos, in_pos); 31701e04c3fSmrg 31801e04c3fSmrg if (st->pbo.layers) { 3197ec681f3Smrg nir_variable *instance_id = nir_variable_create(b.shader, 3207ec681f3Smrg nir_var_system_value, 3217ec681f3Smrg glsl_int_type(), 3227ec681f3Smrg "instance_id"); 3237ec681f3Smrg instance_id->data.location = SYSTEM_VALUE_INSTANCE_ID; 32401e04c3fSmrg 32501e04c3fSmrg if (st->pbo.use_gs) { 3267ec681f3Smrg unsigned swiz_x[4] = {0, 0, 0, 0}; 3277ec681f3Smrg nir_store_var(&b, out_pos, 3287ec681f3Smrg nir_swizzle(&b, nir_i2f32(&b, nir_load_var(&b, instance_id)), swiz_x, 4), 3297ec681f3Smrg (1 << 2)); 33001e04c3fSmrg } else { 3317ec681f3Smrg nir_variable *out_layer = nir_variable_create(b.shader, 3327ec681f3Smrg nir_var_shader_out, 3337ec681f3Smrg glsl_int_type(), 3347ec681f3Smrg "out_layer"); 3357ec681f3Smrg out_layer->data.location = VARYING_SLOT_LAYER; 3367ec681f3Smrg out_layer->data.interpolation = INTERP_MODE_NONE; 3377ec681f3Smrg nir_copy_var(&b, out_layer, instance_id); 33801e04c3fSmrg } 33901e04c3fSmrg } 34001e04c3fSmrg 3417ec681f3Smrg return st_nir_finish_builtin_shader(st, b.shader); 34201e04c3fSmrg} 34301e04c3fSmrg 34401e04c3fSmrgvoid * 34501e04c3fSmrgst_pbo_create_gs(struct st_context *st) 34601e04c3fSmrg{ 34701e04c3fSmrg static const int zero = 0; 34801e04c3fSmrg struct ureg_program *ureg; 34901e04c3fSmrg struct ureg_dst out_pos; 35001e04c3fSmrg struct ureg_dst out_layer; 35101e04c3fSmrg struct ureg_src in_pos; 35201e04c3fSmrg struct ureg_src imm; 35301e04c3fSmrg unsigned i; 35401e04c3fSmrg 35501e04c3fSmrg ureg = ureg_create(PIPE_SHADER_GEOMETRY); 35601e04c3fSmrg if (!ureg) 35701e04c3fSmrg return NULL; 35801e04c3fSmrg 35901e04c3fSmrg ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES); 36001e04c3fSmrg ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP); 36101e04c3fSmrg ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3); 36201e04c3fSmrg 36301e04c3fSmrg out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); 36401e04c3fSmrg out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0); 36501e04c3fSmrg 36601e04c3fSmrg in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1); 36701e04c3fSmrg 36801e04c3fSmrg imm = ureg_DECL_immediate_int(ureg, &zero, 1); 36901e04c3fSmrg 37001e04c3fSmrg for (i = 0; i < 3; ++i) { 37101e04c3fSmrg struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i); 37201e04c3fSmrg 37301e04c3fSmrg /* out_pos = in_pos[i] */ 37401e04c3fSmrg ureg_MOV(ureg, out_pos, in_pos_vertex); 37501e04c3fSmrg 37601e04c3fSmrg /* out_layer.x = f2i(in_pos[i].z) */ 37701e04c3fSmrg ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X), 37801e04c3fSmrg ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z)); 37901e04c3fSmrg 38001e04c3fSmrg ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X)); 38101e04c3fSmrg } 38201e04c3fSmrg 38301e04c3fSmrg ureg_END(ureg); 38401e04c3fSmrg 38501e04c3fSmrg return ureg_create_shader_and_destroy(ureg, st->pipe); 38601e04c3fSmrg} 38701e04c3fSmrg 388b9abf16eSmayastatic const struct glsl_type * 3897ec681f3Smrgsampler_type_for_target(enum pipe_texture_target target, 3907ec681f3Smrg enum st_pbo_conversion conv) 391b9abf16eSmaya{ 392b9abf16eSmaya bool is_array = target >= PIPE_TEXTURE_1D_ARRAY; 393b9abf16eSmaya static const enum glsl_sampler_dim dim[] = { 394b9abf16eSmaya [PIPE_BUFFER] = GLSL_SAMPLER_DIM_BUF, 395b9abf16eSmaya [PIPE_TEXTURE_1D] = GLSL_SAMPLER_DIM_1D, 396b9abf16eSmaya [PIPE_TEXTURE_2D] = GLSL_SAMPLER_DIM_2D, 397b9abf16eSmaya [PIPE_TEXTURE_3D] = GLSL_SAMPLER_DIM_3D, 398b9abf16eSmaya [PIPE_TEXTURE_CUBE] = GLSL_SAMPLER_DIM_CUBE, 399b9abf16eSmaya [PIPE_TEXTURE_RECT] = GLSL_SAMPLER_DIM_RECT, 400b9abf16eSmaya [PIPE_TEXTURE_1D_ARRAY] = GLSL_SAMPLER_DIM_1D, 401b9abf16eSmaya [PIPE_TEXTURE_2D_ARRAY] = GLSL_SAMPLER_DIM_2D, 402b9abf16eSmaya [PIPE_TEXTURE_CUBE_ARRAY] = GLSL_SAMPLER_DIM_CUBE, 403b9abf16eSmaya }; 404b9abf16eSmaya 4057ec681f3Smrg static const enum glsl_base_type type[] = { 4067ec681f3Smrg [ST_PBO_CONVERT_FLOAT] = GLSL_TYPE_FLOAT, 4077ec681f3Smrg [ST_PBO_CONVERT_UINT] = GLSL_TYPE_UINT, 4087ec681f3Smrg [ST_PBO_CONVERT_UINT_TO_SINT] = GLSL_TYPE_UINT, 4097ec681f3Smrg [ST_PBO_CONVERT_SINT] = GLSL_TYPE_INT, 4107ec681f3Smrg [ST_PBO_CONVERT_SINT_TO_UINT] = GLSL_TYPE_INT, 4117ec681f3Smrg }; 4127ec681f3Smrg 4137ec681f3Smrg return glsl_sampler_type(dim[target], false, is_array, type[conv]); 414b9abf16eSmaya} 415b9abf16eSmaya 4167ec681f3Smrg 41701e04c3fSmrgstatic void * 4187ec681f3Smrgcreate_fs(struct st_context *st, bool download, 4197ec681f3Smrg enum pipe_texture_target target, 4207ec681f3Smrg enum st_pbo_conversion conversion, 4217ec681f3Smrg bool need_layer) 422b9abf16eSmaya{ 4237ec681f3Smrg struct pipe_screen *screen = st->screen; 424b9abf16eSmaya const nir_shader_compiler_options *options = 4257ec681f3Smrg st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT); 426b9abf16eSmaya bool pos_is_sysval = 427b9abf16eSmaya screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL); 428b9abf16eSmaya 4297ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options, 4307ec681f3Smrg download ? 4317ec681f3Smrg "st/pbo download FS" : 4327ec681f3Smrg "st/pbo upload FS"); 433b9abf16eSmaya 434b9abf16eSmaya nir_ssa_def *zero = nir_imm_int(&b, 0); 435b9abf16eSmaya 436b9abf16eSmaya /* param = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */ 437b9abf16eSmaya nir_variable *param_var = 438b9abf16eSmaya nir_variable_create(b.shader, nir_var_uniform, glsl_vec4_type(), "param"); 439b9abf16eSmaya b.shader->num_uniforms += 4; 440b9abf16eSmaya nir_ssa_def *param = nir_load_var(&b, param_var); 441b9abf16eSmaya 442b9abf16eSmaya nir_variable *fragcoord = 443b9abf16eSmaya nir_variable_create(b.shader, pos_is_sysval ? nir_var_system_value : 444b9abf16eSmaya nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord"); 445b9abf16eSmaya fragcoord->data.location = pos_is_sysval ? SYSTEM_VALUE_FRAG_COORD 446b9abf16eSmaya : VARYING_SLOT_POS; 447b9abf16eSmaya nir_ssa_def *coord = nir_load_var(&b, fragcoord); 448b9abf16eSmaya 449b9abf16eSmaya nir_ssa_def *layer = NULL; 450b9abf16eSmaya if (st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY || 451b9abf16eSmaya target == PIPE_TEXTURE_2D_ARRAY || 452b9abf16eSmaya target == PIPE_TEXTURE_3D || 453b9abf16eSmaya target == PIPE_TEXTURE_CUBE || 454b9abf16eSmaya target == PIPE_TEXTURE_CUBE_ARRAY)) { 4557ec681f3Smrg if (need_layer) { 4567ec681f3Smrg nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in, 4577ec681f3Smrg glsl_int_type(), "gl_Layer"); 4587ec681f3Smrg var->data.location = VARYING_SLOT_LAYER; 4597ec681f3Smrg var->data.interpolation = INTERP_MODE_FLAT; 4607ec681f3Smrg layer = nir_load_var(&b, var); 4617ec681f3Smrg } 4627ec681f3Smrg else { 4637ec681f3Smrg layer = zero; 4647ec681f3Smrg } 465b9abf16eSmaya } 466b9abf16eSmaya 467b9abf16eSmaya /* offset_pos = param.xy + f2i(coord.xy) */ 468b9abf16eSmaya nir_ssa_def *offset_pos = 469b9abf16eSmaya nir_iadd(&b, nir_channels(&b, param, TGSI_WRITEMASK_XY), 470b9abf16eSmaya nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY))); 471b9abf16eSmaya 472b9abf16eSmaya /* addr = offset_pos.x + offset_pos.y * stride */ 473b9abf16eSmaya nir_ssa_def *pbo_addr = 474b9abf16eSmaya nir_iadd(&b, nir_channel(&b, offset_pos, 0), 475b9abf16eSmaya nir_imul(&b, nir_channel(&b, offset_pos, 1), 476b9abf16eSmaya nir_channel(&b, param, 2))); 477b9abf16eSmaya if (layer) { 478b9abf16eSmaya /* pbo_addr += image_height * layer */ 479b9abf16eSmaya pbo_addr = nir_iadd(&b, pbo_addr, 480b9abf16eSmaya nir_imul(&b, layer, nir_channel(&b, param, 3))); 481b9abf16eSmaya } 482b9abf16eSmaya 483b9abf16eSmaya nir_ssa_def *texcoord; 484b9abf16eSmaya if (download) { 485b9abf16eSmaya texcoord = nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY)); 486b9abf16eSmaya 4877ec681f3Smrg if (target == PIPE_TEXTURE_1D) { 4887ec681f3Smrg unsigned sw = 0; 4897ec681f3Smrg texcoord = nir_swizzle(&b, texcoord, &sw, 1); 4907ec681f3Smrg } 4917ec681f3Smrg 492b9abf16eSmaya if (layer) { 493b9abf16eSmaya nir_ssa_def *src_layer = layer; 494b9abf16eSmaya 495b9abf16eSmaya if (target == PIPE_TEXTURE_3D) { 496b9abf16eSmaya nir_variable *layer_offset_var = 497b9abf16eSmaya nir_variable_create(b.shader, nir_var_uniform, 498b9abf16eSmaya glsl_int_type(), "layer_offset"); 499b9abf16eSmaya b.shader->num_uniforms += 1; 500b9abf16eSmaya layer_offset_var->data.driver_location = 4; 501b9abf16eSmaya nir_ssa_def *layer_offset = nir_load_var(&b, layer_offset_var); 502b9abf16eSmaya 503b9abf16eSmaya src_layer = nir_iadd(&b, layer, layer_offset); 504b9abf16eSmaya } 505b9abf16eSmaya 5067ec681f3Smrg if (target == PIPE_TEXTURE_1D_ARRAY) { 5077ec681f3Smrg texcoord = nir_vec2(&b, nir_channel(&b, texcoord, 0), 5087ec681f3Smrg src_layer); 5097ec681f3Smrg } else { 5107ec681f3Smrg texcoord = nir_vec3(&b, nir_channel(&b, texcoord, 0), 5117ec681f3Smrg nir_channel(&b, texcoord, 1), 5127ec681f3Smrg src_layer); 5137ec681f3Smrg } 514b9abf16eSmaya } 515b9abf16eSmaya } else { 516b9abf16eSmaya texcoord = pbo_addr; 517b9abf16eSmaya } 518b9abf16eSmaya 519b9abf16eSmaya nir_variable *tex_var = 520b9abf16eSmaya nir_variable_create(b.shader, nir_var_uniform, 5217ec681f3Smrg sampler_type_for_target(target, conversion), 5227ec681f3Smrg "tex"); 523b9abf16eSmaya tex_var->data.explicit_binding = true; 524b9abf16eSmaya tex_var->data.binding = 0; 525b9abf16eSmaya 526b9abf16eSmaya nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var); 527b9abf16eSmaya 528b9abf16eSmaya nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 529b9abf16eSmaya tex->op = nir_texop_txf; 530b9abf16eSmaya tex->sampler_dim = glsl_get_sampler_dim(tex_var->type); 531b9abf16eSmaya tex->coord_components = 532b9abf16eSmaya glsl_get_sampler_coordinate_components(tex_var->type); 5337ec681f3Smrg tex->is_array = target >= PIPE_TEXTURE_1D_ARRAY; 5347ec681f3Smrg 5357ec681f3Smrg tex->dest_type = nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(tex_var->type)); 536b9abf16eSmaya tex->src[0].src_type = nir_tex_src_texture_deref; 537b9abf16eSmaya tex->src[0].src = nir_src_for_ssa(&tex_deref->dest.ssa); 538b9abf16eSmaya tex->src[1].src_type = nir_tex_src_sampler_deref; 539b9abf16eSmaya tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa); 540b9abf16eSmaya tex->src[2].src_type = nir_tex_src_coord; 541b9abf16eSmaya tex->src[2].src = nir_src_for_ssa(texcoord); 542b9abf16eSmaya nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 543b9abf16eSmaya nir_builder_instr_insert(&b, &tex->instr); 544b9abf16eSmaya nir_ssa_def *result = &tex->dest.ssa; 545b9abf16eSmaya 546b9abf16eSmaya if (conversion == ST_PBO_CONVERT_SINT_TO_UINT) 547b9abf16eSmaya result = nir_imax(&b, result, zero); 548b9abf16eSmaya else if (conversion == ST_PBO_CONVERT_UINT_TO_SINT) 549b9abf16eSmaya result = nir_umin(&b, result, nir_imm_int(&b, (1u << 31) - 1)); 550b9abf16eSmaya 551b9abf16eSmaya if (download) { 5527ec681f3Smrg static const enum glsl_base_type type[] = { 5537ec681f3Smrg [ST_PBO_CONVERT_FLOAT] = GLSL_TYPE_FLOAT, 5547ec681f3Smrg [ST_PBO_CONVERT_UINT] = GLSL_TYPE_UINT, 5557ec681f3Smrg [ST_PBO_CONVERT_UINT_TO_SINT] = GLSL_TYPE_INT, 5567ec681f3Smrg [ST_PBO_CONVERT_SINT] = GLSL_TYPE_INT, 5577ec681f3Smrg [ST_PBO_CONVERT_SINT_TO_UINT] = GLSL_TYPE_UINT, 5587ec681f3Smrg }; 559b9abf16eSmaya nir_variable *img_var = 560b9abf16eSmaya nir_variable_create(b.shader, nir_var_uniform, 561b9abf16eSmaya glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, 5627ec681f3Smrg type[conversion]), "img"); 5637ec681f3Smrg img_var->data.access = ACCESS_NON_READABLE; 564b9abf16eSmaya img_var->data.explicit_binding = true; 565b9abf16eSmaya img_var->data.binding = 0; 566b9abf16eSmaya nir_deref_instr *img_deref = nir_build_deref_var(&b, img_var); 5677ec681f3Smrg 5687ec681f3Smrg nir_image_deref_store(&b, &img_deref->dest.ssa, 5697ec681f3Smrg nir_vec4(&b, pbo_addr, zero, zero, zero), 5707ec681f3Smrg zero, 5717ec681f3Smrg result, 5727ec681f3Smrg nir_imm_int(&b, 0), 5737ec681f3Smrg .image_dim = GLSL_SAMPLER_DIM_BUF); 574b9abf16eSmaya } else { 575b9abf16eSmaya nir_variable *color = 576b9abf16eSmaya nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), 577b9abf16eSmaya "gl_FragColor"); 578b9abf16eSmaya color->data.location = FRAG_RESULT_COLOR; 579b9abf16eSmaya 580b9abf16eSmaya nir_store_var(&b, color, result, TGSI_WRITEMASK_XYZW); 581b9abf16eSmaya } 582b9abf16eSmaya 5837ec681f3Smrg return st_nir_finish_builtin_shader(st, b.shader); 584b9abf16eSmaya} 585b9abf16eSmaya 58601e04c3fSmrgstatic enum st_pbo_conversion 58701e04c3fSmrgget_pbo_conversion(enum pipe_format src_format, enum pipe_format dst_format) 58801e04c3fSmrg{ 58901e04c3fSmrg if (util_format_is_pure_uint(src_format)) { 5907ec681f3Smrg if (util_format_is_pure_uint(dst_format)) 5917ec681f3Smrg return ST_PBO_CONVERT_UINT; 59201e04c3fSmrg if (util_format_is_pure_sint(dst_format)) 59301e04c3fSmrg return ST_PBO_CONVERT_UINT_TO_SINT; 59401e04c3fSmrg } else if (util_format_is_pure_sint(src_format)) { 5957ec681f3Smrg if (util_format_is_pure_sint(dst_format)) 5967ec681f3Smrg return ST_PBO_CONVERT_SINT; 59701e04c3fSmrg if (util_format_is_pure_uint(dst_format)) 59801e04c3fSmrg return ST_PBO_CONVERT_SINT_TO_UINT; 59901e04c3fSmrg } 60001e04c3fSmrg 6017ec681f3Smrg return ST_PBO_CONVERT_FLOAT; 60201e04c3fSmrg} 60301e04c3fSmrg 60401e04c3fSmrgvoid * 60501e04c3fSmrgst_pbo_get_upload_fs(struct st_context *st, 60601e04c3fSmrg enum pipe_format src_format, 6077ec681f3Smrg enum pipe_format dst_format, 6087ec681f3Smrg bool need_layer) 60901e04c3fSmrg{ 61001e04c3fSmrg STATIC_ASSERT(ARRAY_SIZE(st->pbo.upload_fs) == ST_NUM_PBO_CONVERSIONS); 61101e04c3fSmrg 61201e04c3fSmrg enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format); 61301e04c3fSmrg 6147ec681f3Smrg if (!st->pbo.upload_fs[conversion][need_layer]) 6157ec681f3Smrg st->pbo.upload_fs[conversion][need_layer] = create_fs(st, false, 0, conversion, need_layer); 61601e04c3fSmrg 6177ec681f3Smrg return st->pbo.upload_fs[conversion][need_layer]; 61801e04c3fSmrg} 61901e04c3fSmrg 62001e04c3fSmrgvoid * 62101e04c3fSmrgst_pbo_get_download_fs(struct st_context *st, enum pipe_texture_target target, 62201e04c3fSmrg enum pipe_format src_format, 6237ec681f3Smrg enum pipe_format dst_format, 6247ec681f3Smrg bool need_layer) 62501e04c3fSmrg{ 62601e04c3fSmrg STATIC_ASSERT(ARRAY_SIZE(st->pbo.download_fs) == ST_NUM_PBO_CONVERSIONS); 62701e04c3fSmrg assert(target < PIPE_MAX_TEXTURE_TYPES); 62801e04c3fSmrg 62901e04c3fSmrg enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format); 63001e04c3fSmrg 6317ec681f3Smrg if (!st->pbo.download_fs[conversion][target][need_layer]) 6327ec681f3Smrg st->pbo.download_fs[conversion][target][need_layer] = create_fs(st, true, target, conversion, need_layer); 63301e04c3fSmrg 6347ec681f3Smrg return st->pbo.download_fs[conversion][target][need_layer]; 63501e04c3fSmrg} 63601e04c3fSmrg 63701e04c3fSmrgvoid 63801e04c3fSmrgst_init_pbo_helpers(struct st_context *st) 63901e04c3fSmrg{ 6407ec681f3Smrg struct pipe_screen *screen = st->screen; 64101e04c3fSmrg 64201e04c3fSmrg st->pbo.upload_enabled = 64301e04c3fSmrg screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) && 64401e04c3fSmrg screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 && 64501e04c3fSmrg screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS); 64601e04c3fSmrg if (!st->pbo.upload_enabled) 64701e04c3fSmrg return; 64801e04c3fSmrg 64901e04c3fSmrg st->pbo.download_enabled = 65001e04c3fSmrg st->pbo.upload_enabled && 65101e04c3fSmrg screen->get_param(screen, PIPE_CAP_SAMPLER_VIEW_TARGET) && 65201e04c3fSmrg screen->get_param(screen, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT) && 65301e04c3fSmrg screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, 65401e04c3fSmrg PIPE_SHADER_CAP_MAX_SHADER_IMAGES) >= 1; 65501e04c3fSmrg 65601e04c3fSmrg st->pbo.rgba_only = 65701e04c3fSmrg screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY); 65801e04c3fSmrg 65901e04c3fSmrg if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) { 66001e04c3fSmrg if (screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) { 66101e04c3fSmrg st->pbo.layers = true; 66201e04c3fSmrg } else if (screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) { 66301e04c3fSmrg st->pbo.layers = true; 66401e04c3fSmrg st->pbo.use_gs = true; 66501e04c3fSmrg } 66601e04c3fSmrg } 66701e04c3fSmrg 66801e04c3fSmrg /* Blend state */ 66901e04c3fSmrg memset(&st->pbo.upload_blend, 0, sizeof(struct pipe_blend_state)); 67001e04c3fSmrg st->pbo.upload_blend.rt[0].colormask = PIPE_MASK_RGBA; 67101e04c3fSmrg 67201e04c3fSmrg /* Rasterizer state */ 67301e04c3fSmrg memset(&st->pbo.raster, 0, sizeof(struct pipe_rasterizer_state)); 67401e04c3fSmrg st->pbo.raster.half_pixel_center = 1; 67501e04c3fSmrg} 67601e04c3fSmrg 67701e04c3fSmrgvoid 67801e04c3fSmrgst_destroy_pbo_helpers(struct st_context *st) 67901e04c3fSmrg{ 68001e04c3fSmrg unsigned i; 68101e04c3fSmrg 68201e04c3fSmrg for (i = 0; i < ARRAY_SIZE(st->pbo.upload_fs); ++i) { 6837ec681f3Smrg for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.upload_fs[0]); j++) { 6847ec681f3Smrg if (st->pbo.upload_fs[i][j]) { 6857ec681f3Smrg st->pipe->delete_fs_state(st->pipe, st->pbo.upload_fs[i][j]); 6867ec681f3Smrg st->pbo.upload_fs[i][j] = NULL; 6877ec681f3Smrg } 68801e04c3fSmrg } 68901e04c3fSmrg } 69001e04c3fSmrg 69101e04c3fSmrg for (i = 0; i < ARRAY_SIZE(st->pbo.download_fs); ++i) { 69201e04c3fSmrg for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.download_fs[0]); ++j) { 6937ec681f3Smrg for (unsigned k = 0; k < ARRAY_SIZE(st->pbo.download_fs[0][0]); k++) { 6947ec681f3Smrg if (st->pbo.download_fs[i][j][k]) { 6957ec681f3Smrg st->pipe->delete_fs_state(st->pipe, st->pbo.download_fs[i][j][k]); 6967ec681f3Smrg st->pbo.download_fs[i][j][k] = NULL; 6977ec681f3Smrg } 69801e04c3fSmrg } 69901e04c3fSmrg } 70001e04c3fSmrg } 70101e04c3fSmrg 70201e04c3fSmrg if (st->pbo.gs) { 7037ec681f3Smrg st->pipe->delete_gs_state(st->pipe, st->pbo.gs); 70401e04c3fSmrg st->pbo.gs = NULL; 70501e04c3fSmrg } 70601e04c3fSmrg 70701e04c3fSmrg if (st->pbo.vs) { 7087ec681f3Smrg st->pipe->delete_vs_state(st->pipe, st->pbo.vs); 70901e04c3fSmrg st->pbo.vs = NULL; 71001e04c3fSmrg } 71101e04c3fSmrg} 712