1/* 2 * Copyright 2007 VMware, Inc. 3 * Copyright 2016 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25/** 26 * \file 27 * 28 * Common helper functions for PBO up- and downloads. 29 */ 30 31#include "state_tracker/st_context.h" 32#include "state_tracker/st_nir.h" 33#include "state_tracker/st_pbo.h" 34#include "state_tracker/st_cb_bufferobjects.h" 35 36#include "pipe/p_context.h" 37#include "pipe/p_defines.h" 38#include "pipe/p_screen.h" 39#include "cso_cache/cso_context.h" 40#include "tgsi/tgsi_ureg.h" 41#include "util/u_format.h" 42#include "util/u_inlines.h" 43#include "util/u_upload_mgr.h" 44 45#include "compiler/nir/nir_builder.h" 46 47/* Conversion to apply in the fragment shader. */ 48enum st_pbo_conversion { 49 ST_PBO_CONVERT_NONE = 0, 50 ST_PBO_CONVERT_UINT_TO_SINT, 51 ST_PBO_CONVERT_SINT_TO_UINT, 52 53 ST_NUM_PBO_CONVERSIONS 54}; 55 56/* Final setup of buffer addressing information. 57 * 58 * buf_offset is in pixels. 59 * 60 * Returns false if something (e.g. alignment) prevents PBO upload/download. 61 */ 62bool 63st_pbo_addresses_setup(struct st_context *st, 64 struct pipe_resource *buf, intptr_t buf_offset, 65 struct st_pbo_addresses *addr) 66{ 67 unsigned skip_pixels; 68 69 /* Check alignment against texture buffer requirements. */ 70 { 71 unsigned ofs = (buf_offset * addr->bytes_per_pixel) % st->ctx->Const.TextureBufferOffsetAlignment; 72 if (ofs != 0) { 73 if (ofs % addr->bytes_per_pixel != 0) 74 return false; 75 76 skip_pixels = ofs / addr->bytes_per_pixel; 77 buf_offset -= skip_pixels; 78 } else { 79 skip_pixels = 0; 80 } 81 } 82 83 assert(buf_offset >= 0); 84 85 addr->buffer = buf; 86 addr->first_element = buf_offset; 87 addr->last_element = buf_offset + skip_pixels + addr->width - 1 88 + (addr->height - 1 + (addr->depth - 1) * addr->image_height) * addr->pixels_per_row; 89 90 if (addr->last_element - addr->first_element > st->ctx->Const.MaxTextureBufferSize - 1) 91 return false; 92 93 /* This should be ensured by Mesa before calling our callbacks */ 94 assert((addr->last_element + 1) * addr->bytes_per_pixel <= buf->width0); 95 96 addr->constants.xoffset = -addr->xoffset + skip_pixels; 97 addr->constants.yoffset = -addr->yoffset; 98 addr->constants.stride = addr->pixels_per_row; 99 addr->constants.image_size = addr->pixels_per_row * addr->image_height; 100 addr->constants.layer_offset = 0; 101 102 return true; 103} 104 105/* Validate and fill buffer addressing information based on GL pixelstore 106 * attributes. 107 * 108 * Returns false if some aspect of the addressing (e.g. alignment) prevents 109 * PBO upload/download. 110 */ 111bool 112st_pbo_addresses_pixelstore(struct st_context *st, 113 GLenum gl_target, bool skip_images, 114 const struct gl_pixelstore_attrib *store, 115 const void *pixels, 116 struct st_pbo_addresses *addr) 117{ 118 struct pipe_resource *buf = st_buffer_object(store->BufferObj)->buffer; 119 intptr_t buf_offset = (intptr_t) pixels; 120 121 if (buf_offset % addr->bytes_per_pixel) 122 return false; 123 124 /* Convert to texels */ 125 buf_offset = buf_offset / addr->bytes_per_pixel; 126 127 /* Determine image height */ 128 if (gl_target == GL_TEXTURE_1D_ARRAY) { 129 addr->image_height = 1; 130 } else { 131 addr->image_height = store->ImageHeight > 0 ? store->ImageHeight : addr->height; 132 } 133 134 /* Compute the stride, taking store->Alignment into account */ 135 { 136 unsigned pixels_per_row = store->RowLength > 0 ? 137 store->RowLength : addr->width; 138 unsigned bytes_per_row = pixels_per_row * addr->bytes_per_pixel; 139 unsigned remainder = bytes_per_row % store->Alignment; 140 unsigned offset_rows; 141 142 if (remainder > 0) 143 bytes_per_row += store->Alignment - remainder; 144 145 if (bytes_per_row % addr->bytes_per_pixel) 146 return false; 147 148 addr->pixels_per_row = bytes_per_row / addr->bytes_per_pixel; 149 150 offset_rows = store->SkipRows; 151 if (skip_images) 152 offset_rows += addr->image_height * store->SkipImages; 153 154 buf_offset += store->SkipPixels + addr->pixels_per_row * offset_rows; 155 } 156 157 if (!st_pbo_addresses_setup(st, buf, buf_offset, addr)) 158 return false; 159 160 /* Support GL_PACK_INVERT_MESA */ 161 if (store->Invert) { 162 addr->constants.xoffset += (addr->height - 1) * addr->constants.stride; 163 addr->constants.stride = -addr->constants.stride; 164 } 165 166 return true; 167} 168 169/* For download from a framebuffer, we may have to invert the Y axis. The 170 * setup is as follows: 171 * - set viewport to inverted, so that the position sysval is correct for 172 * texel fetches 173 * - this function adjusts the fragment shader's constant buffer to compute 174 * the correct destination addresses. 175 */ 176void 177st_pbo_addresses_invert_y(struct st_pbo_addresses *addr, 178 unsigned viewport_height) 179{ 180 addr->constants.xoffset += 181 (viewport_height - 1 + 2 * addr->constants.yoffset) * addr->constants.stride; 182 addr->constants.stride = -addr->constants.stride; 183} 184 185/* Setup all vertex pipeline state, rasterizer state, and fragment shader 186 * constants, and issue the draw call for PBO upload/download. 187 * 188 * The caller is responsible for saving and restoring state, as well as for 189 * setting other fragment shader state (fragment shader, samplers), and 190 * framebuffer/viewport/DSA/blend state. 191 */ 192bool 193st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr, 194 unsigned surface_width, unsigned surface_height) 195{ 196 struct cso_context *cso = st->cso_context; 197 198 /* Setup vertex and geometry shaders */ 199 if (!st->pbo.vs) { 200 st->pbo.vs = st_pbo_create_vs(st); 201 if (!st->pbo.vs) 202 return false; 203 } 204 205 if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) { 206 st->pbo.gs = st_pbo_create_gs(st); 207 if (!st->pbo.gs) 208 return false; 209 } 210 211 cso_set_vertex_shader_handle(cso, st->pbo.vs); 212 213 cso_set_geometry_shader_handle(cso, addr->depth != 1 ? st->pbo.gs : NULL); 214 215 cso_set_tessctrl_shader_handle(cso, NULL); 216 217 cso_set_tesseval_shader_handle(cso, NULL); 218 219 /* Upload vertices */ 220 { 221 struct pipe_vertex_buffer vbo = {0}; 222 struct pipe_vertex_element velem; 223 224 float x0 = (float) addr->xoffset / surface_width * 2.0f - 1.0f; 225 float y0 = (float) addr->yoffset / surface_height * 2.0f - 1.0f; 226 float x1 = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f; 227 float y1 = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f; 228 229 float *verts = NULL; 230 231 vbo.stride = 2 * sizeof(float); 232 233 u_upload_alloc(st->pipe->stream_uploader, 0, 8 * sizeof(float), 4, 234 &vbo.buffer_offset, &vbo.buffer.resource, (void **) &verts); 235 if (!verts) 236 return false; 237 238 verts[0] = x0; 239 verts[1] = y0; 240 verts[2] = x0; 241 verts[3] = y1; 242 verts[4] = x1; 243 verts[5] = y0; 244 verts[6] = x1; 245 verts[7] = y1; 246 247 u_upload_unmap(st->pipe->stream_uploader); 248 249 velem.src_offset = 0; 250 velem.instance_divisor = 0; 251 velem.vertex_buffer_index = 0; 252 velem.src_format = PIPE_FORMAT_R32G32_FLOAT; 253 254 cso_set_vertex_elements(cso, 1, &velem); 255 256 cso_set_vertex_buffers(cso, velem.vertex_buffer_index, 1, &vbo); 257 258 pipe_resource_reference(&vbo.buffer.resource, NULL); 259 } 260 261 /* Upload constants */ 262 { 263 struct pipe_constant_buffer cb; 264 265 cb.buffer = NULL; 266 cb.user_buffer = &addr->constants; 267 cb.buffer_offset = 0; 268 cb.buffer_size = sizeof(addr->constants); 269 270 cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb); 271 272 pipe_resource_reference(&cb.buffer, NULL); 273 } 274 275 /* Rasterizer state */ 276 cso_set_rasterizer(cso, &st->pbo.raster); 277 278 /* Disable stream output */ 279 cso_set_stream_outputs(cso, 0, NULL, 0); 280 281 if (addr->depth == 1) { 282 cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); 283 } else { 284 cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP, 285 0, 4, 0, addr->depth); 286 } 287 288 return true; 289} 290 291void * 292st_pbo_create_vs(struct st_context *st) 293{ 294 struct pipe_screen *pscreen = st->pipe->screen; 295 bool use_nir = PIPE_SHADER_IR_NIR == 296 pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, 297 PIPE_SHADER_CAP_PREFERRED_IR); 298 299 if (use_nir) { 300 unsigned inputs[] = { VERT_ATTRIB_POS, SYSTEM_VALUE_INSTANCE_ID, }; 301 unsigned outputs[] = { VARYING_SLOT_POS, VARYING_SLOT_LAYER }; 302 303 return st_nir_make_passthrough_shader(st, "st/pbo VS", 304 MESA_SHADER_VERTEX, 305 st->pbo.layers ? 2 : 1, 306 inputs, outputs, NULL, (1 << 1)); 307 } 308 309 struct ureg_program *ureg; 310 struct ureg_src in_pos; 311 struct ureg_src in_instanceid; 312 struct ureg_dst out_pos; 313 struct ureg_dst out_layer; 314 315 ureg = ureg_create(PIPE_SHADER_VERTEX); 316 if (!ureg) 317 return NULL; 318 319 in_pos = ureg_DECL_vs_input(ureg, TGSI_SEMANTIC_POSITION); 320 321 out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); 322 323 if (st->pbo.layers) { 324 in_instanceid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0); 325 326 if (!st->pbo.use_gs) 327 out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0); 328 } 329 330 /* out_pos = in_pos */ 331 ureg_MOV(ureg, out_pos, in_pos); 332 333 if (st->pbo.layers) { 334 if (st->pbo.use_gs) { 335 /* out_pos.z = i2f(gl_InstanceID) */ 336 ureg_I2F(ureg, ureg_writemask(out_pos, TGSI_WRITEMASK_Z), 337 ureg_scalar(in_instanceid, TGSI_SWIZZLE_X)); 338 } else { 339 /* out_layer = gl_InstanceID */ 340 ureg_MOV(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X), 341 ureg_scalar(in_instanceid, TGSI_SWIZZLE_X)); 342 } 343 } 344 345 ureg_END(ureg); 346 347 return ureg_create_shader_and_destroy(ureg, st->pipe); 348} 349 350void * 351st_pbo_create_gs(struct st_context *st) 352{ 353 static const int zero = 0; 354 struct ureg_program *ureg; 355 struct ureg_dst out_pos; 356 struct ureg_dst out_layer; 357 struct ureg_src in_pos; 358 struct ureg_src imm; 359 unsigned i; 360 361 ureg = ureg_create(PIPE_SHADER_GEOMETRY); 362 if (!ureg) 363 return NULL; 364 365 ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES); 366 ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP); 367 ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3); 368 369 out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); 370 out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0); 371 372 in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1); 373 374 imm = ureg_DECL_immediate_int(ureg, &zero, 1); 375 376 for (i = 0; i < 3; ++i) { 377 struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i); 378 379 /* out_pos = in_pos[i] */ 380 ureg_MOV(ureg, out_pos, in_pos_vertex); 381 382 /* out_layer.x = f2i(in_pos[i].z) */ 383 ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X), 384 ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z)); 385 386 ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X)); 387 } 388 389 ureg_END(ureg); 390 391 return ureg_create_shader_and_destroy(ureg, st->pipe); 392} 393 394static void 395build_conversion(struct ureg_program *ureg, const struct ureg_dst *temp, 396 enum st_pbo_conversion conversion) 397{ 398 switch (conversion) { 399 case ST_PBO_CONVERT_SINT_TO_UINT: 400 ureg_IMAX(ureg, *temp, ureg_src(*temp), ureg_imm1i(ureg, 0)); 401 break; 402 case ST_PBO_CONVERT_UINT_TO_SINT: 403 ureg_UMIN(ureg, *temp, ureg_src(*temp), ureg_imm1u(ureg, (1u << 31) - 1)); 404 break; 405 default: 406 /* no-op */ 407 break; 408 } 409} 410 411static const struct glsl_type * 412sampler_type_for_target(enum pipe_texture_target target) 413{ 414 bool is_array = target >= PIPE_TEXTURE_1D_ARRAY; 415 static const enum glsl_sampler_dim dim[] = { 416 [PIPE_BUFFER] = GLSL_SAMPLER_DIM_BUF, 417 [PIPE_TEXTURE_1D] = GLSL_SAMPLER_DIM_1D, 418 [PIPE_TEXTURE_2D] = GLSL_SAMPLER_DIM_2D, 419 [PIPE_TEXTURE_3D] = GLSL_SAMPLER_DIM_3D, 420 [PIPE_TEXTURE_CUBE] = GLSL_SAMPLER_DIM_CUBE, 421 [PIPE_TEXTURE_RECT] = GLSL_SAMPLER_DIM_RECT, 422 [PIPE_TEXTURE_1D_ARRAY] = GLSL_SAMPLER_DIM_1D, 423 [PIPE_TEXTURE_2D_ARRAY] = GLSL_SAMPLER_DIM_2D, 424 [PIPE_TEXTURE_CUBE_ARRAY] = GLSL_SAMPLER_DIM_CUBE, 425 }; 426 427 return glsl_sampler_type(dim[target], false, is_array, GLSL_TYPE_FLOAT); 428} 429 430static void * 431create_fs_nir(struct st_context *st, 432 bool download, 433 enum pipe_texture_target target, 434 enum st_pbo_conversion conversion) 435{ 436 struct pipe_screen *screen = st->pipe->screen; 437 struct nir_builder b; 438 const nir_shader_compiler_options *options = 439 st->ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions; 440 bool pos_is_sysval = 441 screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL); 442 443 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options); 444 445 nir_ssa_def *zero = nir_imm_int(&b, 0); 446 447 /* param = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */ 448 nir_variable *param_var = 449 nir_variable_create(b.shader, nir_var_uniform, glsl_vec4_type(), "param"); 450 b.shader->num_uniforms += 4; 451 nir_ssa_def *param = nir_load_var(&b, param_var); 452 453 nir_variable *fragcoord = 454 nir_variable_create(b.shader, pos_is_sysval ? nir_var_system_value : 455 nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord"); 456 fragcoord->data.location = pos_is_sysval ? SYSTEM_VALUE_FRAG_COORD 457 : VARYING_SLOT_POS; 458 nir_ssa_def *coord = nir_load_var(&b, fragcoord); 459 460 nir_ssa_def *layer = NULL; 461 if (st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY || 462 target == PIPE_TEXTURE_2D_ARRAY || 463 target == PIPE_TEXTURE_3D || 464 target == PIPE_TEXTURE_CUBE || 465 target == PIPE_TEXTURE_CUBE_ARRAY)) { 466 nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in, 467 glsl_int_type(), "gl_Layer"); 468 var->data.location = VARYING_SLOT_LAYER; 469 var->data.interpolation = INTERP_MODE_FLAT; 470 layer = nir_load_var(&b, var); 471 } 472 473 /* offset_pos = param.xy + f2i(coord.xy) */ 474 nir_ssa_def *offset_pos = 475 nir_iadd(&b, nir_channels(&b, param, TGSI_WRITEMASK_XY), 476 nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY))); 477 478 /* addr = offset_pos.x + offset_pos.y * stride */ 479 nir_ssa_def *pbo_addr = 480 nir_iadd(&b, nir_channel(&b, offset_pos, 0), 481 nir_imul(&b, nir_channel(&b, offset_pos, 1), 482 nir_channel(&b, param, 2))); 483 if (layer) { 484 /* pbo_addr += image_height * layer */ 485 pbo_addr = nir_iadd(&b, pbo_addr, 486 nir_imul(&b, layer, nir_channel(&b, param, 3))); 487 } 488 489 nir_ssa_def *texcoord; 490 if (download) { 491 texcoord = nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY)); 492 493 if (layer) { 494 nir_ssa_def *src_layer = layer; 495 496 if (target == PIPE_TEXTURE_3D) { 497 nir_variable *layer_offset_var = 498 nir_variable_create(b.shader, nir_var_uniform, 499 glsl_int_type(), "layer_offset"); 500 b.shader->num_uniforms += 1; 501 layer_offset_var->data.driver_location = 4; 502 nir_ssa_def *layer_offset = nir_load_var(&b, layer_offset_var); 503 504 src_layer = nir_iadd(&b, layer, layer_offset); 505 } 506 507 texcoord = nir_vec3(&b, nir_channel(&b, texcoord, 0), 508 nir_channel(&b, texcoord, 1), 509 src_layer); 510 } 511 } else { 512 texcoord = pbo_addr; 513 } 514 515 nir_variable *tex_var = 516 nir_variable_create(b.shader, nir_var_uniform, 517 sampler_type_for_target(target), "tex"); 518 tex_var->data.explicit_binding = true; 519 tex_var->data.binding = 0; 520 521 nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var); 522 523 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 524 tex->op = nir_texop_txf; 525 tex->sampler_dim = glsl_get_sampler_dim(tex_var->type); 526 tex->coord_components = 527 glsl_get_sampler_coordinate_components(tex_var->type); 528 tex->dest_type = nir_type_float; 529 tex->src[0].src_type = nir_tex_src_texture_deref; 530 tex->src[0].src = nir_src_for_ssa(&tex_deref->dest.ssa); 531 tex->src[1].src_type = nir_tex_src_sampler_deref; 532 tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa); 533 tex->src[2].src_type = nir_tex_src_coord; 534 tex->src[2].src = nir_src_for_ssa(texcoord); 535 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 536 nir_builder_instr_insert(&b, &tex->instr); 537 nir_ssa_def *result = &tex->dest.ssa; 538 539 if (conversion == ST_PBO_CONVERT_SINT_TO_UINT) 540 result = nir_imax(&b, result, zero); 541 else if (conversion == ST_PBO_CONVERT_UINT_TO_SINT) 542 result = nir_umin(&b, result, nir_imm_int(&b, (1u << 31) - 1)); 543 544 if (download) { 545 nir_variable *img_var = 546 nir_variable_create(b.shader, nir_var_uniform, 547 glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, 548 GLSL_TYPE_FLOAT), "img"); 549 img_var->data.image.access = ACCESS_NON_READABLE; 550 img_var->data.explicit_binding = true; 551 img_var->data.binding = 0; 552 nir_deref_instr *img_deref = nir_build_deref_var(&b, img_var); 553 nir_intrinsic_instr *intrin = 554 nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store); 555 intrin->src[0] = nir_src_for_ssa(&img_deref->dest.ssa); 556 intrin->src[1] = 557 nir_src_for_ssa(nir_vec4(&b, pbo_addr, zero, zero, zero)); 558 intrin->src[2] = nir_src_for_ssa(zero); 559 intrin->src[3] = nir_src_for_ssa(result); 560 intrin->num_components = 4; 561 nir_builder_instr_insert(&b, &intrin->instr); 562 } else { 563 nir_variable *color = 564 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), 565 "gl_FragColor"); 566 color->data.location = FRAG_RESULT_COLOR; 567 568 nir_store_var(&b, color, result, TGSI_WRITEMASK_XYZW); 569 } 570 571 return st_nir_finish_builtin_shader(st, b.shader, download ? 572 "st/pbo download FS" : 573 "st/pbo upload FS"); 574} 575 576static void * 577create_fs_tgsi(struct st_context *st, bool download, 578 enum pipe_texture_target target, 579 enum st_pbo_conversion conversion) 580{ 581 struct pipe_context *pipe = st->pipe; 582 struct pipe_screen *screen = pipe->screen; 583 struct ureg_program *ureg; 584 bool have_layer; 585 struct ureg_dst out; 586 struct ureg_src sampler; 587 struct ureg_src pos; 588 struct ureg_src layer; 589 struct ureg_src const0; 590 struct ureg_src const1; 591 struct ureg_dst temp0; 592 593 have_layer = 594 st->pbo.layers && 595 (!download || target == PIPE_TEXTURE_1D_ARRAY 596 || target == PIPE_TEXTURE_2D_ARRAY 597 || target == PIPE_TEXTURE_3D 598 || target == PIPE_TEXTURE_CUBE 599 || target == PIPE_TEXTURE_CUBE_ARRAY); 600 601 ureg = ureg_create(PIPE_SHADER_FRAGMENT); 602 if (!ureg) 603 return NULL; 604 605 if (!download) { 606 out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 607 } else { 608 struct ureg_src image; 609 610 /* writeonly images do not require an explicitly given format. */ 611 image = ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE, 612 true, false); 613 out = ureg_dst(image); 614 } 615 616 sampler = ureg_DECL_sampler(ureg, 0); 617 if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) { 618 pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 619 } else { 620 pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, 621 TGSI_INTERPOLATE_LINEAR); 622 } 623 if (have_layer) { 624 layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0, 625 TGSI_INTERPOLATE_CONSTANT); 626 } 627 const0 = ureg_DECL_constant(ureg, 0); 628 const1 = ureg_DECL_constant(ureg, 1); 629 temp0 = ureg_DECL_temporary(ureg); 630 631 /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */ 632 633 /* temp0.xy = f2i(temp0.xy) */ 634 ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), 635 ureg_swizzle(pos, 636 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, 637 TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y)); 638 639 /* temp0.xy = temp0.xy + const0.xy */ 640 ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), 641 ureg_swizzle(ureg_src(temp0), 642 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, 643 TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y), 644 ureg_swizzle(const0, 645 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, 646 TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y)); 647 648 /* temp0.x = const0.z * temp0.y + temp0.x */ 649 ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X), 650 ureg_scalar(const0, TGSI_SWIZZLE_Z), 651 ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y), 652 ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X)); 653 654 if (have_layer) { 655 /* temp0.x = const0.w * layer + temp0.x */ 656 ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X), 657 ureg_scalar(const0, TGSI_SWIZZLE_W), 658 ureg_scalar(layer, TGSI_SWIZZLE_X), 659 ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X)); 660 } 661 662 /* temp0.w = 0 */ 663 ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0)); 664 665 if (download) { 666 struct ureg_dst temp1; 667 struct ureg_src op[2]; 668 669 temp1 = ureg_DECL_temporary(ureg); 670 671 /* temp1.xy = pos.xy */ 672 ureg_F2I(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_XY), pos); 673 674 /* temp1.zw = 0 */ 675 ureg_MOV(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_ZW), ureg_imm1u(ureg, 0)); 676 677 if (have_layer) { 678 struct ureg_dst temp1_layer = 679 ureg_writemask(temp1, target == PIPE_TEXTURE_1D_ARRAY ? TGSI_WRITEMASK_Y 680 : TGSI_WRITEMASK_Z); 681 682 /* temp1.y/z = layer */ 683 ureg_MOV(ureg, temp1_layer, ureg_scalar(layer, TGSI_SWIZZLE_X)); 684 685 if (target == PIPE_TEXTURE_3D) { 686 /* temp1.z += layer_offset */ 687 ureg_UADD(ureg, temp1_layer, 688 ureg_scalar(ureg_src(temp1), TGSI_SWIZZLE_Z), 689 ureg_scalar(const1, TGSI_SWIZZLE_X)); 690 } 691 } 692 693 /* temp1 = txf(sampler, temp1) */ 694 ureg_TXF(ureg, temp1, util_pipe_tex_to_tgsi_tex(target, 1), 695 ureg_src(temp1), sampler); 696 697 build_conversion(ureg, &temp1, conversion); 698 699 /* store(out, temp0, temp1) */ 700 op[0] = ureg_src(temp0); 701 op[1] = ureg_src(temp1); 702 ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &out, 1, op, 2, 0, 703 TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE); 704 705 ureg_release_temporary(ureg, temp1); 706 } else { 707 /* out = txf(sampler, temp0.x) */ 708 ureg_TXF(ureg, temp0, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler); 709 710 build_conversion(ureg, &temp0, conversion); 711 712 ureg_MOV(ureg, out, ureg_src(temp0)); 713 } 714 715 ureg_release_temporary(ureg, temp0); 716 717 ureg_END(ureg); 718 719 return ureg_create_shader_and_destroy(ureg, pipe); 720} 721 722static void * 723create_fs(struct st_context *st, bool download, 724 enum pipe_texture_target target, 725 enum st_pbo_conversion conversion) 726{ 727 struct pipe_screen *pscreen = st->pipe->screen; 728 bool use_nir = PIPE_SHADER_IR_NIR == 729 pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, 730 PIPE_SHADER_CAP_PREFERRED_IR); 731 732 if (use_nir) 733 return create_fs_nir(st, download, target, conversion); 734 735 return create_fs_tgsi(st, download, target, conversion); 736} 737 738static enum st_pbo_conversion 739get_pbo_conversion(enum pipe_format src_format, enum pipe_format dst_format) 740{ 741 if (util_format_is_pure_uint(src_format)) { 742 if (util_format_is_pure_sint(dst_format)) 743 return ST_PBO_CONVERT_UINT_TO_SINT; 744 } else if (util_format_is_pure_sint(src_format)) { 745 if (util_format_is_pure_uint(dst_format)) 746 return ST_PBO_CONVERT_SINT_TO_UINT; 747 } 748 749 return ST_PBO_CONVERT_NONE; 750} 751 752void * 753st_pbo_get_upload_fs(struct st_context *st, 754 enum pipe_format src_format, 755 enum pipe_format dst_format) 756{ 757 STATIC_ASSERT(ARRAY_SIZE(st->pbo.upload_fs) == ST_NUM_PBO_CONVERSIONS); 758 759 enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format); 760 761 if (!st->pbo.upload_fs[conversion]) 762 st->pbo.upload_fs[conversion] = create_fs(st, false, 0, conversion); 763 764 return st->pbo.upload_fs[conversion]; 765} 766 767void * 768st_pbo_get_download_fs(struct st_context *st, enum pipe_texture_target target, 769 enum pipe_format src_format, 770 enum pipe_format dst_format) 771{ 772 STATIC_ASSERT(ARRAY_SIZE(st->pbo.download_fs) == ST_NUM_PBO_CONVERSIONS); 773 assert(target < PIPE_MAX_TEXTURE_TYPES); 774 775 enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format); 776 777 if (!st->pbo.download_fs[conversion][target]) 778 st->pbo.download_fs[conversion][target] = create_fs(st, true, target, conversion); 779 780 return st->pbo.download_fs[conversion][target]; 781} 782 783void 784st_init_pbo_helpers(struct st_context *st) 785{ 786 struct pipe_context *pipe = st->pipe; 787 struct pipe_screen *screen = pipe->screen; 788 789 st->pbo.upload_enabled = 790 screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) && 791 screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 && 792 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS); 793 if (!st->pbo.upload_enabled) 794 return; 795 796 st->pbo.download_enabled = 797 st->pbo.upload_enabled && 798 screen->get_param(screen, PIPE_CAP_SAMPLER_VIEW_TARGET) && 799 screen->get_param(screen, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT) && 800 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, 801 PIPE_SHADER_CAP_MAX_SHADER_IMAGES) >= 1; 802 803 st->pbo.rgba_only = 804 screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY); 805 806 if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) { 807 if (screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) { 808 st->pbo.layers = true; 809 } else if (screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) { 810 st->pbo.layers = true; 811 st->pbo.use_gs = true; 812 } 813 } 814 815 /* Blend state */ 816 memset(&st->pbo.upload_blend, 0, sizeof(struct pipe_blend_state)); 817 st->pbo.upload_blend.rt[0].colormask = PIPE_MASK_RGBA; 818 819 /* Rasterizer state */ 820 memset(&st->pbo.raster, 0, sizeof(struct pipe_rasterizer_state)); 821 st->pbo.raster.half_pixel_center = 1; 822} 823 824void 825st_destroy_pbo_helpers(struct st_context *st) 826{ 827 unsigned i; 828 829 for (i = 0; i < ARRAY_SIZE(st->pbo.upload_fs); ++i) { 830 if (st->pbo.upload_fs[i]) { 831 cso_delete_fragment_shader(st->cso_context, st->pbo.upload_fs[i]); 832 st->pbo.upload_fs[i] = NULL; 833 } 834 } 835 836 for (i = 0; i < ARRAY_SIZE(st->pbo.download_fs); ++i) { 837 for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.download_fs[0]); ++j) { 838 if (st->pbo.download_fs[i][j]) { 839 cso_delete_fragment_shader(st->cso_context, st->pbo.download_fs[i][j]); 840 st->pbo.download_fs[i][j] = NULL; 841 } 842 } 843 } 844 845 if (st->pbo.gs) { 846 cso_delete_geometry_shader(st->cso_context, st->pbo.gs); 847 st->pbo.gs = NULL; 848 } 849 850 if (st->pbo.vs) { 851 cso_delete_vertex_shader(st->cso_context, st->pbo.vs); 852 st->pbo.vs = NULL; 853 } 854} 855