1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2018 Red Hat 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Rob Clark (robdclark@gmail.com) 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg#include "math.h" 28b8e80941Smrg 29b8e80941Smrg#include "nir/nir_builtin_builder.h" 30b8e80941Smrg 31b8e80941Smrg#include "vtn_private.h" 32b8e80941Smrg#include "OpenCL.std.h" 33b8e80941Smrg 34b8e80941Smrgtypedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b, enum OpenCLstd opcode, 35b8e80941Smrg unsigned num_srcs, nir_ssa_def **srcs, 36b8e80941Smrg const struct glsl_type *dest_type); 37b8e80941Smrg 38b8e80941Smrgstatic void 39b8e80941Smrghandle_instr(struct vtn_builder *b, enum OpenCLstd opcode, const uint32_t *w, 40b8e80941Smrg unsigned count, nir_handler handler) 41b8e80941Smrg{ 42b8e80941Smrg const struct glsl_type *dest_type = 43b8e80941Smrg vtn_value(b, w[1], vtn_value_type_type)->type->type; 44b8e80941Smrg 45b8e80941Smrg unsigned num_srcs = count - 5; 46b8e80941Smrg nir_ssa_def *srcs[3] = { NULL }; 47b8e80941Smrg vtn_assert(num_srcs <= ARRAY_SIZE(srcs)); 48b8e80941Smrg for (unsigned i = 0; i < num_srcs; i++) { 49b8e80941Smrg srcs[i] = vtn_ssa_value(b, w[i + 5])->def; 50b8e80941Smrg } 51b8e80941Smrg 52b8e80941Smrg nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, dest_type); 53b8e80941Smrg if (result) { 54b8e80941Smrg struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); 55b8e80941Smrg val->ssa = vtn_create_ssa_value(b, dest_type); 56b8e80941Smrg val->ssa->def = result; 57b8e80941Smrg } else { 58b8e80941Smrg vtn_assert(dest_type == glsl_void_type()); 59b8e80941Smrg } 60b8e80941Smrg} 61b8e80941Smrg 62b8e80941Smrgstatic nir_op 63b8e80941Smrgnir_alu_op_for_opencl_opcode(struct vtn_builder *b, enum OpenCLstd opcode) 64b8e80941Smrg{ 65b8e80941Smrg switch (opcode) { 66b8e80941Smrg case Fabs: return nir_op_fabs; 67b8e80941Smrg case SAbs: return nir_op_iabs; 68b8e80941Smrg case SAdd_sat: return nir_op_iadd_sat; 69b8e80941Smrg case UAdd_sat: return nir_op_uadd_sat; 70b8e80941Smrg case Ceil: return nir_op_fceil; 71b8e80941Smrg case Cos: return nir_op_fcos; 72b8e80941Smrg case Exp2: return nir_op_fexp2; 73b8e80941Smrg case Log2: return nir_op_flog2; 74b8e80941Smrg case Floor: return nir_op_ffloor; 75b8e80941Smrg case SHadd: return nir_op_ihadd; 76b8e80941Smrg case UHadd: return nir_op_uhadd; 77b8e80941Smrg case Fma: return nir_op_ffma; 78b8e80941Smrg case Fmax: return nir_op_fmax; 79b8e80941Smrg case SMax: return nir_op_imax; 80b8e80941Smrg case UMax: return nir_op_umax; 81b8e80941Smrg case Fmin: return nir_op_fmin; 82b8e80941Smrg case SMin: return nir_op_imin; 83b8e80941Smrg case UMin: return nir_op_umin; 84b8e80941Smrg case Fmod: return nir_op_fmod; 85b8e80941Smrg case Mix: return nir_op_flrp; 86b8e80941Smrg case SMul_hi: return nir_op_imul_high; 87b8e80941Smrg case UMul_hi: return nir_op_umul_high; 88b8e80941Smrg case Popcount: return nir_op_bit_count; 89b8e80941Smrg case Pow: return nir_op_fpow; 90b8e80941Smrg case Remainder: return nir_op_frem; 91b8e80941Smrg case SRhadd: return nir_op_irhadd; 92b8e80941Smrg case URhadd: return nir_op_urhadd; 93b8e80941Smrg case Rsqrt: return nir_op_frsq; 94b8e80941Smrg case Sign: return nir_op_fsign; 95b8e80941Smrg case Sin: return nir_op_fsin; 96b8e80941Smrg case Sqrt: return nir_op_fsqrt; 97b8e80941Smrg case SSub_sat: return nir_op_isub_sat; 98b8e80941Smrg case USub_sat: return nir_op_usub_sat; 99b8e80941Smrg case Trunc: return nir_op_ftrunc; 100b8e80941Smrg /* uhm... */ 101b8e80941Smrg case UAbs: return nir_op_imov; 102b8e80941Smrg default: 103b8e80941Smrg vtn_fail("No NIR equivalent"); 104b8e80941Smrg } 105b8e80941Smrg} 106b8e80941Smrg 107b8e80941Smrgstatic nir_ssa_def * 108b8e80941Smrghandle_alu(struct vtn_builder *b, enum OpenCLstd opcode, unsigned num_srcs, 109b8e80941Smrg nir_ssa_def **srcs, const struct glsl_type *dest_type) 110b8e80941Smrg{ 111b8e80941Smrg return nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, opcode), 112b8e80941Smrg srcs[0], srcs[1], srcs[2], NULL); 113b8e80941Smrg} 114b8e80941Smrg 115b8e80941Smrgstatic nir_ssa_def * 116b8e80941Smrghandle_special(struct vtn_builder *b, enum OpenCLstd opcode, unsigned num_srcs, 117b8e80941Smrg nir_ssa_def **srcs, const struct glsl_type *dest_type) 118b8e80941Smrg{ 119b8e80941Smrg nir_builder *nb = &b->nb; 120b8e80941Smrg 121b8e80941Smrg switch (opcode) { 122b8e80941Smrg case SAbs_diff: 123b8e80941Smrg return nir_iabs_diff(nb, srcs[0], srcs[1]); 124b8e80941Smrg case UAbs_diff: 125b8e80941Smrg return nir_uabs_diff(nb, srcs[0], srcs[1]); 126b8e80941Smrg case Bitselect: 127b8e80941Smrg return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]); 128b8e80941Smrg case FClamp: 129b8e80941Smrg return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]); 130b8e80941Smrg case SClamp: 131b8e80941Smrg return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]); 132b8e80941Smrg case UClamp: 133b8e80941Smrg return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]); 134b8e80941Smrg case Copysign: 135b8e80941Smrg return nir_copysign(nb, srcs[0], srcs[1]); 136b8e80941Smrg case Cross: 137b8e80941Smrg if (glsl_get_components(dest_type) == 4) 138b8e80941Smrg return nir_cross4(nb, srcs[0], srcs[1]); 139b8e80941Smrg return nir_cross3(nb, srcs[0], srcs[1]); 140b8e80941Smrg case Degrees: 141b8e80941Smrg return nir_degrees(nb, srcs[0]); 142b8e80941Smrg case Fdim: 143b8e80941Smrg return nir_fdim(nb, srcs[0], srcs[1]); 144b8e80941Smrg case Distance: 145b8e80941Smrg return nir_distance(nb, srcs[0], srcs[1]); 146b8e80941Smrg case Fast_distance: 147b8e80941Smrg return nir_fast_distance(nb, srcs[0], srcs[1]); 148b8e80941Smrg case Fast_length: 149b8e80941Smrg return nir_fast_length(nb, srcs[0]); 150b8e80941Smrg case Fast_normalize: 151b8e80941Smrg return nir_fast_normalize(nb, srcs[0]); 152b8e80941Smrg case Length: 153b8e80941Smrg return nir_length(nb, srcs[0]); 154b8e80941Smrg case Mad: 155b8e80941Smrg return nir_fmad(nb, srcs[0], srcs[1], srcs[2]); 156b8e80941Smrg case Maxmag: 157b8e80941Smrg return nir_maxmag(nb, srcs[0], srcs[1]); 158b8e80941Smrg case Minmag: 159b8e80941Smrg return nir_minmag(nb, srcs[0], srcs[1]); 160b8e80941Smrg case Nan: 161b8e80941Smrg return nir_nan(nb, srcs[0]); 162b8e80941Smrg case Nextafter: 163b8e80941Smrg return nir_nextafter(nb, srcs[0], srcs[1]); 164b8e80941Smrg case Normalize: 165b8e80941Smrg return nir_normalize(nb, srcs[0]); 166b8e80941Smrg case Radians: 167b8e80941Smrg return nir_radians(nb, srcs[0]); 168b8e80941Smrg case Rotate: 169b8e80941Smrg return nir_rotate(nb, srcs[0], srcs[1]); 170b8e80941Smrg case Smoothstep: 171b8e80941Smrg return nir_smoothstep(nb, srcs[0], srcs[1], srcs[2]); 172b8e80941Smrg case Select: 173b8e80941Smrg return nir_select(nb, srcs[0], srcs[1], srcs[2]); 174b8e80941Smrg case Step: 175b8e80941Smrg return nir_sge(nb, srcs[1], srcs[0]); 176b8e80941Smrg case S_Upsample: 177b8e80941Smrg case U_Upsample: 178b8e80941Smrg return nir_upsample(nb, srcs[0], srcs[1]); 179b8e80941Smrg default: 180b8e80941Smrg vtn_fail("No NIR equivalent"); 181b8e80941Smrg return NULL; 182b8e80941Smrg } 183b8e80941Smrg} 184b8e80941Smrg 185b8e80941Smrgstatic void 186b8e80941Smrg_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd opcode, 187b8e80941Smrg const uint32_t *w, unsigned count, bool load) 188b8e80941Smrg{ 189b8e80941Smrg struct vtn_type *type; 190b8e80941Smrg if (load) 191b8e80941Smrg type = vtn_value(b, w[1], vtn_value_type_type)->type; 192b8e80941Smrg else 193b8e80941Smrg type = vtn_untyped_value(b, w[5])->type; 194b8e80941Smrg unsigned a = load ? 0 : 1; 195b8e80941Smrg 196b8e80941Smrg const struct glsl_type *dest_type = type->type; 197b8e80941Smrg unsigned components = glsl_get_vector_elements(dest_type); 198b8e80941Smrg unsigned stride = components * glsl_get_bit_size(dest_type) / 8; 199b8e80941Smrg 200b8e80941Smrg nir_ssa_def *offset = vtn_ssa_value(b, w[5 + a])->def; 201b8e80941Smrg struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer); 202b8e80941Smrg 203b8e80941Smrg nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer); 204b8e80941Smrg 205b8e80941Smrg /* 1. cast to vec type with adjusted stride */ 206b8e80941Smrg deref = nir_build_deref_cast(&b->nb, &deref->dest.ssa, deref->mode, 207b8e80941Smrg dest_type, stride); 208b8e80941Smrg /* 2. deref ptr_as_array */ 209b8e80941Smrg deref = nir_build_deref_ptr_as_array(&b->nb, deref, offset); 210b8e80941Smrg 211b8e80941Smrg if (load) { 212b8e80941Smrg struct vtn_ssa_value *val = vtn_local_load(b, deref, p->type->access); 213b8e80941Smrg vtn_push_ssa(b, w[2], type, val); 214b8e80941Smrg } else { 215b8e80941Smrg struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]); 216b8e80941Smrg vtn_local_store(b, val, deref, p->type->access); 217b8e80941Smrg } 218b8e80941Smrg} 219b8e80941Smrg 220b8e80941Smrgstatic void 221b8e80941Smrgvtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd opcode, 222b8e80941Smrg const uint32_t *w, unsigned count) 223b8e80941Smrg{ 224b8e80941Smrg _handle_v_load_store(b, opcode, w, count, true); 225b8e80941Smrg} 226b8e80941Smrg 227b8e80941Smrgstatic void 228b8e80941Smrgvtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd opcode, 229b8e80941Smrg const uint32_t *w, unsigned count) 230b8e80941Smrg{ 231b8e80941Smrg _handle_v_load_store(b, opcode, w, count, false); 232b8e80941Smrg} 233b8e80941Smrg 234b8e80941Smrgstatic nir_ssa_def * 235b8e80941Smrghandle_printf(struct vtn_builder *b, enum OpenCLstd opcode, unsigned num_srcs, 236b8e80941Smrg nir_ssa_def **srcs, const struct glsl_type *dest_type) 237b8e80941Smrg{ 238b8e80941Smrg /* hahah, yeah, right.. */ 239b8e80941Smrg return nir_imm_int(&b->nb, -1); 240b8e80941Smrg} 241b8e80941Smrg 242b8e80941Smrgbool 243b8e80941Smrgvtn_handle_opencl_instruction(struct vtn_builder *b, uint32_t ext_opcode, 244b8e80941Smrg const uint32_t *w, unsigned count) 245b8e80941Smrg{ 246b8e80941Smrg switch (ext_opcode) { 247b8e80941Smrg case Fabs: 248b8e80941Smrg case SAbs: 249b8e80941Smrg case UAbs: 250b8e80941Smrg case SAdd_sat: 251b8e80941Smrg case UAdd_sat: 252b8e80941Smrg case Ceil: 253b8e80941Smrg case Cos: 254b8e80941Smrg case Exp2: 255b8e80941Smrg case Log2: 256b8e80941Smrg case Floor: 257b8e80941Smrg case Fma: 258b8e80941Smrg case Fmax: 259b8e80941Smrg case SHadd: 260b8e80941Smrg case UHadd: 261b8e80941Smrg case SMax: 262b8e80941Smrg case UMax: 263b8e80941Smrg case Fmin: 264b8e80941Smrg case SMin: 265b8e80941Smrg case UMin: 266b8e80941Smrg case Mix: 267b8e80941Smrg case Fmod: 268b8e80941Smrg case SMul_hi: 269b8e80941Smrg case UMul_hi: 270b8e80941Smrg case Popcount: 271b8e80941Smrg case Pow: 272b8e80941Smrg case Remainder: 273b8e80941Smrg case SRhadd: 274b8e80941Smrg case URhadd: 275b8e80941Smrg case Rsqrt: 276b8e80941Smrg case Sign: 277b8e80941Smrg case Sin: 278b8e80941Smrg case Sqrt: 279b8e80941Smrg case SSub_sat: 280b8e80941Smrg case USub_sat: 281b8e80941Smrg case Trunc: 282b8e80941Smrg handle_instr(b, ext_opcode, w, count, handle_alu); 283b8e80941Smrg return true; 284b8e80941Smrg case SAbs_diff: 285b8e80941Smrg case UAbs_diff: 286b8e80941Smrg case Bitselect: 287b8e80941Smrg case FClamp: 288b8e80941Smrg case SClamp: 289b8e80941Smrg case UClamp: 290b8e80941Smrg case Copysign: 291b8e80941Smrg case Cross: 292b8e80941Smrg case Degrees: 293b8e80941Smrg case Fdim: 294b8e80941Smrg case Distance: 295b8e80941Smrg case Fast_distance: 296b8e80941Smrg case Fast_length: 297b8e80941Smrg case Fast_normalize: 298b8e80941Smrg case Length: 299b8e80941Smrg case Mad: 300b8e80941Smrg case Maxmag: 301b8e80941Smrg case Minmag: 302b8e80941Smrg case Nan: 303b8e80941Smrg case Nextafter: 304b8e80941Smrg case Normalize: 305b8e80941Smrg case Radians: 306b8e80941Smrg case Rotate: 307b8e80941Smrg case Select: 308b8e80941Smrg case Step: 309b8e80941Smrg case Smoothstep: 310b8e80941Smrg case S_Upsample: 311b8e80941Smrg case U_Upsample: 312b8e80941Smrg handle_instr(b, ext_opcode, w, count, handle_special); 313b8e80941Smrg return true; 314b8e80941Smrg case Vloadn: 315b8e80941Smrg vtn_handle_opencl_vload(b, ext_opcode, w, count); 316b8e80941Smrg return true; 317b8e80941Smrg case Vstoren: 318b8e80941Smrg vtn_handle_opencl_vstore(b, ext_opcode, w, count); 319b8e80941Smrg return true; 320b8e80941Smrg case Printf: 321b8e80941Smrg handle_instr(b, ext_opcode, w, count, handle_printf); 322b8e80941Smrg return true; 323b8e80941Smrg case Prefetch: 324b8e80941Smrg /* TODO maybe add a nir instruction for this? */ 325b8e80941Smrg return true; 326b8e80941Smrg default: 327b8e80941Smrg vtn_fail("unhandled opencl opc: %u\n", ext_opcode); 328b8e80941Smrg return false; 329b8e80941Smrg } 330b8e80941Smrg} 331