1/************************************************************************** 2 * 3 * Copyright 2003 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "util/u_math.h" 29#include "i915_context.h" 30#include "i915_fpc.h" 31#include "i915_reg.h" 32 33uint32_t 34i915_get_temp(struct i915_fp_compile *p) 35{ 36 int bit = ffs(~p->temp_flag); 37 if (!bit) { 38 i915_program_error(p, "i915_get_temp: out of temporaries"); 39 return 0; 40 } 41 42 p->temp_flag |= 1 << (bit - 1); 43 return bit - 1; 44} 45 46static void 47i915_release_temp(struct i915_fp_compile *p, int reg) 48{ 49 p->temp_flag &= ~(1 << reg); 50} 51 52/** 53 * Get unpreserved temporary, a temp whose value is not preserved between 54 * PS program phases. 55 */ 56uint32_t 57i915_get_utemp(struct i915_fp_compile *p) 58{ 59 int bit = ffs(~p->utemp_flag); 60 if (!bit) { 61 i915_program_error(p, "i915_get_utemp: out of temporaries"); 62 return 0; 63 } 64 65 p->utemp_flag |= 1 << (bit - 1); 66 return UREG(REG_TYPE_U, (bit - 1)); 67} 68 69void 70i915_release_utemps(struct i915_fp_compile *p) 71{ 72 p->utemp_flag = ~0x7; 73} 74 75uint32_t 76i915_emit_decl(struct i915_fp_compile *p, uint32_t type, uint32_t nr, 77 uint32_t d0_flags) 78{ 79 uint32_t reg = UREG(type, nr); 80 81 if (type == REG_TYPE_T) { 82 if (p->decl_t & (1 << nr)) 83 return reg; 84 85 p->decl_t |= (1 << nr); 86 } else if (type == REG_TYPE_S) { 87 if (p->decl_s & (1 << nr)) 88 return reg; 89 90 p->decl_s |= (1 << nr); 91 } else 92 return reg; 93 94 if (p->decl < p->declarations + I915_PROGRAM_SIZE) { 95 *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); 96 *(p->decl++) = D1_MBZ; 97 *(p->decl++) = D2_MBZ; 98 } else 99 i915_program_error(p, "Out of declarations"); 100 101 p->nr_decl_insn++; 102 return reg; 103} 104 105uint32_t 106i915_emit_arith(struct i915_fp_compile *p, uint32_t op, uint32_t dest, 107 uint32_t mask, uint32_t saturate, uint32_t src0, uint32_t src1, 108 uint32_t src2) 109{ 110 uint32_t c[3]; 111 uint32_t nr_const = 0; 112 113 assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); 114 dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); 115 assert(dest); 116 117 if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) 118 c[nr_const++] = 0; 119 if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) 120 c[nr_const++] = 1; 121 if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) 122 c[nr_const++] = 2; 123 124 /* Recursively call this function to MOV additional const values 125 * into temporary registers. Use utemp registers for this - 126 * currently shouldn't be possible to run out, but keep an eye on 127 * this. 128 */ 129 if (nr_const > 1) { 130 uint32_t s[3], first, i, old_utemp_flag; 131 132 s[0] = src0; 133 s[1] = src1; 134 s[2] = src2; 135 old_utemp_flag = p->utemp_flag; 136 137 first = GET_UREG_NR(s[c[0]]); 138 for (i = 1; i < nr_const; i++) { 139 if (GET_UREG_NR(s[c[i]]) != first) { 140 uint32_t tmp = i915_get_utemp(p); 141 142 i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, s[c[i]], 0, 143 0); 144 s[c[i]] = tmp; 145 } 146 } 147 148 src0 = s[0]; 149 src1 = s[1]; 150 src2 = s[2]; 151 p->utemp_flag = old_utemp_flag; /* restore */ 152 } 153 154 if (p->csr < p->program + I915_PROGRAM_SIZE) { 155 *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); 156 *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); 157 *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); 158 } else 159 i915_program_error(p, "Out of instructions"); 160 161 if (GET_UREG_TYPE(dest) == REG_TYPE_R) 162 p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; 163 164 p->nr_alu_insn++; 165 return dest; 166} 167 168/** 169 * Emit a texture load or texkill instruction. 170 * \param dest the dest i915 register 171 * \param destmask the dest register writemask 172 * \param sampler the i915 sampler register 173 * \param coord the i915 source texcoord operand 174 * \param opcode the instruction opcode 175 */ 176uint32_t 177i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask, 178 uint32_t sampler, uint32_t coord, uint32_t opcode, 179 uint32_t num_coord) 180{ 181 const uint32_t k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); 182 183 int temp = -1; 184 uint32_t ignore = 0; 185 186 /* Eliminate the useless texture coordinates. Otherwise we end up generating 187 * a swizzle for no reason below. */ 188 switch (num_coord) { 189 case 1: 190 /* For 1D textures, make sure that the Y coordinate is actually 191 * initialized. It seems that if the channel is never written during the 192 * program, texturing returns undefined results (even if the Y wrap is 193 * REPEAT). 194 */ 195 coord = swizzle(coord, X, X, Z, W); 196 FALLTHROUGH; 197 case 2: 198 ignore |= (0xf << UREG_CHANNEL_Z_SHIFT); 199 FALLTHROUGH; 200 case 3: 201 ignore |= (0xf << UREG_CHANNEL_W_SHIFT); 202 } 203 204 if ((coord & ~ignore) != (k & ~ignore) || 205 GET_UREG_TYPE(coord) == REG_TYPE_CONST) { 206 /* texcoord is swizzled or negated. Need to allocate a new temporary 207 * register (a utemp / unpreserved temp) won't do. 208 */ 209 uint32_t tempReg; 210 211 temp = i915_get_temp(p); /* get temp reg index */ 212 tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ 213 214 i915_emit_arith(p, A0_MOV, tempReg, 215 A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ 216 0, /* saturate */ 217 coord, 0, 0); /* src0, src1, src2 */ 218 219 /* new src texcoord is tempReg */ 220 coord = tempReg; 221 } 222 223 /* Don't worry about saturate as we only support 224 */ 225 if (destmask != A0_DEST_CHANNEL_ALL) { 226 /* if not writing to XYZW... */ 227 uint32_t tmp = i915_get_utemp(p); 228 i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode, 229 num_coord); 230 i915_emit_arith(p, A0_MOV, dest, destmask, 0, tmp, 0, 0); 231 /* XXX release utemp here? */ 232 } else { 233 assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); 234 assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); 235 236 /* Output register being oC or oD defines a phase boundary */ 237 if (GET_UREG_TYPE(dest) == REG_TYPE_OC || 238 GET_UREG_TYPE(dest) == REG_TYPE_OD) 239 p->nr_tex_indirect++; 240 241 /* Reading from an r# register whose contents depend on output of the 242 * current phase defines a phase boundary. 243 */ 244 if (GET_UREG_TYPE(coord) == REG_TYPE_R && 245 p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) 246 p->nr_tex_indirect++; 247 248 if (p->csr < p->program + I915_PROGRAM_SIZE) { 249 *(p->csr++) = (opcode | T0_DEST(dest) | T0_SAMPLER(sampler)); 250 251 *(p->csr++) = T1_ADDRESS_REG(coord); 252 *(p->csr++) = T2_MBZ; 253 } else 254 i915_program_error(p, "Out of instructions"); 255 256 if (GET_UREG_TYPE(dest) == REG_TYPE_R) 257 p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; 258 259 p->nr_tex_insn++; 260 } 261 262 if (temp >= 0) 263 i915_release_temp(p, temp); 264 265 return dest; 266} 267 268uint32_t 269i915_emit_const1f(struct i915_fp_compile *p, float c0) 270{ 271 struct i915_fragment_shader *ifs = p->shader; 272 unsigned reg, idx; 273 274 if (c0 == 0.0) 275 return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); 276 if (c0 == 1.0) 277 return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); 278 279 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 280 if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) 281 continue; 282 for (idx = 0; idx < 4; idx++) { 283 if (!(ifs->constant_flags[reg] & (1 << idx)) || 284 ifs->constants[reg][idx] == c0) { 285 ifs->constants[reg][idx] = c0; 286 ifs->constant_flags[reg] |= 1 << idx; 287 if (reg + 1 > ifs->num_constants) 288 ifs->num_constants = reg + 1; 289 return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); 290 } 291 } 292 } 293 294 i915_program_error(p, "i915_emit_const1f: out of constants"); 295 return 0; 296} 297 298uint32_t 299i915_emit_const2f(struct i915_fp_compile *p, float c0, float c1) 300{ 301 struct i915_fragment_shader *ifs = p->shader; 302 unsigned reg, idx; 303 304 if (c0 == 0.0) 305 return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); 306 if (c0 == 1.0) 307 return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); 308 309 if (c1 == 0.0) 310 return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); 311 if (c1 == 1.0) 312 return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); 313 314 // XXX emit swizzle here for 0, 1, -1 and any combination thereof 315 // we can use swizzle + neg for that 316 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 317 if (ifs->constant_flags[reg] == 0xf || 318 ifs->constant_flags[reg] == I915_CONSTFLAG_USER) 319 continue; 320 for (idx = 0; idx < 3; idx++) { 321 if (!(ifs->constant_flags[reg] & (3 << idx))) { 322 ifs->constants[reg][idx + 0] = c0; 323 ifs->constants[reg][idx + 1] = c1; 324 ifs->constant_flags[reg] |= 3 << idx; 325 if (reg + 1 > ifs->num_constants) 326 ifs->num_constants = reg + 1; 327 return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); 328 } 329 } 330 } 331 332 i915_program_error(p, "i915_emit_const2f: out of constants"); 333 return 0; 334} 335 336uint32_t 337i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2, 338 float c3) 339{ 340 struct i915_fragment_shader *ifs = p->shader; 341 unsigned reg; 342 343 // XXX emit swizzle here for 0, 1, -1 and any combination thereof 344 // we can use swizzle + neg for that 345 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 346 if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 && 347 ifs->constants[reg][1] == c1 && ifs->constants[reg][2] == c2 && 348 ifs->constants[reg][3] == c3) { 349 return UREG(REG_TYPE_CONST, reg); 350 } else if (ifs->constant_flags[reg] == 0) { 351 352 ifs->constants[reg][0] = c0; 353 ifs->constants[reg][1] = c1; 354 ifs->constants[reg][2] = c2; 355 ifs->constants[reg][3] = c3; 356 ifs->constant_flags[reg] = 0xf; 357 if (reg + 1 > ifs->num_constants) 358 ifs->num_constants = reg + 1; 359 return UREG(REG_TYPE_CONST, reg); 360 } 361 } 362 363 i915_program_error(p, "i915_emit_const4f: out of constants"); 364 return 0; 365} 366 367uint32_t 368i915_emit_const4fv(struct i915_fp_compile *p, const float *c) 369{ 370 return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); 371} 372