1/* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28#include "radeon_program_pair.h" 29 30#include "radeon_compiler.h" 31#include "radeon_compiler_util.h" 32 33#include "util/compiler.h" 34 35 36/** 37 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction 38 * and reverse the order of arguments for CMP. 39 */ 40static void final_rewrite(struct rc_sub_instruction *inst) 41{ 42 struct rc_src_register tmp; 43 44 switch(inst->Opcode) { 45 case RC_OPCODE_ADD: 46 inst->SrcReg[2] = inst->SrcReg[1]; 47 inst->SrcReg[1].File = RC_FILE_NONE; 48 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; 49 inst->SrcReg[1].Negate = RC_MASK_NONE; 50 inst->Opcode = RC_OPCODE_MAD; 51 break; 52 case RC_OPCODE_CMP: 53 tmp = inst->SrcReg[2]; 54 inst->SrcReg[2] = inst->SrcReg[0]; 55 inst->SrcReg[0] = tmp; 56 break; 57 case RC_OPCODE_MOV: 58 /* AMD say we should use CMP. 59 * However, when we transform 60 * KIL -r0; 61 * into 62 * CMP tmp, -r0, -r0, 0; 63 * KIL tmp; 64 * we get incorrect behaviour on R500 when r0 == 0.0. 65 * It appears that the R500 KIL hardware treats -0.0 as less 66 * than zero. 67 */ 68 inst->SrcReg[1].File = RC_FILE_NONE; 69 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; 70 inst->SrcReg[2].File = RC_FILE_NONE; 71 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; 72 inst->Opcode = RC_OPCODE_MAD; 73 break; 74 case RC_OPCODE_MUL: 75 inst->SrcReg[2].File = RC_FILE_NONE; 76 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; 77 inst->Opcode = RC_OPCODE_MAD; 78 break; 79 default: 80 /* nothing to do */ 81 break; 82 } 83} 84 85 86/** 87 * Classify an instruction according to which ALUs etc. it needs 88 */ 89static void classify_instruction(struct rc_sub_instruction * inst, 90 int * needrgb, int * needalpha, int * istranscendent) 91{ 92 *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; 93 *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; 94 *istranscendent = 0; 95 96 if (inst->WriteALUResult == RC_ALURESULT_X) 97 *needrgb = 1; 98 else if (inst->WriteALUResult == RC_ALURESULT_W) 99 *needalpha = 1; 100 101 switch(inst->Opcode) { 102 case RC_OPCODE_ADD: 103 case RC_OPCODE_CMP: 104 case RC_OPCODE_CND: 105 case RC_OPCODE_DDX: 106 case RC_OPCODE_DDY: 107 case RC_OPCODE_FRC: 108 case RC_OPCODE_MAD: 109 case RC_OPCODE_MAX: 110 case RC_OPCODE_MIN: 111 case RC_OPCODE_MOV: 112 case RC_OPCODE_MUL: 113 break; 114 case RC_OPCODE_COS: 115 case RC_OPCODE_EX2: 116 case RC_OPCODE_LG2: 117 case RC_OPCODE_RCP: 118 case RC_OPCODE_RSQ: 119 case RC_OPCODE_SIN: 120 *istranscendent = 1; 121 *needalpha = 1; 122 break; 123 case RC_OPCODE_DP4: 124 *needalpha = 1; 125 FALLTHROUGH; 126 case RC_OPCODE_DP3: 127 *needrgb = 1; 128 break; 129 default: 130 break; 131 } 132} 133 134static void src_uses(struct rc_src_register src, unsigned int * rgb, 135 unsigned int * alpha) 136{ 137 int j; 138 for(j = 0; j < 4; ++j) { 139 unsigned int swz = GET_SWZ(src.Swizzle, j); 140 if (swz < 3) 141 *rgb = 1; 142 else if (swz < 4) 143 *alpha = 1; 144 } 145} 146 147/** 148 * Fill the given ALU instruction's opcodes and source operands into the given pair, 149 * if possible. 150 */ 151static void set_pair_instruction(struct r300_fragment_program_compiler *c, 152 struct rc_pair_instruction * pair, 153 struct rc_sub_instruction * inst) 154{ 155 int needrgb, needalpha, istranscendent; 156 const struct rc_opcode_info * opcode; 157 int i; 158 159 memset(pair, 0, sizeof(struct rc_pair_instruction)); 160 161 classify_instruction(inst, &needrgb, &needalpha, &istranscendent); 162 163 if (needrgb) { 164 if (istranscendent) 165 pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; 166 else 167 pair->RGB.Opcode = inst->Opcode; 168 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) 169 pair->RGB.Saturate = 1; 170 } 171 if (needalpha) { 172 pair->Alpha.Opcode = inst->Opcode; 173 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) 174 pair->Alpha.Saturate = 1; 175 } 176 177 opcode = rc_get_opcode_info(inst->Opcode); 178 179 /* Presubtract handling: 180 * We need to make sure that the values used by the presubtract 181 * operation end up in src0 or src1. */ 182 if(inst->PreSub.Opcode != RC_PRESUB_NONE) { 183 /* rc_pair_alloc_source() will fill in data for 184 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ 185 int j; 186 for(j = 0; j < 3; j++) { 187 int src_regs; 188 if(inst->SrcReg[j].File != RC_FILE_PRESUB) 189 continue; 190 191 src_regs = rc_presubtract_src_reg_count( 192 inst->PreSub.Opcode); 193 for(i = 0; i < src_regs; i++) { 194 unsigned int rgb = 0; 195 unsigned int alpha = 0; 196 src_uses(inst->SrcReg[j], &rgb, &alpha); 197 if(rgb) { 198 pair->RGB.Src[i].File = 199 inst->PreSub.SrcReg[i].File; 200 pair->RGB.Src[i].Index = 201 inst->PreSub.SrcReg[i].Index; 202 pair->RGB.Src[i].Used = 1; 203 } 204 if(alpha) { 205 pair->Alpha.Src[i].File = 206 inst->PreSub.SrcReg[i].File; 207 pair->Alpha.Src[i].Index = 208 inst->PreSub.SrcReg[i].Index; 209 pair->Alpha.Src[i].Used = 1; 210 } 211 } 212 } 213 } 214 215 for(i = 0; i < opcode->NumSrcRegs; ++i) { 216 int source; 217 if (needrgb && !istranscendent) { 218 unsigned int srcrgb = 0; 219 unsigned int srcalpha = 0; 220 unsigned int srcmask = 0; 221 int j; 222 /* We don't care about the alpha channel here. We only 223 * want the part of the swizzle that writes to rgb, 224 * since we are creating an rgb instruction. */ 225 for(j = 0; j < 3; ++j) { 226 unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); 227 228 if (swz < RC_SWIZZLE_W) 229 srcrgb = 1; 230 else if (swz == RC_SWIZZLE_W) 231 srcalpha = 1; 232 233 if (swz < RC_SWIZZLE_UNUSED) 234 srcmask |= 1 << j; 235 } 236 source = rc_pair_alloc_source(pair, srcrgb, srcalpha, 237 inst->SrcReg[i].File, inst->SrcReg[i].Index); 238 if (source < 0) { 239 rc_error(&c->Base, "Failed to translate " 240 "rgb instruction.\n"); 241 return; 242 } 243 pair->RGB.Arg[i].Source = source; 244 pair->RGB.Arg[i].Swizzle = 245 rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); 246 pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; 247 pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); 248 } 249 if (needalpha) { 250 unsigned int srcrgb = 0; 251 unsigned int srcalpha = 0; 252 unsigned int swz; 253 if (istranscendent) { 254 swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle); 255 } else { 256 swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3); 257 } 258 259 if (swz < 3) 260 srcrgb = 1; 261 else if (swz < 4) 262 srcalpha = 1; 263 source = rc_pair_alloc_source(pair, srcrgb, srcalpha, 264 inst->SrcReg[i].File, inst->SrcReg[i].Index); 265 if (source < 0) { 266 rc_error(&c->Base, "Failed to translate " 267 "alpha instruction.\n"); 268 return; 269 } 270 pair->Alpha.Arg[i].Source = source; 271 pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); 272 pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; 273 274 if (istranscendent) { 275 pair->Alpha.Arg[i].Negate = 276 !!(inst->SrcReg[i].Negate & 277 inst->DstReg.WriteMask); 278 } else { 279 pair->Alpha.Arg[i].Negate = 280 !!(inst->SrcReg[i].Negate & RC_MASK_W); 281 } 282 } 283 } 284 285 /* Destination handling */ 286 if (inst->DstReg.File == RC_FILE_OUTPUT) { 287 if (inst->DstReg.Index == c->OutputDepth) { 288 pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); 289 } else { 290 for (i = 0; i < 4; i++) { 291 if (inst->DstReg.Index == c->OutputColor[i]) { 292 pair->RGB.Target = i; 293 pair->Alpha.Target = i; 294 pair->RGB.OutputWriteMask |= 295 inst->DstReg.WriteMask & RC_MASK_XYZ; 296 pair->Alpha.OutputWriteMask |= 297 GET_BIT(inst->DstReg.WriteMask, 3); 298 break; 299 } 300 } 301 } 302 } else { 303 if (needrgb) { 304 pair->RGB.DestIndex = inst->DstReg.Index; 305 pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; 306 } 307 308 if (needalpha) { 309 pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); 310 if (pair->Alpha.WriteMask) { 311 pair->Alpha.DestIndex = inst->DstReg.Index; 312 } 313 } 314 } 315 316 if (needrgb) { 317 pair->RGB.Omod = inst->Omod; 318 } 319 if (needalpha) { 320 pair->Alpha.Omod = inst->Omod; 321 } 322 323 if (inst->WriteALUResult) { 324 pair->WriteALUResult = inst->WriteALUResult; 325 pair->ALUResultCompare = inst->ALUResultCompare; 326 } 327} 328 329 330static void check_opcode_support(struct r300_fragment_program_compiler *c, 331 struct rc_sub_instruction *inst) 332{ 333 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); 334 335 if (opcode->HasDstReg) { 336 if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { 337 rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); 338 return; 339 } 340 } 341 342 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { 343 if (inst->SrcReg[i].RelAddr) { 344 rc_error(&c->Base, "Fragment program does not support relative addressing " 345 " of source operands.\n"); 346 return; 347 } 348 } 349} 350 351 352/** 353 * Translate all ALU instructions into corresponding pair instructions, 354 * performing no other changes. 355 */ 356void rc_pair_translate(struct radeon_compiler *cc, void *user) 357{ 358 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; 359 360 for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; 361 inst != &c->Base.Program.Instructions; 362 inst = inst->Next) { 363 const struct rc_opcode_info * opcode; 364 struct rc_sub_instruction copy; 365 366 if (inst->Type != RC_INSTRUCTION_NORMAL) 367 continue; 368 369 opcode = rc_get_opcode_info(inst->U.I.Opcode); 370 371 if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) 372 continue; 373 374 copy = inst->U.I; 375 376 check_opcode_support(c, ©); 377 378 final_rewrite(©); 379 inst->Type = RC_INSTRUCTION_PAIR; 380 set_pair_instruction(c, &inst->U.P, ©); 381 } 382} 383