1848b8605Smrg/************************************************************************** 2848b8605Smrg * 3848b8605Smrg * Copyright © 2010 Luca Barbieri 4848b8605Smrg * 5848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 6848b8605Smrg * copy of this software and associated documentation files (the "Software"), 7848b8605Smrg * to deal in the Software without restriction, including without limitation 8848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the 10848b8605Smrg * Software is furnished to do so, subject to the following conditions: 11848b8605Smrg * 12848b8605Smrg * The above copyright notice and this permission notice (including the next 13848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 14848b8605Smrg * Software. 15848b8605Smrg * 16848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20848b8605Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21848b8605Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22848b8605Smrg * DEALINGS IN THE SOFTWARE. 23848b8605Smrg * 24848b8605Smrg **************************************************************************/ 25848b8605Smrg 26848b8605Smrg#include <stdio.h> 27848b8605Smrg#include "translate/translate.h" 28848b8605Smrg#include "util/u_memory.h" 29848b8605Smrg#include "util/u_format.h" 30848b8605Smrg#include "util/u_half.h" 31848b8605Smrg#include "util/u_cpu_detect.h" 32848b8605Smrg#include "rtasm/rtasm_cpu.h" 33848b8605Smrg 34848b8605Smrg/* don't use this for serious use */ 35848b8605Smrgstatic double rand_double() 36848b8605Smrg{ 37848b8605Smrg const double rm = (double)RAND_MAX + 1; 38848b8605Smrg double div = 1; 39848b8605Smrg double v = 0; 40848b8605Smrg unsigned i; 41848b8605Smrg for(i = 0; i < 4; ++i) 42848b8605Smrg { 43848b8605Smrg div *= rm; 44848b8605Smrg v += (double)rand() / div; 45848b8605Smrg } 46848b8605Smrg return v; 47848b8605Smrg} 48848b8605Smrg 49848b8605Smrgint main(int argc, char** argv) 50848b8605Smrg{ 51848b8605Smrg struct translate *(*create_fn)(const struct translate_key *key) = 0; 52848b8605Smrg 53848b8605Smrg struct translate_key key; 54848b8605Smrg unsigned output_format; 55848b8605Smrg unsigned input_format; 56848b8605Smrg unsigned buffer_size = 4096; 57848b8605Smrg unsigned char* buffer[5]; 58848b8605Smrg unsigned char* byte_buffer; 59848b8605Smrg float* float_buffer; 60848b8605Smrg double* double_buffer; 61848b8605Smrg uint16_t *half_buffer; 62848b8605Smrg unsigned * elts; 63848b8605Smrg unsigned count = 4; 64848b8605Smrg unsigned i, j, k; 65848b8605Smrg unsigned passed = 0; 66848b8605Smrg unsigned total = 0; 67848b8605Smrg const float error = 0.03125; 68848b8605Smrg 69848b8605Smrg create_fn = 0; 70848b8605Smrg 71848b8605Smrg util_cpu_detect(); 72848b8605Smrg 73b8e80941Smrg if (argc <= 1 || 74b8e80941Smrg !strcmp(argv[1], "default") ) 75b8e80941Smrg create_fn = translate_create; 76848b8605Smrg else if (!strcmp(argv[1], "generic")) 77848b8605Smrg create_fn = translate_generic_create; 78848b8605Smrg else if (!strcmp(argv[1], "x86")) 79848b8605Smrg create_fn = translate_sse2_create; 80848b8605Smrg else if (!strcmp(argv[1], "nosse")) 81848b8605Smrg { 82848b8605Smrg util_cpu_caps.has_sse = 0; 83848b8605Smrg util_cpu_caps.has_sse2 = 0; 84848b8605Smrg util_cpu_caps.has_sse3 = 0; 85848b8605Smrg util_cpu_caps.has_sse4_1 = 0; 86848b8605Smrg create_fn = translate_sse2_create; 87848b8605Smrg } 88848b8605Smrg else if (!strcmp(argv[1], "sse")) 89848b8605Smrg { 90848b8605Smrg if(!util_cpu_caps.has_sse || !rtasm_cpu_has_sse()) 91848b8605Smrg { 92848b8605Smrg printf("Error: CPU doesn't support SSE (test with qemu)\n"); 93848b8605Smrg return 2; 94848b8605Smrg } 95848b8605Smrg util_cpu_caps.has_sse2 = 0; 96848b8605Smrg util_cpu_caps.has_sse3 = 0; 97848b8605Smrg util_cpu_caps.has_sse4_1 = 0; 98848b8605Smrg create_fn = translate_sse2_create; 99848b8605Smrg } 100848b8605Smrg else if (!strcmp(argv[1], "sse2")) 101848b8605Smrg { 102848b8605Smrg if(!util_cpu_caps.has_sse2 || !rtasm_cpu_has_sse()) 103848b8605Smrg { 104848b8605Smrg printf("Error: CPU doesn't support SSE2 (test with qemu)\n"); 105848b8605Smrg return 2; 106848b8605Smrg } 107848b8605Smrg util_cpu_caps.has_sse3 = 0; 108848b8605Smrg util_cpu_caps.has_sse4_1 = 0; 109848b8605Smrg create_fn = translate_sse2_create; 110848b8605Smrg } 111848b8605Smrg else if (!strcmp(argv[1], "sse3")) 112848b8605Smrg { 113848b8605Smrg if(!util_cpu_caps.has_sse3 || !rtasm_cpu_has_sse()) 114848b8605Smrg { 115848b8605Smrg printf("Error: CPU doesn't support SSE3 (test with qemu)\n"); 116848b8605Smrg return 2; 117848b8605Smrg } 118848b8605Smrg util_cpu_caps.has_sse4_1 = 0; 119848b8605Smrg create_fn = translate_sse2_create; 120848b8605Smrg } 121848b8605Smrg else if (!strcmp(argv[1], "sse4.1")) 122848b8605Smrg { 123848b8605Smrg if(!util_cpu_caps.has_sse4_1 || !rtasm_cpu_has_sse()) 124848b8605Smrg { 125848b8605Smrg printf("Error: CPU doesn't support SSE4.1 (test with qemu)\n"); 126848b8605Smrg return 2; 127848b8605Smrg } 128848b8605Smrg create_fn = translate_sse2_create; 129848b8605Smrg } 130848b8605Smrg 131848b8605Smrg if (!create_fn) 132848b8605Smrg { 133b8e80941Smrg printf("Usage: ./translate_test [default|generic|x86|nosse|sse|sse2|sse3|sse4.1]\n"); 134848b8605Smrg return 2; 135848b8605Smrg } 136848b8605Smrg 137b8e80941Smrg for (i = 1; i < ARRAY_SIZE(buffer); ++i) 138848b8605Smrg buffer[i] = align_malloc(buffer_size, 4096); 139848b8605Smrg 140848b8605Smrg byte_buffer = align_malloc(buffer_size, 4096); 141848b8605Smrg float_buffer = align_malloc(buffer_size, 4096); 142848b8605Smrg double_buffer = align_malloc(buffer_size, 4096); 143848b8605Smrg half_buffer = align_malloc(buffer_size, 4096); 144848b8605Smrg 145848b8605Smrg elts = align_malloc(count * sizeof *elts, 4096); 146848b8605Smrg 147848b8605Smrg key.nr_elements = 1; 148848b8605Smrg key.element[0].input_buffer = 0; 149848b8605Smrg key.element[0].input_offset = 0; 150848b8605Smrg key.element[0].output_offset = 0; 151848b8605Smrg key.element[0].type = TRANSLATE_ELEMENT_NORMAL; 152848b8605Smrg key.element[0].instance_divisor = 0; 153848b8605Smrg 154848b8605Smrg srand(4359025); 155848b8605Smrg 156848b8605Smrg /* avoid negative values that work badly when converted to unsigned format*/ 157848b8605Smrg for (i = 0; i < buffer_size; ++i) 158848b8605Smrg byte_buffer[i] = rand() & 0x7f7f7f7f; 159848b8605Smrg 160848b8605Smrg for (i = 0; i < buffer_size / sizeof(float); ++i) 161848b8605Smrg float_buffer[i] = (float)rand_double(); 162848b8605Smrg 163848b8605Smrg for (i = 0; i < buffer_size / sizeof(double); ++i) 164848b8605Smrg double_buffer[i] = rand_double(); 165848b8605Smrg 166848b8605Smrg for (i = 0; i < buffer_size / sizeof(double); ++i) 167848b8605Smrg half_buffer[i] = util_float_to_half((float) rand_double()); 168848b8605Smrg 169848b8605Smrg for (i = 0; i < count; ++i) 170848b8605Smrg elts[i] = i; 171848b8605Smrg 172848b8605Smrg for (output_format = 1; output_format < PIPE_FORMAT_COUNT; ++output_format) 173848b8605Smrg { 174848b8605Smrg const struct util_format_description* output_format_desc = util_format_description(output_format); 175848b8605Smrg unsigned output_format_size; 176848b8605Smrg unsigned output_normalized = 0; 177848b8605Smrg 178848b8605Smrg if (!output_format_desc 179848b8605Smrg || !output_format_desc->fetch_rgba_float 180848b8605Smrg || !output_format_desc->pack_rgba_float 181848b8605Smrg || output_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB 182848b8605Smrg || output_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN 183848b8605Smrg || !translate_is_output_format_supported(output_format)) 184848b8605Smrg continue; 185848b8605Smrg 186848b8605Smrg for(i = 0; i < output_format_desc->nr_channels; ++i) 187848b8605Smrg { 188848b8605Smrg if(output_format_desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT) 189848b8605Smrg output_normalized |= (1 << output_format_desc->channel[i].normalized); 190848b8605Smrg } 191848b8605Smrg 192848b8605Smrg output_format_size = util_format_get_stride(output_format, 1); 193848b8605Smrg 194848b8605Smrg for (input_format = 1; input_format < PIPE_FORMAT_COUNT; ++input_format) 195848b8605Smrg { 196848b8605Smrg const struct util_format_description* input_format_desc = util_format_description(input_format); 197848b8605Smrg unsigned input_format_size; 198848b8605Smrg struct translate* translate[2]; 199848b8605Smrg unsigned fail = 0; 200848b8605Smrg unsigned used_generic = 0; 201848b8605Smrg unsigned input_normalized = 0; 202848b8605Smrg boolean input_is_float = FALSE; 203848b8605Smrg 204848b8605Smrg if (!input_format_desc 205848b8605Smrg || !input_format_desc->fetch_rgba_float 206848b8605Smrg || !input_format_desc->pack_rgba_float 207848b8605Smrg || input_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB 208848b8605Smrg || input_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN 209848b8605Smrg || !translate_is_output_format_supported(input_format)) 210848b8605Smrg continue; 211848b8605Smrg 212848b8605Smrg input_format_size = util_format_get_stride(input_format, 1); 213848b8605Smrg 214848b8605Smrg for(i = 0; i < input_format_desc->nr_channels; ++i) 215848b8605Smrg { 216848b8605Smrg if(input_format_desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) 217848b8605Smrg { 218848b8605Smrg input_is_float = 1; 219848b8605Smrg input_normalized |= 1 << 1; 220848b8605Smrg } 221848b8605Smrg else 222848b8605Smrg input_normalized |= (1 << input_format_desc->channel[i].normalized); 223848b8605Smrg } 224848b8605Smrg 225848b8605Smrg if(((input_normalized | output_normalized) == 3) 226848b8605Smrg || ((input_normalized & 1) && (output_normalized & 1) 227848b8605Smrg && input_format_size * output_format_desc->nr_channels > output_format_size * input_format_desc->nr_channels)) 228848b8605Smrg continue; 229848b8605Smrg 230848b8605Smrg key.element[0].input_format = input_format; 231848b8605Smrg key.element[0].output_format = output_format; 232848b8605Smrg key.output_stride = output_format_size; 233848b8605Smrg translate[0] = create_fn(&key); 234848b8605Smrg if (!translate[0]) 235848b8605Smrg continue; 236848b8605Smrg 237848b8605Smrg key.element[0].input_format = output_format; 238848b8605Smrg key.element[0].output_format = input_format; 239848b8605Smrg key.output_stride = input_format_size; 240848b8605Smrg translate[1] = create_fn(&key); 241848b8605Smrg if(!translate[1]) 242848b8605Smrg { 243848b8605Smrg used_generic = 1; 244848b8605Smrg translate[1] = translate_generic_create(&key); 245848b8605Smrg if(!translate[1]) 246848b8605Smrg continue; 247848b8605Smrg } 248848b8605Smrg 249848b8605Smrg for(i = 1; i < 5; ++i) 250848b8605Smrg memset(buffer[i], 0xcd - (0x22 * i), 4096); 251848b8605Smrg 252848b8605Smrg if(input_is_float && input_format_desc->channel[0].size == 32) 253848b8605Smrg buffer[0] = (unsigned char*)float_buffer; 254848b8605Smrg else if(input_is_float && input_format_desc->channel[0].size == 64) 255848b8605Smrg buffer[0] = (unsigned char*)double_buffer; 256848b8605Smrg else if(input_is_float && input_format_desc->channel[0].size == 16) 257848b8605Smrg buffer[0] = (unsigned char*)half_buffer; 258848b8605Smrg else if(input_is_float) 259848b8605Smrg abort(); 260848b8605Smrg else 261848b8605Smrg buffer[0] = byte_buffer; 262848b8605Smrg 263848b8605Smrg translate[0]->set_buffer(translate[0], 0, buffer[0], input_format_size, count - 1); 264848b8605Smrg translate[0]->run_elts(translate[0], elts, count, 0, 0, buffer[1]); 265848b8605Smrg translate[1]->set_buffer(translate[1], 0, buffer[1], output_format_size, count - 1); 266848b8605Smrg translate[1]->run_elts(translate[1], elts, count, 0, 0, buffer[2]); 267848b8605Smrg translate[0]->set_buffer(translate[0], 0, buffer[2], input_format_size, count - 1); 268848b8605Smrg translate[0]->run_elts(translate[0], elts, count, 0, 0, buffer[3]); 269848b8605Smrg translate[1]->set_buffer(translate[1], 0, buffer[3], output_format_size, count - 1); 270848b8605Smrg translate[1]->run_elts(translate[1], elts, count, 0, 0, buffer[4]); 271848b8605Smrg 272848b8605Smrg for (i = 0; i < count; ++i) 273848b8605Smrg { 274848b8605Smrg float a[4]; 275848b8605Smrg float b[4]; 276848b8605Smrg input_format_desc->fetch_rgba_float(a, buffer[2] + i * input_format_size, 0, 0); 277848b8605Smrg input_format_desc->fetch_rgba_float(b, buffer[4] + i * input_format_size, 0, 0); 278848b8605Smrg 279848b8605Smrg for (j = 0; j < count; ++j) 280848b8605Smrg { 281848b8605Smrg float d = a[j] - b[j]; 282848b8605Smrg if (d > error || d < -error) 283848b8605Smrg { 284848b8605Smrg fail = 1; 285848b8605Smrg break; 286848b8605Smrg } 287848b8605Smrg } 288848b8605Smrg } 289848b8605Smrg 290848b8605Smrg printf("%s%s: %s -> %s -> %s -> %s -> %s\n", 291848b8605Smrg fail ? "FAIL" : "PASS", 292848b8605Smrg used_generic ? "[GENERIC]" : "", 293848b8605Smrg input_format_desc->name, output_format_desc->name, input_format_desc->name, output_format_desc->name, input_format_desc->name); 294848b8605Smrg 295848b8605Smrg if (1) 296848b8605Smrg { 297b8e80941Smrg for (i = 0; i < ARRAY_SIZE(buffer); ++i) 298848b8605Smrg { 299848b8605Smrg unsigned format_size = (i & 1) ? output_format_size : input_format_size; 300848b8605Smrg printf("%c ", (i == 2 || i == 4) ? '*' : ' '); 301848b8605Smrg for (j = 0; j < count; ++j) 302848b8605Smrg { 303848b8605Smrg for (k = 0; k < format_size; ++k) 304848b8605Smrg { 305848b8605Smrg printf("%02x", buffer[i][j * format_size + k]); 306848b8605Smrg } 307848b8605Smrg printf(" "); 308848b8605Smrg } 309848b8605Smrg printf("\n"); 310848b8605Smrg } 311848b8605Smrg } 312848b8605Smrg 313848b8605Smrg if (!fail) 314848b8605Smrg ++passed; 315848b8605Smrg ++total; 316848b8605Smrg 317848b8605Smrg if(translate[1]) 318848b8605Smrg translate[1]->release(translate[1]); 319848b8605Smrg translate[0]->release(translate[0]); 320848b8605Smrg } 321848b8605Smrg } 322848b8605Smrg 323848b8605Smrg printf("%u/%u tests passed for translate_%s\n", passed, total, argv[1]); 324848b8605Smrg return passed != total; 325848b8605Smrg} 326