1848b8605Smrg/**************************************************************************
2848b8605Smrg *
3848b8605Smrg * Copyright © 2010 Luca Barbieri
4848b8605Smrg *
5848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
6848b8605Smrg * copy of this software and associated documentation files (the "Software"),
7848b8605Smrg * to deal in the Software without restriction, including without limitation
8848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the
10848b8605Smrg * Software is furnished to do so, subject to the following conditions:
11848b8605Smrg *
12848b8605Smrg * The above copyright notice and this permission notice (including the next
13848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the
14848b8605Smrg * Software.
15848b8605Smrg *
16848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20848b8605Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21848b8605Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22848b8605Smrg * DEALINGS IN THE SOFTWARE.
23848b8605Smrg *
24848b8605Smrg **************************************************************************/
25848b8605Smrg
26848b8605Smrg#include <stdio.h>
27848b8605Smrg#include "translate/translate.h"
28848b8605Smrg#include "util/u_memory.h"
29848b8605Smrg#include "util/u_format.h"
30848b8605Smrg#include "util/u_half.h"
31848b8605Smrg#include "util/u_cpu_detect.h"
32848b8605Smrg#include "rtasm/rtasm_cpu.h"
33848b8605Smrg
/**
 * Build a pseudo-random double from four successive rand() results.
 *
 * Each rand() value is treated as one "digit" in base (RAND_MAX + 1):
 * the first call supplies the most significant digit, the following three
 * refine the fraction.  The result is non-negative and does not exceed 1.0.
 *
 * The quality is only as good as rand() itself, so this is meant for
 * generating test data and nothing more serious.  The sequence is fully
 * determined by the seed previously passed to srand().
 */
static double rand_double(void)
{
   const double rm = (double)RAND_MAX + 1;
   double div = 1;
   double v = 0;
   unsigned i;

   for (i = 0; i < 4; ++i)
   {
      /* div is rm^(i + 1): the weight of the digit added in this round */
      div *= rm;
      v += (double)rand() / div;
   }

   return v;
}
48848b8605Smrg
49848b8605Smrgint main(int argc, char** argv)
50848b8605Smrg{
51848b8605Smrg   struct translate *(*create_fn)(const struct translate_key *key) = 0;
52848b8605Smrg
53848b8605Smrg   struct translate_key key;
54848b8605Smrg   unsigned output_format;
55848b8605Smrg   unsigned input_format;
56848b8605Smrg   unsigned buffer_size = 4096;
57848b8605Smrg   unsigned char* buffer[5];
58848b8605Smrg   unsigned char* byte_buffer;
59848b8605Smrg   float* float_buffer;
60848b8605Smrg   double* double_buffer;
61848b8605Smrg   uint16_t *half_buffer;
62848b8605Smrg   unsigned * elts;
63848b8605Smrg   unsigned count = 4;
64848b8605Smrg   unsigned i, j, k;
65848b8605Smrg   unsigned passed = 0;
66848b8605Smrg   unsigned total = 0;
67848b8605Smrg   const float error = 0.03125;
68848b8605Smrg
69848b8605Smrg   create_fn = 0;
70848b8605Smrg
71848b8605Smrg   util_cpu_detect();
72848b8605Smrg
73b8e80941Smrg   if (argc <= 1 ||
74b8e80941Smrg       !strcmp(argv[1], "default") )
75b8e80941Smrg      create_fn = translate_create;
76848b8605Smrg   else if (!strcmp(argv[1], "generic"))
77848b8605Smrg      create_fn = translate_generic_create;
78848b8605Smrg   else if (!strcmp(argv[1], "x86"))
79848b8605Smrg      create_fn = translate_sse2_create;
80848b8605Smrg   else if (!strcmp(argv[1], "nosse"))
81848b8605Smrg   {
82848b8605Smrg      util_cpu_caps.has_sse = 0;
83848b8605Smrg      util_cpu_caps.has_sse2 = 0;
84848b8605Smrg      util_cpu_caps.has_sse3 = 0;
85848b8605Smrg      util_cpu_caps.has_sse4_1 = 0;
86848b8605Smrg      create_fn = translate_sse2_create;
87848b8605Smrg   }
88848b8605Smrg   else if (!strcmp(argv[1], "sse"))
89848b8605Smrg   {
90848b8605Smrg      if(!util_cpu_caps.has_sse || !rtasm_cpu_has_sse())
91848b8605Smrg      {
92848b8605Smrg         printf("Error: CPU doesn't support SSE (test with qemu)\n");
93848b8605Smrg         return 2;
94848b8605Smrg      }
95848b8605Smrg      util_cpu_caps.has_sse2 = 0;
96848b8605Smrg      util_cpu_caps.has_sse3 = 0;
97848b8605Smrg      util_cpu_caps.has_sse4_1 = 0;
98848b8605Smrg      create_fn = translate_sse2_create;
99848b8605Smrg   }
100848b8605Smrg   else if (!strcmp(argv[1], "sse2"))
101848b8605Smrg   {
102848b8605Smrg      if(!util_cpu_caps.has_sse2 || !rtasm_cpu_has_sse())
103848b8605Smrg      {
104848b8605Smrg         printf("Error: CPU doesn't support SSE2 (test with qemu)\n");
105848b8605Smrg         return 2;
106848b8605Smrg      }
107848b8605Smrg      util_cpu_caps.has_sse3 = 0;
108848b8605Smrg      util_cpu_caps.has_sse4_1 = 0;
109848b8605Smrg      create_fn = translate_sse2_create;
110848b8605Smrg   }
111848b8605Smrg   else if (!strcmp(argv[1], "sse3"))
112848b8605Smrg   {
113848b8605Smrg      if(!util_cpu_caps.has_sse3 || !rtasm_cpu_has_sse())
114848b8605Smrg      {
115848b8605Smrg         printf("Error: CPU doesn't support SSE3 (test with qemu)\n");
116848b8605Smrg         return 2;
117848b8605Smrg      }
118848b8605Smrg      util_cpu_caps.has_sse4_1 = 0;
119848b8605Smrg      create_fn = translate_sse2_create;
120848b8605Smrg   }
121848b8605Smrg   else if (!strcmp(argv[1], "sse4.1"))
122848b8605Smrg   {
123848b8605Smrg      if(!util_cpu_caps.has_sse4_1 || !rtasm_cpu_has_sse())
124848b8605Smrg      {
125848b8605Smrg         printf("Error: CPU doesn't support SSE4.1 (test with qemu)\n");
126848b8605Smrg         return 2;
127848b8605Smrg      }
128848b8605Smrg      create_fn = translate_sse2_create;
129848b8605Smrg   }
130848b8605Smrg
131848b8605Smrg   if (!create_fn)
132848b8605Smrg   {
133b8e80941Smrg      printf("Usage: ./translate_test [default|generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
134848b8605Smrg      return 2;
135848b8605Smrg   }
136848b8605Smrg
137b8e80941Smrg   for (i = 1; i < ARRAY_SIZE(buffer); ++i)
138848b8605Smrg      buffer[i] = align_malloc(buffer_size, 4096);
139848b8605Smrg
140848b8605Smrg   byte_buffer = align_malloc(buffer_size, 4096);
141848b8605Smrg   float_buffer = align_malloc(buffer_size, 4096);
142848b8605Smrg   double_buffer = align_malloc(buffer_size, 4096);
143848b8605Smrg   half_buffer = align_malloc(buffer_size, 4096);
144848b8605Smrg
145848b8605Smrg   elts = align_malloc(count * sizeof *elts, 4096);
146848b8605Smrg
147848b8605Smrg   key.nr_elements = 1;
148848b8605Smrg   key.element[0].input_buffer = 0;
149848b8605Smrg   key.element[0].input_offset = 0;
150848b8605Smrg   key.element[0].output_offset = 0;
151848b8605Smrg   key.element[0].type = TRANSLATE_ELEMENT_NORMAL;
152848b8605Smrg   key.element[0].instance_divisor = 0;
153848b8605Smrg
154848b8605Smrg   srand(4359025);
155848b8605Smrg
156848b8605Smrg   /* avoid negative values that work badly when converted to unsigned format*/
157848b8605Smrg   for (i = 0; i < buffer_size; ++i)
158848b8605Smrg      byte_buffer[i] = rand() & 0x7f7f7f7f;
159848b8605Smrg
160848b8605Smrg   for (i = 0; i < buffer_size / sizeof(float); ++i)
161848b8605Smrg      float_buffer[i] = (float)rand_double();
162848b8605Smrg
163848b8605Smrg   for (i = 0; i < buffer_size / sizeof(double); ++i)
164848b8605Smrg      double_buffer[i] = rand_double();
165848b8605Smrg
166848b8605Smrg   for (i = 0; i < buffer_size / sizeof(double); ++i)
167848b8605Smrg      half_buffer[i] = util_float_to_half((float) rand_double());
168848b8605Smrg
169848b8605Smrg   for (i = 0; i < count; ++i)
170848b8605Smrg      elts[i] = i;
171848b8605Smrg
172848b8605Smrg   for (output_format = 1; output_format < PIPE_FORMAT_COUNT; ++output_format)
173848b8605Smrg   {
174848b8605Smrg      const struct util_format_description* output_format_desc = util_format_description(output_format);
175848b8605Smrg      unsigned output_format_size;
176848b8605Smrg      unsigned output_normalized = 0;
177848b8605Smrg
178848b8605Smrg      if (!output_format_desc
179848b8605Smrg            || !output_format_desc->fetch_rgba_float
180848b8605Smrg            || !output_format_desc->pack_rgba_float
181848b8605Smrg            || output_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB
182848b8605Smrg            || output_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN
183848b8605Smrg            || !translate_is_output_format_supported(output_format))
184848b8605Smrg         continue;
185848b8605Smrg
186848b8605Smrg      for(i = 0; i < output_format_desc->nr_channels; ++i)
187848b8605Smrg      {
188848b8605Smrg         if(output_format_desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT)
189848b8605Smrg            output_normalized |= (1 << output_format_desc->channel[i].normalized);
190848b8605Smrg      }
191848b8605Smrg
192848b8605Smrg      output_format_size = util_format_get_stride(output_format, 1);
193848b8605Smrg
194848b8605Smrg      for (input_format = 1; input_format < PIPE_FORMAT_COUNT; ++input_format)
195848b8605Smrg      {
196848b8605Smrg         const struct util_format_description* input_format_desc = util_format_description(input_format);
197848b8605Smrg         unsigned input_format_size;
198848b8605Smrg         struct translate* translate[2];
199848b8605Smrg         unsigned fail = 0;
200848b8605Smrg         unsigned used_generic = 0;
201848b8605Smrg         unsigned input_normalized = 0;
202848b8605Smrg         boolean input_is_float = FALSE;
203848b8605Smrg
204848b8605Smrg         if (!input_format_desc
205848b8605Smrg               || !input_format_desc->fetch_rgba_float
206848b8605Smrg               || !input_format_desc->pack_rgba_float
207848b8605Smrg               || input_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB
208848b8605Smrg               || input_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN
209848b8605Smrg               || !translate_is_output_format_supported(input_format))
210848b8605Smrg            continue;
211848b8605Smrg
212848b8605Smrg         input_format_size = util_format_get_stride(input_format, 1);
213848b8605Smrg
214848b8605Smrg         for(i = 0; i < input_format_desc->nr_channels; ++i)
215848b8605Smrg         {
216848b8605Smrg            if(input_format_desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
217848b8605Smrg            {
218848b8605Smrg               input_is_float = 1;
219848b8605Smrg               input_normalized |= 1 << 1;
220848b8605Smrg            }
221848b8605Smrg            else
222848b8605Smrg               input_normalized |= (1 << input_format_desc->channel[i].normalized);
223848b8605Smrg         }
224848b8605Smrg
225848b8605Smrg         if(((input_normalized | output_normalized) == 3)
226848b8605Smrg               || ((input_normalized & 1) && (output_normalized & 1)
227848b8605Smrg                     && input_format_size * output_format_desc->nr_channels > output_format_size * input_format_desc->nr_channels))
228848b8605Smrg            continue;
229848b8605Smrg
230848b8605Smrg         key.element[0].input_format = input_format;
231848b8605Smrg         key.element[0].output_format = output_format;
232848b8605Smrg         key.output_stride = output_format_size;
233848b8605Smrg         translate[0] = create_fn(&key);
234848b8605Smrg         if (!translate[0])
235848b8605Smrg            continue;
236848b8605Smrg
237848b8605Smrg         key.element[0].input_format = output_format;
238848b8605Smrg         key.element[0].output_format = input_format;
239848b8605Smrg         key.output_stride = input_format_size;
240848b8605Smrg         translate[1] = create_fn(&key);
241848b8605Smrg         if(!translate[1])
242848b8605Smrg         {
243848b8605Smrg            used_generic = 1;
244848b8605Smrg            translate[1] = translate_generic_create(&key);
245848b8605Smrg            if(!translate[1])
246848b8605Smrg               continue;
247848b8605Smrg         }
248848b8605Smrg
249848b8605Smrg         for(i = 1; i < 5; ++i)
250848b8605Smrg            memset(buffer[i], 0xcd - (0x22 * i), 4096);
251848b8605Smrg
252848b8605Smrg         if(input_is_float && input_format_desc->channel[0].size == 32)
253848b8605Smrg            buffer[0] = (unsigned char*)float_buffer;
254848b8605Smrg         else if(input_is_float && input_format_desc->channel[0].size == 64)
255848b8605Smrg            buffer[0] = (unsigned char*)double_buffer;
256848b8605Smrg         else if(input_is_float && input_format_desc->channel[0].size == 16)
257848b8605Smrg            buffer[0] = (unsigned char*)half_buffer;
258848b8605Smrg         else if(input_is_float)
259848b8605Smrg            abort();
260848b8605Smrg         else
261848b8605Smrg            buffer[0] = byte_buffer;
262848b8605Smrg
263848b8605Smrg         translate[0]->set_buffer(translate[0], 0, buffer[0], input_format_size, count - 1);
264848b8605Smrg         translate[0]->run_elts(translate[0], elts, count, 0, 0, buffer[1]);
265848b8605Smrg         translate[1]->set_buffer(translate[1], 0, buffer[1], output_format_size, count - 1);
266848b8605Smrg         translate[1]->run_elts(translate[1], elts, count, 0, 0, buffer[2]);
267848b8605Smrg         translate[0]->set_buffer(translate[0], 0, buffer[2], input_format_size, count - 1);
268848b8605Smrg         translate[0]->run_elts(translate[0], elts, count, 0, 0, buffer[3]);
269848b8605Smrg         translate[1]->set_buffer(translate[1], 0, buffer[3], output_format_size, count - 1);
270848b8605Smrg         translate[1]->run_elts(translate[1], elts, count, 0, 0, buffer[4]);
271848b8605Smrg
272848b8605Smrg         for (i = 0; i < count; ++i)
273848b8605Smrg         {
274848b8605Smrg            float a[4];
275848b8605Smrg            float b[4];
276848b8605Smrg            input_format_desc->fetch_rgba_float(a, buffer[2] + i * input_format_size, 0, 0);
277848b8605Smrg            input_format_desc->fetch_rgba_float(b, buffer[4] + i * input_format_size, 0, 0);
278848b8605Smrg
279848b8605Smrg            for (j = 0; j < count; ++j)
280848b8605Smrg            {
281848b8605Smrg               float d = a[j] - b[j];
282848b8605Smrg               if (d > error || d < -error)
283848b8605Smrg               {
284848b8605Smrg                  fail = 1;
285848b8605Smrg                  break;
286848b8605Smrg               }
287848b8605Smrg            }
288848b8605Smrg         }
289848b8605Smrg
290848b8605Smrg         printf("%s%s: %s -> %s -> %s -> %s -> %s\n",
291848b8605Smrg               fail ? "FAIL" : "PASS",
292848b8605Smrg               used_generic ? "[GENERIC]" : "",
293848b8605Smrg               input_format_desc->name, output_format_desc->name, input_format_desc->name, output_format_desc->name, input_format_desc->name);
294848b8605Smrg
295848b8605Smrg         if (1)
296848b8605Smrg         {
297b8e80941Smrg            for (i = 0; i < ARRAY_SIZE(buffer); ++i)
298848b8605Smrg            {
299848b8605Smrg               unsigned format_size = (i & 1) ? output_format_size : input_format_size;
300848b8605Smrg               printf("%c ", (i == 2 || i == 4) ? '*' : ' ');
301848b8605Smrg               for (j = 0; j < count; ++j)
302848b8605Smrg               {
303848b8605Smrg                  for (k = 0; k < format_size; ++k)
304848b8605Smrg                  {
305848b8605Smrg                     printf("%02x", buffer[i][j * format_size + k]);
306848b8605Smrg                  }
307848b8605Smrg                  printf(" ");
308848b8605Smrg               }
309848b8605Smrg               printf("\n");
310848b8605Smrg            }
311848b8605Smrg         }
312848b8605Smrg
313848b8605Smrg         if (!fail)
314848b8605Smrg            ++passed;
315848b8605Smrg         ++total;
316848b8605Smrg
317848b8605Smrg         if(translate[1])
318848b8605Smrg            translate[1]->release(translate[1]);
319848b8605Smrg         translate[0]->release(translate[0]);
320848b8605Smrg      }
321848b8605Smrg   }
322848b8605Smrg
323848b8605Smrg   printf("%u/%u tests passed for translate_%s\n", passed, total, argv[1]);
324848b8605Smrg   return passed != total;
325848b8605Smrg}
326