lowlevel-blt-bench.c revision 1b18d63a
1/*
2 * Copyright © 2009 Nokia Corporation
3 * Copyright © 2010 Movial Creative Technologies Oy
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <stdint.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#ifdef HAVE_CONFIG_H
31#include <config.h>
32#endif
33
34#include "pixman-private.h"
35#include "utils.h"
36
/* Bit flags tested against the src_flags / mask_flags arguments of
 * bench_composite (). */
#define SOLID_FLAG 1    /* use a 1x1 image with repeat instead of a full image */
#define CA_FLAG    2    /* enable component alpha on the mask image */

/* Cache sizes (in bytes) used to size the "L1" and "L2" sub-tests. */
#define L1CACHE_SIZE (8 * 1024)
#define L2CACHE_SIZE (128 * 1024)

/* Dimensions of the main test images and the byte size of one pixel buffer
 * (4 bytes per pixel). */
#define WIDTH  1920
#define HEIGHT 1080
#define BUFSIZE (WIDTH * HEIGHT * 4)

/* Dimensions of the secondary "x" images created by bench_composite (). */
#define XWIDTH 256
#define XHEIGHT 256

/* Random rectangle sides are drawn from 1 .. 2 * TILEWIDTH (HT, VT, R tests)
 * or 1 .. 2 * TINYWIDTH (RT test). */
#define TILEWIDTH 32
#define TINYWIDTH 8

/* When non-zero, each sub-test is first run with a dummy 1x1 composite so
 * that the loop/call overhead can be subtracted from the measured time. */
#define EXCLUDE_OVERHEAD 1

/* Pixel buffers shared by all benchmarks; assigned in main (). */
uint32_t *dst;
uint32_t *src;
uint32_t *mask;

/* Reference memcpy throughput in bytes per second, set in main () from
 * bench_memcpy (). */
double bandwidth = 0;
58
59double
60bench_memcpy ()
61{
62    int64_t n = 0, total;
63    double  t1, t2;
64    int     x = 0;
65
66    t1 = gettime ();
67    while (1)
68    {
69	memcpy (dst, src, BUFSIZE - 64);
70	memcpy (src, dst, BUFSIZE - 64);
71	n += 4 * (BUFSIZE - 64);
72	t2 = gettime ();
73	if (t2 - t1 > 0.5)
74	    break;
75    }
76    n = total = n * 5;
77    t1 = gettime ();
78    while (n > 0)
79    {
80	if (++x >= 64)
81	    x = 0;
82	memcpy ((char *)dst + 1, (char *)src + x, BUFSIZE - 64);
83	memcpy ((char *)src + 1, (char *)dst + x, BUFSIZE - 64);
84	n -= 4 * (BUFSIZE - 64);
85    }
86    t2 = gettime ();
87    return (double)total / (t2 - t1);
88}
89
90static void
91pixman_image_composite_wrapper (pixman_implementation_t *impl,
92                                pixman_op_t              op,
93                                pixman_image_t *         src_image,
94                                pixman_image_t *         mask_image,
95                                pixman_image_t *         dst_image,
96                                int32_t                  src_x,
97                                int32_t                  src_y,
98                                int32_t                  mask_x,
99                                int32_t                  mask_y,
100                                int32_t                  dest_x,
101                                int32_t                  dest_y,
102                                int32_t                  width,
103                                int32_t                  height)
104{
105    pixman_image_composite (op, src_image, mask_image, dst_image, src_x,
106                            src_y, mask_x, mask_y, dest_x, dest_y, width, height);
107}
108
109static void
110pixman_image_composite_empty (pixman_implementation_t *impl,
111                              pixman_op_t              op,
112                              pixman_image_t *         src_image,
113                              pixman_image_t *         mask_image,
114                              pixman_image_t *         dst_image,
115                              int32_t                  src_x,
116                              int32_t                  src_y,
117                              int32_t                  mask_x,
118                              int32_t                  mask_y,
119                              int32_t                  dest_x,
120                              int32_t                  dest_y,
121                              int32_t                  width,
122                              int32_t                  height)
123{
124    pixman_image_composite (op, src_image, mask_image, dst_image, 0,
125                            0, 0, 0, 0, 0, 1, 1);
126}
127
128void
129noinline
130bench_L  (pixman_op_t              op,
131          pixman_image_t *         src_img,
132          pixman_image_t *         mask_img,
133          pixman_image_t *         dst_img,
134          int64_t                  n,
135          pixman_composite_func_t  func,
136          int                      width,
137          int                      lines_count)
138{
139    int64_t      i, j;
140    int          x = 0;
141    int          q = 0;
142    volatile int qx;
143
144    for (i = 0; i < n; i++)
145    {
146	/* touch destination buffer to fetch it into L1 cache */
147	for (j = 0; j < width + 64; j += 16) {
148	    q += dst[j];
149	    q += src[j];
150	}
151	if (++x >= 64)
152	    x = 0;
153	func (0, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count);
154    }
155    qx = q;
156}
157
158void
159noinline
160bench_M (pixman_op_t              op,
161         pixman_image_t *         src_img,
162         pixman_image_t *         mask_img,
163         pixman_image_t *         dst_img,
164         int64_t                  n,
165         pixman_composite_func_t  func)
166{
167    int64_t i;
168    int     x = 0;
169
170    for (i = 0; i < n; i++)
171    {
172	if (++x >= 64)
173	    x = 0;
174	func (0, op, src_img, mask_img, dst_img, x, 0, x, 0, 1, 0, WIDTH - 64, HEIGHT);
175    }
176}
177
178double
179noinline
180bench_HT (pixman_op_t              op,
181          pixman_image_t *         src_img,
182          pixman_image_t *         mask_img,
183          pixman_image_t *         dst_img,
184          int64_t                  n,
185          pixman_composite_func_t  func)
186{
187    double  pix_cnt = 0;
188    int     x = 0;
189    int     y = 0;
190    int64_t i;
191
192    srand (0);
193    for (i = 0; i < n; i++)
194    {
195	int w = (rand () % (TILEWIDTH * 2)) + 1;
196	int h = (rand () % (TILEWIDTH * 2)) + 1;
197	if (x + w > WIDTH)
198	{
199	    x = 0;
200	    y += TILEWIDTH * 2;
201	}
202	if (y + h > HEIGHT)
203	{
204	    y = 0;
205	}
206	func (0, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
207	x += w;
208	pix_cnt += w * h;
209    }
210    return pix_cnt;
211}
212
213double
214noinline
215bench_VT (pixman_op_t              op,
216          pixman_image_t *         src_img,
217          pixman_image_t *         mask_img,
218          pixman_image_t *         dst_img,
219          int64_t                  n,
220          pixman_composite_func_t  func)
221{
222    double  pix_cnt = 0;
223    int     x = 0;
224    int     y = 0;
225    int64_t i;
226
227    srand (0);
228    for (i = 0; i < n; i++)
229    {
230	int w = (rand () % (TILEWIDTH * 2)) + 1;
231	int h = (rand () % (TILEWIDTH * 2)) + 1;
232	if (y + h > HEIGHT)
233	{
234	    y = 0;
235	    x += TILEWIDTH * 2;
236	}
237	if (x + w > WIDTH)
238	{
239	    x = 0;
240	}
241	func (0, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
242	y += h;
243	pix_cnt += w * h;
244    }
245    return pix_cnt;
246}
247
248double
249noinline
250bench_R (pixman_op_t              op,
251         pixman_image_t *         src_img,
252         pixman_image_t *         mask_img,
253         pixman_image_t *         dst_img,
254         int64_t                  n,
255         pixman_composite_func_t  func,
256         int                      maxw,
257         int                      maxh)
258{
259    double  pix_cnt = 0;
260    int64_t i;
261
262    if (maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2)
263    {
264	printf("error: maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2\n");
265        return 0;
266    }
267
268    srand (0);
269    for (i = 0; i < n; i++)
270    {
271	int w = (rand () % (TILEWIDTH * 2)) + 1;
272	int h = (rand () % (TILEWIDTH * 2)) + 1;
273	int sx = rand () % (maxw - TILEWIDTH * 2);
274	int sy = rand () % (maxh - TILEWIDTH * 2);
275	int dx = rand () % (maxw - TILEWIDTH * 2);
276	int dy = rand () % (maxh - TILEWIDTH * 2);
277	func (0, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
278	pix_cnt += w * h;
279    }
280    return pix_cnt;
281}
282
283double
284noinline
285bench_RT (pixman_op_t              op,
286          pixman_image_t *         src_img,
287          pixman_image_t *         mask_img,
288          pixman_image_t *         dst_img,
289          int64_t                  n,
290          pixman_composite_func_t  func,
291          int                      maxw,
292          int                      maxh)
293{
294    double  pix_cnt = 0;
295    int64_t i;
296
297    if (maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2)
298    {
299	printf("error: maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2\n");
300        return 0;
301    }
302
303    srand (0);
304    for (i = 0; i < n; i++)
305    {
306	int w = (rand () % (TINYWIDTH * 2)) + 1;
307	int h = (rand () % (TINYWIDTH * 2)) + 1;
308	int sx = rand () % (maxw - TINYWIDTH * 2);
309	int sy = rand () % (maxh - TINYWIDTH * 2);
310	int dx = rand () % (maxw - TINYWIDTH * 2);
311	int dy = rand () % (maxh - TINYWIDTH * 2);
312	func (0, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
313	pix_cnt += w * h;
314    }
315    return pix_cnt;
316}
317
318void
319bench_composite (char * testname,
320                 int    src_fmt,
321                 int    src_flags,
322                 int    op,
323                 int    mask_fmt,
324                 int    mask_flags,
325                 int    dst_fmt,
326                 double npix)
327{
328    pixman_image_t *                src_img;
329    pixman_image_t *                dst_img;
330    pixman_image_t *                mask_img;
331    pixman_image_t *                xsrc_img;
332    pixman_image_t *                xdst_img;
333    pixman_image_t *                xmask_img;
334    double                          t1, t2, t3, pix_cnt;
335    int64_t                         n, l1test_width, nlines;
336    double                             bytes_per_pix = 0;
337
338    pixman_composite_func_t func = pixman_image_composite_wrapper;
339
340    if (!(src_flags & SOLID_FLAG))
341    {
342        bytes_per_pix += (src_fmt >> 24) / 8.0;
343        src_img = pixman_image_create_bits (src_fmt,
344                                            WIDTH, HEIGHT,
345                                            src,
346                                            WIDTH * 4);
347        xsrc_img = pixman_image_create_bits (src_fmt,
348                                             XWIDTH, XHEIGHT,
349                                             src,
350                                             XWIDTH * 4);
351    }
352    else
353    {
354        src_img = pixman_image_create_bits (src_fmt,
355                                            1, 1,
356                                            src,
357                                            4);
358        xsrc_img = pixman_image_create_bits (src_fmt,
359                                             1, 1,
360                                             src,
361                                             4);
362        pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL);
363        pixman_image_set_repeat (xsrc_img, PIXMAN_REPEAT_NORMAL);
364    }
365
366    bytes_per_pix += (dst_fmt >> 24) / 8.0;
367    dst_img = pixman_image_create_bits (dst_fmt,
368                                        WIDTH, HEIGHT,
369                                        dst,
370                                        WIDTH * 4);
371
372    mask_img = NULL;
373    xmask_img = NULL;
374    if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null)
375    {
376        bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0);
377        mask_img = pixman_image_create_bits (mask_fmt,
378                                             WIDTH, HEIGHT,
379                                             mask,
380                                             WIDTH * 4);
381        xmask_img = pixman_image_create_bits (mask_fmt,
382                                             XWIDTH, XHEIGHT,
383                                             mask,
384                                             XWIDTH * 4);
385    }
386    else if (mask_fmt != PIXMAN_null)
387    {
388        mask_img = pixman_image_create_bits (mask_fmt,
389                                             1, 1,
390                                             mask,
391                                             4);
392        xmask_img = pixman_image_create_bits (mask_fmt,
393                                             1, 1,
394                                             mask,
395                                             4 * 4);
396       pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL);
397       pixman_image_set_repeat (xmask_img, PIXMAN_REPEAT_NORMAL);
398    }
399    if ((mask_flags & CA_FLAG) && mask_fmt != PIXMAN_null)
400    {
401       pixman_image_set_component_alpha (mask_img, 1);
402    }
403    xdst_img = pixman_image_create_bits (dst_fmt,
404                                         XWIDTH, XHEIGHT,
405                                         dst,
406                                         XWIDTH * 4);
407
408
409    printf ("%24s %c", testname, func != pixman_image_composite_wrapper ?
410            '-' : '=');
411
412    memcpy (src, dst, BUFSIZE);
413    memcpy (dst, src, BUFSIZE);
414
415    l1test_width = L1CACHE_SIZE / 8 - 64;
416    if (l1test_width < 1)
417	l1test_width = 1;
418    if (l1test_width > WIDTH - 64)
419	l1test_width = WIDTH - 64;
420    n = 1 + npix / (l1test_width * 8);
421    t1 = gettime ();
422#if EXCLUDE_OVERHEAD
423    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, 1);
424#endif
425    t2 = gettime ();
426    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, 1);
427    t3 = gettime ();
428    printf ("  L1:%7.2f", (double)n * l1test_width * 1 /
429            ((t3 - t2) - (t2 - t1)) / 1000000.);
430    fflush (stdout);
431
432    memcpy (src, dst, BUFSIZE);
433    memcpy (dst, src, BUFSIZE);
434
435    nlines = (L2CACHE_SIZE / l1test_width) /
436	((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8);
437    if (nlines < 1)
438	nlines = 1;
439    n = 1 + npix / (l1test_width * nlines);
440    t1 = gettime ();
441#if EXCLUDE_OVERHEAD
442    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, nlines);
443#endif
444    t2 = gettime ();
445    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, nlines);
446    t3 = gettime ();
447    printf ("  L2:%7.2f", (double)n * l1test_width * nlines /
448            ((t3 - t2) - (t2 - t1)) / 1000000.);
449    fflush (stdout);
450
451    memcpy (src, dst, BUFSIZE);
452    memcpy (dst, src, BUFSIZE);
453
454    n = 1 + npix / (WIDTH * HEIGHT);
455    t1 = gettime ();
456#if EXCLUDE_OVERHEAD
457    bench_M (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
458#endif
459    t2 = gettime ();
460    bench_M (op, src_img, mask_img, dst_img, n, func);
461    t3 = gettime ();
462    printf ("  M:%6.2f (%6.2f%%)",
463        ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1))) / 1000000.,
464        ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) );
465    fflush (stdout);
466
467    memcpy (src, dst, BUFSIZE);
468    memcpy (dst, src, BUFSIZE);
469
470    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
471    t1 = gettime ();
472#if EXCLUDE_OVERHEAD
473    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
474#endif
475    t2 = gettime ();
476    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, func);
477    t3 = gettime ();
478    printf ("  HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
479    fflush (stdout);
480
481    memcpy (src, dst, BUFSIZE);
482    memcpy (dst, src, BUFSIZE);
483
484    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
485    t1 = gettime ();
486#if EXCLUDE_OVERHEAD
487    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
488#endif
489    t2 = gettime ();
490    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, func);
491    t3 = gettime ();
492    printf ("  VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
493    fflush (stdout);
494
495    memcpy (src, dst, BUFSIZE);
496    memcpy (dst, src, BUFSIZE);
497
498    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
499    t1 = gettime ();
500#if EXCLUDE_OVERHEAD
501    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
502#endif
503    t2 = gettime ();
504    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
505    t3 = gettime ();
506    printf ("  R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
507    fflush (stdout);
508
509    memcpy (src, dst, BUFSIZE);
510    memcpy (dst, src, BUFSIZE);
511
512    n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH);
513    t1 = gettime ();
514#if EXCLUDE_OVERHEAD
515    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
516#endif
517    t2 = gettime ();
518    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
519    t3 = gettime ();
520    printf ("  RT:%6.2f (%4.0fKops/s)\n", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000., (double) n / ((t3 - t2) * 1000));
521
522    if (mask_img) {
523	pixman_image_unref (mask_img);
524	pixman_image_unref (xmask_img);
525    }
526    pixman_image_unref (src_img);
527    pixman_image_unref (dst_img);
528    pixman_image_unref (xsrc_img);
529    pixman_image_unref (xdst_img);
530}
531
532#define PIXMAN_OP_OUT_REV (PIXMAN_OP_OUT_REVERSE)
533
534struct
535{
536    char *testname;
537    int   src_fmt;
538    int   src_flags;
539    int   op;
540    int   mask_fmt;
541    int   mask_flags;
542    int   dst_fmt;
543}
544tests_tbl[] =
545{
546    { "add_8_8_8",             PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
547    { "add_n_8_8",             PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
548    { "add_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
549    { "add_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
550    { "add_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
551    { "add_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
552    { "add_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
553    { "add_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
554    { "add_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
555    { "add_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
556    { "add_n_8",               PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
557    { "add_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
558    { "add_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
559    { "add_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
560    { "add_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
561    { "add_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
562    { "add_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
563    { "add_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
564    { "add_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
565    { "add_8_8",               PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
566    { "add_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
567    { "add_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
568    { "add_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
569    { "add_8888_1555",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
570    { "add_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
571    { "add_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
572    { "add_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
573    { "add_1555_1555",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
574    { "add_0565_2x10",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
575    { "add_2a10_2a10",         PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
576    { "src_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
577    { "src_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
578    { "src_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
579    { "src_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
580    { "src_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
581    { "src_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
582    { "src_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
583    { "src_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
584    { "src_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
585    { "src_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
586    { "src_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
587    { "src_8888_2x10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
588    { "src_8888_2a10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
589    { "src_0888_0565",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
590    { "src_0888_8888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
591    { "src_0888_x888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
592    { "src_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
593    { "src_x888_8888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
594    { "src_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
595    { "src_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
596    { "src_1555_0565",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
597    { "src_0565_1555",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
598    { "src_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
599    { "src_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
600    { "src_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
601    { "src_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
602    { "src_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
603    { "src_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
604    { "src_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
605    { "src_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
606    { "src_8888_8_0565",       PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
607    { "src_0888_8_0565",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
608    { "src_0888_8_8888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
609    { "src_0888_8_x888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
610    { "src_x888_8_x888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
611    { "src_x888_8_8888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
612    { "src_0565_8_0565",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
613    { "src_1555_8_0565",       PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
614    { "src_0565_8_1555",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
615    { "over_n_x888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
616    { "over_n_8888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
617    { "over_n_0565",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
618    { "over_n_1555",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
619    { "over_8888_0565",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
620    { "over_8888_x888",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
621    { "over_n_8_0565",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_r5g6b5 },
622    { "over_n_8_1555",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
623    { "over_n_8_4444",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
624    { "over_n_8_2222",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
625    { "over_n_8_x888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
626    { "over_n_8_8888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
627    { "over_n_8_2x10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
628    { "over_n_8_2a10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
629    { "over_n_8888_8888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
630    { "over_n_8888_x888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
631    { "over_n_8888_0565_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
632    { "over_n_8888_1555_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
633    { "over_n_8888_4444_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a4r4g4b4 },
634    { "over_n_8888_2222_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r2g2b2 },
635    { "over_n_8888_2x10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x2r10g10b10 },
636    { "over_n_8888_2a10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r10g10b10 },
637    { "over_8888_n_8888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a8r8g8b8 },
638    { "over_8888_n_x888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_x8r8g8b8 },
639    { "over_8888_n_0565",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_r5g6b5 },
640    { "over_8888_n_1555",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a1r5g5b5 },
641    { "outrev_n_8_0565",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_r5g6b5 },
642    { "outrev_n_8_1555",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
643    { "outrev_n_8_x888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
644    { "outrev_n_8_8888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
645    { "outrev_n_8888_0565_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
646    { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
647    { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
648    { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
649};
650
651int
652main (int argc, char *argv[])
653{
654    double x;
655    int i;
656    char *pattern = argc > 1 ? argv[1] : "all";
657
658    src = aligned_malloc (4096, BUFSIZE * 3);
659    memset (src, 0xCC, BUFSIZE * 3);
660    dst = src + (BUFSIZE / 4);
661    mask = dst + (BUFSIZE / 4);
662
663    printf ("Benchmark for a set of most commonly used functions\n");
664    printf ("---\n");
665    printf ("All results are presented in millions of pixels per second\n");
666    printf ("L1  - small Xx1 rectangle (fitting L1 cache), always blitted at the same\n");
667    printf ("      memory location with small drift in horizontal direction\n");
668    printf ("L2  - small XxY rectangle (fitting L2 cache), always blitted at the same\n");
669    printf ("      memory location with small drift in horizontal direction\n");
670    printf ("M   - large %dx%d rectangle, always blitted at the same\n",
671            WIDTH - 64, HEIGHT);
672    printf ("      memory location with small drift in horizontal direction\n");
673    printf ("HT  - random rectangles with %dx%d average size are copied from\n",
674            TILEWIDTH, TILEWIDTH);
675    printf ("      one %dx%d buffer to another, traversing from left to right\n",
676            WIDTH, HEIGHT);
677    printf ("      and from top to bottom\n");
678    printf ("VT  - random rectangles with %dx%d average size are copied from\n",
679            TILEWIDTH, TILEWIDTH);
680    printf ("      one %dx%d buffer to another, traversing from top to bottom\n",
681            WIDTH, HEIGHT);
682    printf ("      and from left to right\n");
683    printf ("R   - random rectangles with %dx%d average size are copied from\n",
684            TILEWIDTH, TILEWIDTH);
685    printf ("      random locations of one %dx%d buffer to another\n",
686            WIDTH, HEIGHT);
687    printf ("RT  - as R, but %dx%d average sized rectangles are copied\n",
688            TINYWIDTH, TINYWIDTH);
689    printf ("---\n");
690    bandwidth = x = bench_memcpy ();
691    printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n",
692            x / 1000000., x / 4000000);
693    printf ("---\n");
694
695    for (i = 0; i < sizeof(tests_tbl) / sizeof(tests_tbl[0]); i++)
696    {
697	if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern))
698	{
699	    bench_composite (tests_tbl[i].testname,
700			     tests_tbl[i].src_fmt,
701			     tests_tbl[i].src_flags,
702			     tests_tbl[i].op,
703			     tests_tbl[i].mask_fmt,
704			     tests_tbl[i].mask_flags,
705			     tests_tbl[i].dst_fmt,
706			     bandwidth/8);
707	}
708    }
709
710    free (src);
711    return 0;
712}
713