lowlevel-blt-bench.c revision 6ba797d6
1/*
2 * Copyright © 2009 Nokia Corporation
3 * Copyright © 2010 Movial Creative Technologies Oy
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <stdint.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#ifdef HAVE_CONFIG_H
31#include <config.h>
32#endif
33
34#include "pixman-private.h"
35#include "utils.h"
36
/*
 * Bit flags carried in the src_flags / mask_flags arguments of
 * bench_composite().
 */
/* The image is created as a 1x1 bitmap with PIXMAN_REPEAT_NORMAL. */
37#define SOLID_FLAG 1
/* Component alpha is enabled on the mask image. */
38#define CA_FLAG    2
39
/*
 * Cache sizes used by bench_composite() to size the working sets of the
 * "L1" and "L2" scenarios (presumably in bytes -- confirm).
 */
40#define L1CACHE_SIZE (8 * 1024)
41#define L2CACHE_SIZE (128 * 1024)
42
/*
 * WIDTH x HEIGHT is the size of the main src/mask/dst images; BUFSIZE is the
 * byte size of one such buffer at 4 bytes per pixel.  XWIDTH x XHEIGHT is
 * the size of the secondary x*_img images created in bench_composite().
 */
43#define WIDTH  1920
44#define HEIGHT 1080
45#define BUFSIZE (WIDTH * HEIGHT * 4)
46#define XWIDTH 256
47#define XHEIGHT 256
/*
 * Random rectangle sides are drawn from 1 .. 2 * TILEWIDTH in the HT, VT and
 * R scenarios, and from 1 .. 2 * TINYWIDTH in the RT scenario.
 */
48#define TILEWIDTH 32
49#define TINYWIDTH 8
50
/*
 * When non-zero, every scenario is first run with
 * pixman_image_composite_empty so that the time of that run can be
 * subtracted from the real measurement.
 */
51#define EXCLUDE_OVERHEAD 1
52
/*
 * Pixel buffers shared by all benchmarks.  main() allocates one region of
 * 3 * BUFSIZE bytes for src; dst and mask point BUFSIZE bytes further each.
 */
53uint32_t *dst;
54uint32_t *src;
55uint32_t *mask;
56
/* Result of bench_memcpy(), stored by main() before the tests are run. */
57double bandwidth = 0;
58
59double
60bench_memcpy ()
61{
62    int64_t n = 0, total;
63    double  t1, t2;
64    int     x = 0;
65
66    t1 = gettime ();
67    while (1)
68    {
69	memcpy (dst, src, BUFSIZE - 64);
70	memcpy (src, dst, BUFSIZE - 64);
71	n += 4 * (BUFSIZE - 64);
72	t2 = gettime ();
73	if (t2 - t1 > 0.5)
74	    break;
75    }
76    n = total = n * 5;
77    t1 = gettime ();
78    while (n > 0)
79    {
80	if (++x >= 64)
81	    x = 0;
82	memcpy ((char *)dst + 1, (char *)src + x, BUFSIZE - 64);
83	memcpy ((char *)src + 1, (char *)dst + x, BUFSIZE - 64);
84	n -= 4 * (BUFSIZE - 64);
85    }
86    t2 = gettime ();
87    return (double)total / (t2 - t1);
88}
89
90static void
91pixman_image_composite_wrapper (pixman_implementation_t *impl,
92				pixman_composite_info_t *info)
93{
94    pixman_image_composite (info->op,
95			    info->src_image, info->mask_image, info->dest_image,
96			    info->src_x, info->src_y,
97			    info->mask_x, info->mask_y,
98			    info->dest_x, info->dest_y,
99			    info->width, info->height);
100}
101
102static void
103pixman_image_composite_empty (pixman_implementation_t *impl,
104			      pixman_composite_info_t *info)
105{
106    pixman_image_composite (info->op,
107			    info->src_image, info->mask_image, info->dest_image,
108			    0, 0, 0, 0, 0, 0, 1, 1);
109}
110
111static inline void
112call_func (pixman_composite_func_t func,
113	   pixman_op_t             op,
114	   pixman_image_t *        src_image,
115	   pixman_image_t *        mask_image,
116	   pixman_image_t *        dest_image,
117	   int32_t		   src_x,
118	   int32_t		   src_y,
119	   int32_t                 mask_x,
120	   int32_t                 mask_y,
121	   int32_t                 dest_x,
122	   int32_t                 dest_y,
123	   int32_t                 width,
124	   int32_t                 height)
125{
126    pixman_composite_info_t info;
127
128    info.op = op;
129    info.src_image = src_image;
130    info.mask_image = mask_image;
131    info.dest_image = dest_image;
132    info.src_x = src_x;
133    info.src_y = src_y;
134    info.mask_x = mask_x;
135    info.mask_y = mask_y;
136    info.dest_x = dest_x;
137    info.dest_y = dest_y;
138    info.width = width;
139    info.height = height;
140
141    func (0, &info);
142}
143
144void
145noinline
146bench_L  (pixman_op_t              op,
147          pixman_image_t *         src_img,
148          pixman_image_t *         mask_img,
149          pixman_image_t *         dst_img,
150          int64_t                  n,
151          pixman_composite_func_t  func,
152          int                      width,
153          int                      lines_count)
154{
155    int64_t      i, j;
156    int          x = 0;
157    int          q = 0;
158    volatile int qx;
159
160    for (i = 0; i < n; i++)
161    {
162	/* touch destination buffer to fetch it into L1 cache */
163	for (j = 0; j < width + 64; j += 16) {
164	    q += dst[j];
165	    q += src[j];
166	}
167	if (++x >= 64)
168	    x = 0;
169	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count);
170    }
171    qx = q;
172}
173
174void
175noinline
176bench_M (pixman_op_t              op,
177         pixman_image_t *         src_img,
178         pixman_image_t *         mask_img,
179         pixman_image_t *         dst_img,
180         int64_t                  n,
181         pixman_composite_func_t  func)
182{
183    int64_t i;
184    int     x = 0;
185
186    for (i = 0; i < n; i++)
187    {
188	if (++x >= 64)
189	    x = 0;
190	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 1, 0, WIDTH - 64, HEIGHT);
191    }
192}
193
194double
195noinline
196bench_HT (pixman_op_t              op,
197          pixman_image_t *         src_img,
198          pixman_image_t *         mask_img,
199          pixman_image_t *         dst_img,
200          int64_t                  n,
201          pixman_composite_func_t  func)
202{
203    double  pix_cnt = 0;
204    int     x = 0;
205    int     y = 0;
206    int64_t i;
207
208    srand (0);
209    for (i = 0; i < n; i++)
210    {
211	int w = (rand () % (TILEWIDTH * 2)) + 1;
212	int h = (rand () % (TILEWIDTH * 2)) + 1;
213	if (x + w > WIDTH)
214	{
215	    x = 0;
216	    y += TILEWIDTH * 2;
217	}
218	if (y + h > HEIGHT)
219	{
220	    y = 0;
221	}
222	call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
223	x += w;
224	pix_cnt += w * h;
225    }
226    return pix_cnt;
227}
228
229double
230noinline
231bench_VT (pixman_op_t              op,
232          pixman_image_t *         src_img,
233          pixman_image_t *         mask_img,
234          pixman_image_t *         dst_img,
235          int64_t                  n,
236          pixman_composite_func_t  func)
237{
238    double  pix_cnt = 0;
239    int     x = 0;
240    int     y = 0;
241    int64_t i;
242
243    srand (0);
244    for (i = 0; i < n; i++)
245    {
246	int w = (rand () % (TILEWIDTH * 2)) + 1;
247	int h = (rand () % (TILEWIDTH * 2)) + 1;
248	if (y + h > HEIGHT)
249	{
250	    y = 0;
251	    x += TILEWIDTH * 2;
252	}
253	if (x + w > WIDTH)
254	{
255	    x = 0;
256	}
257	call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
258	y += h;
259	pix_cnt += w * h;
260    }
261    return pix_cnt;
262}
263
264double
265noinline
266bench_R (pixman_op_t              op,
267         pixman_image_t *         src_img,
268         pixman_image_t *         mask_img,
269         pixman_image_t *         dst_img,
270         int64_t                  n,
271         pixman_composite_func_t  func,
272         int                      maxw,
273         int                      maxh)
274{
275    double  pix_cnt = 0;
276    int64_t i;
277
278    if (maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2)
279    {
280	printf("error: maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2\n");
281        return 0;
282    }
283
284    srand (0);
285    for (i = 0; i < n; i++)
286    {
287	int w = (rand () % (TILEWIDTH * 2)) + 1;
288	int h = (rand () % (TILEWIDTH * 2)) + 1;
289	int sx = rand () % (maxw - TILEWIDTH * 2);
290	int sy = rand () % (maxh - TILEWIDTH * 2);
291	int dx = rand () % (maxw - TILEWIDTH * 2);
292	int dy = rand () % (maxh - TILEWIDTH * 2);
293	call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
294	pix_cnt += w * h;
295    }
296    return pix_cnt;
297}
298
299double
300noinline
301bench_RT (pixman_op_t              op,
302          pixman_image_t *         src_img,
303          pixman_image_t *         mask_img,
304          pixman_image_t *         dst_img,
305          int64_t                  n,
306          pixman_composite_func_t  func,
307          int                      maxw,
308          int                      maxh)
309{
310    double  pix_cnt = 0;
311    int64_t i;
312
313    if (maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2)
314    {
315	printf("error: maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2\n");
316        return 0;
317    }
318
319    srand (0);
320    for (i = 0; i < n; i++)
321    {
322	int w = (rand () % (TINYWIDTH * 2)) + 1;
323	int h = (rand () % (TINYWIDTH * 2)) + 1;
324	int sx = rand () % (maxw - TINYWIDTH * 2);
325	int sy = rand () % (maxh - TINYWIDTH * 2);
326	int dx = rand () % (maxw - TINYWIDTH * 2);
327	int dy = rand () % (maxh - TINYWIDTH * 2);
328	call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
329	pix_cnt += w * h;
330    }
331    return pix_cnt;
332}
333
334void
335bench_composite (char * testname,
336                 int    src_fmt,
337                 int    src_flags,
338                 int    op,
339                 int    mask_fmt,
340                 int    mask_flags,
341                 int    dst_fmt,
342                 double npix)
343{
344    pixman_image_t *                src_img;
345    pixman_image_t *                dst_img;
346    pixman_image_t *                mask_img;
347    pixman_image_t *                xsrc_img;
348    pixman_image_t *                xdst_img;
349    pixman_image_t *                xmask_img;
350    double                          t1, t2, t3, pix_cnt;
351    int64_t                         n, l1test_width, nlines;
352    double                             bytes_per_pix = 0;
353
354    pixman_composite_func_t func = pixman_image_composite_wrapper;
355
356    if (!(src_flags & SOLID_FLAG))
357    {
358        bytes_per_pix += (src_fmt >> 24) / 8.0;
359        src_img = pixman_image_create_bits (src_fmt,
360                                            WIDTH, HEIGHT,
361                                            src,
362                                            WIDTH * 4);
363        xsrc_img = pixman_image_create_bits (src_fmt,
364                                             XWIDTH, XHEIGHT,
365                                             src,
366                                             XWIDTH * 4);
367    }
368    else
369    {
370        src_img = pixman_image_create_bits (src_fmt,
371                                            1, 1,
372                                            src,
373                                            4);
374        xsrc_img = pixman_image_create_bits (src_fmt,
375                                             1, 1,
376                                             src,
377                                             4);
378        pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL);
379        pixman_image_set_repeat (xsrc_img, PIXMAN_REPEAT_NORMAL);
380    }
381
382    bytes_per_pix += (dst_fmt >> 24) / 8.0;
383    dst_img = pixman_image_create_bits (dst_fmt,
384                                        WIDTH, HEIGHT,
385                                        dst,
386                                        WIDTH * 4);
387
388    mask_img = NULL;
389    xmask_img = NULL;
390    if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null)
391    {
392        bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0);
393        mask_img = pixman_image_create_bits (mask_fmt,
394                                             WIDTH, HEIGHT,
395                                             mask,
396                                             WIDTH * 4);
397        xmask_img = pixman_image_create_bits (mask_fmt,
398                                             XWIDTH, XHEIGHT,
399                                             mask,
400                                             XWIDTH * 4);
401    }
402    else if (mask_fmt != PIXMAN_null)
403    {
404        mask_img = pixman_image_create_bits (mask_fmt,
405                                             1, 1,
406                                             mask,
407                                             4);
408        xmask_img = pixman_image_create_bits (mask_fmt,
409                                             1, 1,
410                                             mask,
411                                             4 * 4);
412       pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL);
413       pixman_image_set_repeat (xmask_img, PIXMAN_REPEAT_NORMAL);
414    }
415    if ((mask_flags & CA_FLAG) && mask_fmt != PIXMAN_null)
416    {
417       pixman_image_set_component_alpha (mask_img, 1);
418    }
419    xdst_img = pixman_image_create_bits (dst_fmt,
420                                         XWIDTH, XHEIGHT,
421                                         dst,
422                                         XWIDTH * 4);
423
424
425    printf ("%24s %c", testname, func != pixman_image_composite_wrapper ?
426            '-' : '=');
427
428    memcpy (src, dst, BUFSIZE);
429    memcpy (dst, src, BUFSIZE);
430
431    l1test_width = L1CACHE_SIZE / 8 - 64;
432    if (l1test_width < 1)
433	l1test_width = 1;
434    if (l1test_width > WIDTH - 64)
435	l1test_width = WIDTH - 64;
436    n = 1 + npix / (l1test_width * 8);
437    t1 = gettime ();
438#if EXCLUDE_OVERHEAD
439    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, 1);
440#endif
441    t2 = gettime ();
442    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, 1);
443    t3 = gettime ();
444    printf ("  L1:%7.2f", (double)n * l1test_width * 1 /
445            ((t3 - t2) - (t2 - t1)) / 1000000.);
446    fflush (stdout);
447
448    memcpy (src, dst, BUFSIZE);
449    memcpy (dst, src, BUFSIZE);
450
451    nlines = (L2CACHE_SIZE / l1test_width) /
452	((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8);
453    if (nlines < 1)
454	nlines = 1;
455    n = 1 + npix / (l1test_width * nlines);
456    t1 = gettime ();
457#if EXCLUDE_OVERHEAD
458    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, nlines);
459#endif
460    t2 = gettime ();
461    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, nlines);
462    t3 = gettime ();
463    printf ("  L2:%7.2f", (double)n * l1test_width * nlines /
464            ((t3 - t2) - (t2 - t1)) / 1000000.);
465    fflush (stdout);
466
467    memcpy (src, dst, BUFSIZE);
468    memcpy (dst, src, BUFSIZE);
469
470    n = 1 + npix / (WIDTH * HEIGHT);
471    t1 = gettime ();
472#if EXCLUDE_OVERHEAD
473    bench_M (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
474#endif
475    t2 = gettime ();
476    bench_M (op, src_img, mask_img, dst_img, n, func);
477    t3 = gettime ();
478    printf ("  M:%6.2f (%6.2f%%)",
479        ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1))) / 1000000.,
480        ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) );
481    fflush (stdout);
482
483    memcpy (src, dst, BUFSIZE);
484    memcpy (dst, src, BUFSIZE);
485
486    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
487    t1 = gettime ();
488#if EXCLUDE_OVERHEAD
489    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
490#endif
491    t2 = gettime ();
492    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, func);
493    t3 = gettime ();
494    printf ("  HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
495    fflush (stdout);
496
497    memcpy (src, dst, BUFSIZE);
498    memcpy (dst, src, BUFSIZE);
499
500    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
501    t1 = gettime ();
502#if EXCLUDE_OVERHEAD
503    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
504#endif
505    t2 = gettime ();
506    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, func);
507    t3 = gettime ();
508    printf ("  VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
509    fflush (stdout);
510
511    memcpy (src, dst, BUFSIZE);
512    memcpy (dst, src, BUFSIZE);
513
514    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
515    t1 = gettime ();
516#if EXCLUDE_OVERHEAD
517    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
518#endif
519    t2 = gettime ();
520    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
521    t3 = gettime ();
522    printf ("  R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
523    fflush (stdout);
524
525    memcpy (src, dst, BUFSIZE);
526    memcpy (dst, src, BUFSIZE);
527
528    n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH);
529    t1 = gettime ();
530#if EXCLUDE_OVERHEAD
531    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
532#endif
533    t2 = gettime ();
534    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
535    t3 = gettime ();
536    printf ("  RT:%6.2f (%4.0fKops/s)\n", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000., (double) n / ((t3 - t2) * 1000));
537
538    if (mask_img) {
539	pixman_image_unref (mask_img);
540	pixman_image_unref (xmask_img);
541    }
542    pixman_image_unref (src_img);
543    pixman_image_unref (dst_img);
544    pixman_image_unref (xsrc_img);
545    pixman_image_unref (xdst_img);
546}
547
548#define PIXMAN_OP_OUT_REV (PIXMAN_OP_OUT_REVERSE)
549
550struct
551{
552    char *testname;
553    int   src_fmt;
554    int   src_flags;
555    int   op;
556    int   mask_fmt;
557    int   mask_flags;
558    int   dst_fmt;
559}
560tests_tbl[] =
561{
562    { "add_8_8_8",             PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
563    { "add_n_8_8",             PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
564    { "add_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
565    { "add_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
566    { "add_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
567    { "add_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
568    { "add_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
569    { "add_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
570    { "add_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
571    { "add_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
572    { "add_n_8",               PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
573    { "add_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
574    { "add_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
575    { "add_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
576    { "add_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
577    { "add_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
578    { "add_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
579    { "add_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
580    { "add_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
581    { "add_8_8",               PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
582    { "add_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
583    { "add_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
584    { "add_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
585    { "add_8888_1555",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
586    { "add_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
587    { "add_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
588    { "add_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
589    { "add_1555_1555",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
590    { "add_0565_2x10",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
591    { "add_2a10_2a10",         PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
592    { "src_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
593    { "src_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
594    { "src_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
595    { "src_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
596    { "src_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
597    { "src_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
598    { "src_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
599    { "src_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
600    { "src_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
601    { "src_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
602    { "src_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
603    { "src_8888_2x10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
604    { "src_8888_2a10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
605    { "src_0888_0565",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
606    { "src_0888_8888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
607    { "src_0888_x888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
608    { "src_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
609    { "src_x888_8888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
610    { "src_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
611    { "src_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
612    { "src_1555_0565",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
613    { "src_0565_1555",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
614    { "src_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
615    { "src_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
616    { "src_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
617    { "src_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
618    { "src_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
619    { "src_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
620    { "src_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
621    { "src_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
622    { "src_8888_8_0565",       PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
623    { "src_0888_8_0565",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
624    { "src_0888_8_8888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
625    { "src_0888_8_x888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
626    { "src_x888_8_x888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
627    { "src_x888_8_8888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
628    { "src_0565_8_0565",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
629    { "src_1555_8_0565",       PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
630    { "src_0565_8_1555",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
631    { "over_n_x888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
632    { "over_n_8888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
633    { "over_n_0565",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
634    { "over_n_1555",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
635    { "over_8888_0565",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
636    { "over_8888_x888",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
637    { "over_x888_8_0565",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_r5g6b5 },
638    { "over_n_8_0565",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_r5g6b5 },
639    { "over_n_8_1555",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
640    { "over_n_8_4444",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
641    { "over_n_8_2222",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
642    { "over_n_8_x888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
643    { "over_n_8_8888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
644    { "over_n_8_2x10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
645    { "over_n_8_2a10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
646    { "over_n_8888_8888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
647    { "over_n_8888_x888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
648    { "over_n_8888_0565_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
649    { "over_n_8888_1555_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
650    { "over_n_8888_4444_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a4r4g4b4 },
651    { "over_n_8888_2222_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r2g2b2 },
652    { "over_n_8888_2x10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x2r10g10b10 },
653    { "over_n_8888_2a10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r10g10b10 },
654    { "over_8888_n_8888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a8r8g8b8 },
655    { "over_8888_n_x888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_x8r8g8b8 },
656    { "over_8888_n_0565",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_r5g6b5 },
657    { "over_8888_n_1555",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a1r5g5b5 },
658    { "outrev_n_8_0565",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_r5g6b5 },
659    { "outrev_n_8_1555",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
660    { "outrev_n_8_x888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
661    { "outrev_n_8_8888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
662    { "outrev_n_8888_0565_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
663    { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
664    { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
665    { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
666};
667
668int
669main (int argc, char *argv[])
670{
671    double x;
672    int i;
673    char *pattern = argc > 1 ? argv[1] : "all";
674
675    src = aligned_malloc (4096, BUFSIZE * 3);
676    memset (src, 0xCC, BUFSIZE * 3);
677    dst = src + (BUFSIZE / 4);
678    mask = dst + (BUFSIZE / 4);
679
680    printf ("Benchmark for a set of most commonly used functions\n");
681    printf ("---\n");
682    printf ("All results are presented in millions of pixels per second\n");
683    printf ("L1  - small Xx1 rectangle (fitting L1 cache), always blitted at the same\n");
684    printf ("      memory location with small drift in horizontal direction\n");
685    printf ("L2  - small XxY rectangle (fitting L2 cache), always blitted at the same\n");
686    printf ("      memory location with small drift in horizontal direction\n");
687    printf ("M   - large %dx%d rectangle, always blitted at the same\n",
688            WIDTH - 64, HEIGHT);
689    printf ("      memory location with small drift in horizontal direction\n");
690    printf ("HT  - random rectangles with %dx%d average size are copied from\n",
691            TILEWIDTH, TILEWIDTH);
692    printf ("      one %dx%d buffer to another, traversing from left to right\n",
693            WIDTH, HEIGHT);
694    printf ("      and from top to bottom\n");
695    printf ("VT  - random rectangles with %dx%d average size are copied from\n",
696            TILEWIDTH, TILEWIDTH);
697    printf ("      one %dx%d buffer to another, traversing from top to bottom\n",
698            WIDTH, HEIGHT);
699    printf ("      and from left to right\n");
700    printf ("R   - random rectangles with %dx%d average size are copied from\n",
701            TILEWIDTH, TILEWIDTH);
702    printf ("      random locations of one %dx%d buffer to another\n",
703            WIDTH, HEIGHT);
704    printf ("RT  - as R, but %dx%d average sized rectangles are copied\n",
705            TINYWIDTH, TINYWIDTH);
706    printf ("---\n");
707    bandwidth = x = bench_memcpy ();
708    printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n",
709            x / 1000000., x / 4000000);
710    printf ("---\n");
711
712    for (i = 0; i < sizeof(tests_tbl) / sizeof(tests_tbl[0]); i++)
713    {
714	if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern))
715	{
716	    bench_composite (tests_tbl[i].testname,
717			     tests_tbl[i].src_fmt,
718			     tests_tbl[i].src_flags,
719			     tests_tbl[i].op,
720			     tests_tbl[i].mask_fmt,
721			     tests_tbl[i].mask_flags,
722			     tests_tbl[i].dst_fmt,
723			     bandwidth/8);
724	}
725    }
726
727    free (src);
728    return 0;
729}
730