lowlevel-blt-bench.c revision 9ad247e8
1/*
2 * Copyright © 2009 Nokia Corporation
3 * Copyright © 2010 Movial Creative Technologies Oy
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include "utils.h"
29
/*
 * Bits for the src_flags / mask_flags arguments of bench_composite ().
 * SOLID_FLAG: the image is created as a 1x1 bitmap with normal repeat.
 * CA_FLAG:    component alpha is enabled on the mask image.
 */
#define SOLID_FLAG 1
#define CA_FLAG    2

/*
 * Sizes used by bench_composite () to derive the working set of the
 * L1 and L2 benchmarks (row width and number of scanlines).
 */
#define L1CACHE_SIZE (8 * 1024)
#define L2CACHE_SIZE (128 * 1024)

/*
 * WIDTH x HEIGHT are the dimensions of the main benchmark images and
 * BUFSIZE is the byte size of one backing buffer (4 bytes per pixel).
 * XWIDTH x XHEIGHT are the dimensions of the secondary "x" images.
 * TILEWIDTH / TINYWIDTH bound the random rectangle sizes: rectangles are
 * 1 .. 2 * TILEWIDTH pixels per side in the HT, VT and R benchmarks and
 * 1 .. 2 * TINYWIDTH pixels per side in the RT benchmark.
 */
#define WIDTH  1920
#define HEIGHT 1080
#define BUFSIZE (WIDTH * HEIGHT * 4)
#define XWIDTH 256
#define XHEIGHT 256
#define TILEWIDTH 32
#define TINYWIDTH 8

/*
 * When non-zero, bench_composite () first runs every benchmark with
 * pixman_image_composite_empty () and subtracts that time from the
 * measurement of the real compositing function.
 */
#define EXCLUDE_OVERHEAD 1
45
/*
 * Backing storage for the destination, source and mask images.  main ()
 * carves all three out of a single allocation of 3 * BUFSIZE bytes.
 */
uint32_t *dst;
uint32_t *src;
uint32_t *mask;

/* Result of bench_memcpy () in bytes per second; set once in main (). */
double bandwidth = 0;
51
52double
53bench_memcpy ()
54{
55    int64_t n = 0, total;
56    double  t1, t2;
57    int     x = 0;
58
59    t1 = gettime ();
60    while (1)
61    {
62	memcpy (dst, src, BUFSIZE - 64);
63	memcpy (src, dst, BUFSIZE - 64);
64	n += 4 * (BUFSIZE - 64);
65	t2 = gettime ();
66	if (t2 - t1 > 0.5)
67	    break;
68    }
69    n = total = n * 5;
70    t1 = gettime ();
71    while (n > 0)
72    {
73	if (++x >= 64)
74	    x = 0;
75	memcpy ((char *)dst + 1, (char *)src + x, BUFSIZE - 64);
76	memcpy ((char *)src + 1, (char *)dst + x, BUFSIZE - 64);
77	n -= 4 * (BUFSIZE - 64);
78    }
79    t2 = gettime ();
80    return (double)total / (t2 - t1);
81}
82
/*
 * Scaling configuration, selected by the -b / -n command line options in
 * main ().  When use_scaling is set, the composite wrappers install
 * `filter` and the transform `m` on the source image before compositing.
 */
static pixman_bool_t use_scaling = FALSE;
static pixman_filter_t filter = PIXMAN_FILTER_NEAREST;

/*
 * nearly 1x scale factor: the X scale entry is pixman_fixed_1 + 1, all
 * other diagonal entries are exactly pixman_fixed_1.
 */
static pixman_transform_t m =
{
    {
        { pixman_fixed_1 + 1, 0,              0              },
        { 0,                  pixman_fixed_1, 0              },
        { 0,                  0,              pixman_fixed_1 }
    }
};
95
96static void
97pixman_image_composite_wrapper (pixman_implementation_t *impl,
98				pixman_composite_info_t *info)
99{
100    if (use_scaling)
101    {
102        pixman_image_set_filter (info->src_image, filter, NULL, 0);
103        pixman_image_set_transform(info->src_image, &m);
104    }
105    pixman_image_composite (info->op,
106			    info->src_image, info->mask_image, info->dest_image,
107			    info->src_x, info->src_y,
108			    info->mask_x, info->mask_y,
109			    info->dest_x, info->dest_y,
110			    info->width, info->height);
111}
112
113static void
114pixman_image_composite_empty (pixman_implementation_t *impl,
115			      pixman_composite_info_t *info)
116{
117    if (use_scaling)
118    {
119        pixman_image_set_filter (info->src_image, filter, NULL, 0);
120        pixman_image_set_transform(info->src_image, &m);
121    }
122    pixman_image_composite (info->op,
123			    info->src_image, info->mask_image, info->dest_image,
124			    0, 0, 0, 0, 0, 0, 1, 1);
125}
126
127static inline void
128call_func (pixman_composite_func_t func,
129	   pixman_op_t             op,
130	   pixman_image_t *        src_image,
131	   pixman_image_t *        mask_image,
132	   pixman_image_t *        dest_image,
133	   int32_t		   src_x,
134	   int32_t		   src_y,
135	   int32_t                 mask_x,
136	   int32_t                 mask_y,
137	   int32_t                 dest_x,
138	   int32_t                 dest_y,
139	   int32_t                 width,
140	   int32_t                 height)
141{
142    pixman_composite_info_t info;
143
144    info.op = op;
145    info.src_image = src_image;
146    info.mask_image = mask_image;
147    info.dest_image = dest_image;
148    info.src_x = src_x;
149    info.src_y = src_y;
150    info.mask_x = mask_x;
151    info.mask_y = mask_y;
152    info.dest_x = dest_x;
153    info.dest_y = dest_y;
154    info.width = width;
155    info.height = height;
156
157    func (0, &info);
158}
159
160void
161noinline
162bench_L  (pixman_op_t              op,
163          pixman_image_t *         src_img,
164          pixman_image_t *         mask_img,
165          pixman_image_t *         dst_img,
166          int64_t                  n,
167          pixman_composite_func_t  func,
168          int                      width,
169          int                      lines_count)
170{
171    int64_t      i, j;
172    int          x = 0;
173    int          q = 0;
174    volatile int qx;
175
176    for (i = 0; i < n; i++)
177    {
178	/* touch destination buffer to fetch it into L1 cache */
179	for (j = 0; j < width + 64; j += 16) {
180	    q += dst[j];
181	    q += src[j];
182	}
183	if (++x >= 64)
184	    x = 0;
185	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count);
186    }
187    qx = q;
188}
189
190void
191noinline
192bench_M (pixman_op_t              op,
193         pixman_image_t *         src_img,
194         pixman_image_t *         mask_img,
195         pixman_image_t *         dst_img,
196         int64_t                  n,
197         pixman_composite_func_t  func)
198{
199    int64_t i;
200    int     x = 0;
201
202    for (i = 0; i < n; i++)
203    {
204	if (++x >= 64)
205	    x = 0;
206	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 1, 0, WIDTH - 64, HEIGHT);
207    }
208}
209
210double
211noinline
212bench_HT (pixman_op_t              op,
213          pixman_image_t *         src_img,
214          pixman_image_t *         mask_img,
215          pixman_image_t *         dst_img,
216          int64_t                  n,
217          pixman_composite_func_t  func)
218{
219    double  pix_cnt = 0;
220    int     x = 0;
221    int     y = 0;
222    int64_t i;
223
224    srand (0);
225    for (i = 0; i < n; i++)
226    {
227	int w = (rand () % (TILEWIDTH * 2)) + 1;
228	int h = (rand () % (TILEWIDTH * 2)) + 1;
229	if (x + w > WIDTH)
230	{
231	    x = 0;
232	    y += TILEWIDTH * 2;
233	}
234	if (y + h > HEIGHT)
235	{
236	    y = 0;
237	}
238	call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
239	x += w;
240	pix_cnt += w * h;
241    }
242    return pix_cnt;
243}
244
245double
246noinline
247bench_VT (pixman_op_t              op,
248          pixman_image_t *         src_img,
249          pixman_image_t *         mask_img,
250          pixman_image_t *         dst_img,
251          int64_t                  n,
252          pixman_composite_func_t  func)
253{
254    double  pix_cnt = 0;
255    int     x = 0;
256    int     y = 0;
257    int64_t i;
258
259    srand (0);
260    for (i = 0; i < n; i++)
261    {
262	int w = (rand () % (TILEWIDTH * 2)) + 1;
263	int h = (rand () % (TILEWIDTH * 2)) + 1;
264	if (y + h > HEIGHT)
265	{
266	    y = 0;
267	    x += TILEWIDTH * 2;
268	}
269	if (x + w > WIDTH)
270	{
271	    x = 0;
272	}
273	call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
274	y += h;
275	pix_cnt += w * h;
276    }
277    return pix_cnt;
278}
279
280double
281noinline
282bench_R (pixman_op_t              op,
283         pixman_image_t *         src_img,
284         pixman_image_t *         mask_img,
285         pixman_image_t *         dst_img,
286         int64_t                  n,
287         pixman_composite_func_t  func,
288         int                      maxw,
289         int                      maxh)
290{
291    double  pix_cnt = 0;
292    int64_t i;
293
294    if (maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2)
295    {
296	printf("error: maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2\n");
297        return 0;
298    }
299
300    srand (0);
301    for (i = 0; i < n; i++)
302    {
303	int w = (rand () % (TILEWIDTH * 2)) + 1;
304	int h = (rand () % (TILEWIDTH * 2)) + 1;
305	int sx = rand () % (maxw - TILEWIDTH * 2);
306	int sy = rand () % (maxh - TILEWIDTH * 2);
307	int dx = rand () % (maxw - TILEWIDTH * 2);
308	int dy = rand () % (maxh - TILEWIDTH * 2);
309	call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
310	pix_cnt += w * h;
311    }
312    return pix_cnt;
313}
314
315double
316noinline
317bench_RT (pixman_op_t              op,
318          pixman_image_t *         src_img,
319          pixman_image_t *         mask_img,
320          pixman_image_t *         dst_img,
321          int64_t                  n,
322          pixman_composite_func_t  func,
323          int                      maxw,
324          int                      maxh)
325{
326    double  pix_cnt = 0;
327    int64_t i;
328
329    if (maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2)
330    {
331	printf("error: maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2\n");
332        return 0;
333    }
334
335    srand (0);
336    for (i = 0; i < n; i++)
337    {
338	int w = (rand () % (TINYWIDTH * 2)) + 1;
339	int h = (rand () % (TINYWIDTH * 2)) + 1;
340	int sx = rand () % (maxw - TINYWIDTH * 2);
341	int sy = rand () % (maxh - TINYWIDTH * 2);
342	int dx = rand () % (maxw - TINYWIDTH * 2);
343	int dy = rand () % (maxh - TINYWIDTH * 2);
344	call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
345	pix_cnt += w * h;
346    }
347    return pix_cnt;
348}
349
350void
351bench_composite (char * testname,
352                 int    src_fmt,
353                 int    src_flags,
354                 int    op,
355                 int    mask_fmt,
356                 int    mask_flags,
357                 int    dst_fmt,
358                 double npix)
359{
360    pixman_image_t *                src_img;
361    pixman_image_t *                dst_img;
362    pixman_image_t *                mask_img;
363    pixman_image_t *                xsrc_img;
364    pixman_image_t *                xdst_img;
365    pixman_image_t *                xmask_img;
366    double                          t1, t2, t3, pix_cnt;
367    int64_t                         n, l1test_width, nlines;
368    double                             bytes_per_pix = 0;
369
370    pixman_composite_func_t func = pixman_image_composite_wrapper;
371
372    if (!(src_flags & SOLID_FLAG))
373    {
374        bytes_per_pix += (src_fmt >> 24) / 8.0;
375        src_img = pixman_image_create_bits (src_fmt,
376                                            WIDTH, HEIGHT,
377                                            src,
378                                            WIDTH * 4);
379        xsrc_img = pixman_image_create_bits (src_fmt,
380                                             XWIDTH, XHEIGHT,
381                                             src,
382                                             XWIDTH * 4);
383    }
384    else
385    {
386        src_img = pixman_image_create_bits (src_fmt,
387                                            1, 1,
388                                            src,
389                                            4);
390        xsrc_img = pixman_image_create_bits (src_fmt,
391                                             1, 1,
392                                             src,
393                                             4);
394        pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL);
395        pixman_image_set_repeat (xsrc_img, PIXMAN_REPEAT_NORMAL);
396    }
397
398    bytes_per_pix += (dst_fmt >> 24) / 8.0;
399    dst_img = pixman_image_create_bits (dst_fmt,
400                                        WIDTH, HEIGHT,
401                                        dst,
402                                        WIDTH * 4);
403
404    mask_img = NULL;
405    xmask_img = NULL;
406    if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null)
407    {
408        bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0);
409        mask_img = pixman_image_create_bits (mask_fmt,
410                                             WIDTH, HEIGHT,
411                                             mask,
412                                             WIDTH * 4);
413        xmask_img = pixman_image_create_bits (mask_fmt,
414                                             XWIDTH, XHEIGHT,
415                                             mask,
416                                             XWIDTH * 4);
417    }
418    else if (mask_fmt != PIXMAN_null)
419    {
420        mask_img = pixman_image_create_bits (mask_fmt,
421                                             1, 1,
422                                             mask,
423                                             4);
424        xmask_img = pixman_image_create_bits (mask_fmt,
425                                             1, 1,
426                                             mask,
427                                             4 * 4);
428       pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL);
429       pixman_image_set_repeat (xmask_img, PIXMAN_REPEAT_NORMAL);
430    }
431    if ((mask_flags & CA_FLAG) && mask_fmt != PIXMAN_null)
432    {
433       pixman_image_set_component_alpha (mask_img, 1);
434    }
435    xdst_img = pixman_image_create_bits (dst_fmt,
436                                         XWIDTH, XHEIGHT,
437                                         dst,
438                                         XWIDTH * 4);
439
440
441    printf ("%24s %c", testname, func != pixman_image_composite_wrapper ?
442            '-' : '=');
443
444    memcpy (src, dst, BUFSIZE);
445    memcpy (dst, src, BUFSIZE);
446
447    l1test_width = L1CACHE_SIZE / 8 - 64;
448    if (l1test_width < 1)
449	l1test_width = 1;
450    if (l1test_width > WIDTH - 64)
451	l1test_width = WIDTH - 64;
452    n = 1 + npix / (l1test_width * 8);
453    t1 = gettime ();
454#if EXCLUDE_OVERHEAD
455    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, 1);
456#endif
457    t2 = gettime ();
458    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, 1);
459    t3 = gettime ();
460    printf ("  L1:%7.2f", (double)n * l1test_width * 1 /
461            ((t3 - t2) - (t2 - t1)) / 1000000.);
462    fflush (stdout);
463
464    memcpy (src, dst, BUFSIZE);
465    memcpy (dst, src, BUFSIZE);
466
467    nlines = (L2CACHE_SIZE / l1test_width) /
468	((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8);
469    if (nlines < 1)
470	nlines = 1;
471    n = 1 + npix / (l1test_width * nlines);
472    t1 = gettime ();
473#if EXCLUDE_OVERHEAD
474    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, nlines);
475#endif
476    t2 = gettime ();
477    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, nlines);
478    t3 = gettime ();
479    printf ("  L2:%7.2f", (double)n * l1test_width * nlines /
480            ((t3 - t2) - (t2 - t1)) / 1000000.);
481    fflush (stdout);
482
483    memcpy (src, dst, BUFSIZE);
484    memcpy (dst, src, BUFSIZE);
485
486    n = 1 + npix / (WIDTH * HEIGHT);
487    t1 = gettime ();
488#if EXCLUDE_OVERHEAD
489    bench_M (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
490#endif
491    t2 = gettime ();
492    bench_M (op, src_img, mask_img, dst_img, n, func);
493    t3 = gettime ();
494    printf ("  M:%6.2f (%6.2f%%)",
495        ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1))) / 1000000.,
496        ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) );
497    fflush (stdout);
498
499    memcpy (src, dst, BUFSIZE);
500    memcpy (dst, src, BUFSIZE);
501
502    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
503    t1 = gettime ();
504#if EXCLUDE_OVERHEAD
505    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
506#endif
507    t2 = gettime ();
508    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, func);
509    t3 = gettime ();
510    printf ("  HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
511    fflush (stdout);
512
513    memcpy (src, dst, BUFSIZE);
514    memcpy (dst, src, BUFSIZE);
515
516    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
517    t1 = gettime ();
518#if EXCLUDE_OVERHEAD
519    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
520#endif
521    t2 = gettime ();
522    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, func);
523    t3 = gettime ();
524    printf ("  VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
525    fflush (stdout);
526
527    memcpy (src, dst, BUFSIZE);
528    memcpy (dst, src, BUFSIZE);
529
530    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
531    t1 = gettime ();
532#if EXCLUDE_OVERHEAD
533    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
534#endif
535    t2 = gettime ();
536    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
537    t3 = gettime ();
538    printf ("  R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
539    fflush (stdout);
540
541    memcpy (src, dst, BUFSIZE);
542    memcpy (dst, src, BUFSIZE);
543
544    n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH);
545    t1 = gettime ();
546#if EXCLUDE_OVERHEAD
547    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
548#endif
549    t2 = gettime ();
550    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
551    t3 = gettime ();
552    printf ("  RT:%6.2f (%4.0fKops/s)\n", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000., (double) n / ((t3 - t2) * 1000));
553
554    if (mask_img) {
555	pixman_image_unref (mask_img);
556	pixman_image_unref (xmask_img);
557    }
558    pixman_image_unref (src_img);
559    pixman_image_unref (dst_img);
560    pixman_image_unref (xsrc_img);
561    pixman_image_unref (xdst_img);
562}
563
564#define PIXMAN_OP_OUT_REV (PIXMAN_OP_OUT_REVERSE)
565
566struct
567{
568    char *testname;
569    int   src_fmt;
570    int   src_flags;
571    int   op;
572    int   mask_fmt;
573    int   mask_flags;
574    int   dst_fmt;
575}
576tests_tbl[] =
577{
578    { "add_8_8_8",             PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
579    { "add_n_8_8",             PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
580    { "add_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
581    { "add_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
582    { "add_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
583    { "add_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
584    { "add_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
585    { "add_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
586    { "add_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
587    { "add_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
588    { "add_n_8",               PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
589    { "add_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
590    { "add_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
591    { "add_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
592    { "add_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
593    { "add_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
594    { "add_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
595    { "add_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
596    { "add_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
597    { "add_8_8",               PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
598    { "add_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
599    { "add_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
600    { "add_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
601    { "add_8888_1555",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
602    { "add_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
603    { "add_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
604    { "add_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
605    { "add_1555_1555",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
606    { "add_0565_2x10",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
607    { "add_2a10_2a10",         PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
608    { "in_n_8_8",              PIXMAN_a8r8g8b8,    1, PIXMAN_OP_IN,      PIXMAN_a8,       0, PIXMAN_a8 },
609    { "in_8_8",                PIXMAN_a8,          0, PIXMAN_OP_IN,      PIXMAN_null,     0, PIXMAN_a8 },
610    { "src_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
611    { "src_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
612    { "src_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
613    { "src_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
614    { "src_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
615    { "src_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
616    { "src_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
617    { "src_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
618    { "src_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
619    { "src_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
620    { "src_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
621    { "src_8888_2x10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
622    { "src_8888_2a10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
623    { "src_0888_0565",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
624    { "src_0888_8888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
625    { "src_0888_x888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
626    { "src_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
627    { "src_x888_8888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
628    { "src_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
629    { "src_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
630    { "src_1555_0565",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
631    { "src_0565_1555",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
632    { "src_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
633    { "src_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
634    { "src_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
635    { "src_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
636    { "src_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
637    { "src_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
638    { "src_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
639    { "src_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
640    { "src_8888_8_0565",       PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
641    { "src_0888_8_0565",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
642    { "src_0888_8_8888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
643    { "src_0888_8_x888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
644    { "src_x888_8_x888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
645    { "src_x888_8_8888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
646    { "src_0565_8_0565",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
647    { "src_1555_8_0565",       PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
648    { "src_0565_8_1555",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
649    { "over_n_x888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
650    { "over_n_8888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
651    { "over_n_0565",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
652    { "over_n_1555",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
653    { "over_8888_0565",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
654    { "over_8888_8888",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
655    { "over_8888_x888",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
656    { "over_x888_8_0565",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_r5g6b5 },
657    { "over_x888_8_8888",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
658    { "over_n_8_0565",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_r5g6b5 },
659    { "over_n_8_1555",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
660    { "over_n_8_4444",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
661    { "over_n_8_2222",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
662    { "over_n_8_x888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
663    { "over_n_8_8888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
664    { "over_n_8_2x10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
665    { "over_n_8_2a10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
666    { "over_n_8888_8888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
667    { "over_n_8888_x888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
668    { "over_n_8888_0565_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
669    { "over_n_8888_1555_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
670    { "over_n_8888_4444_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a4r4g4b4 },
671    { "over_n_8888_2222_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r2g2b2 },
672    { "over_n_8888_2x10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x2r10g10b10 },
673    { "over_n_8888_2a10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r10g10b10 },
674    { "over_8888_n_8888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a8r8g8b8 },
675    { "over_8888_n_x888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_x8r8g8b8 },
676    { "over_8888_n_0565",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_r5g6b5 },
677    { "over_8888_n_1555",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a1r5g5b5 },
678    { "over_x888_n_8888",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a8r8g8b8 },
679    { "outrev_n_8_0565",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_r5g6b5 },
680    { "outrev_n_8_1555",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
681    { "outrev_n_8_x888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
682    { "outrev_n_8_8888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
683    { "outrev_n_8888_0565_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
684    { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
685    { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
686    { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
687    { "over_reverse_n_8888",   PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
688};
689
690int
691main (int argc, char *argv[])
692{
693    double x;
694    int i;
695    const char *pattern = NULL;
696    for (i = 1; i < argc; i++)
697    {
698	if (argv[i][0] == '-')
699	{
700	    if (strchr (argv[i] + 1, 'b'))
701	    {
702		use_scaling = TRUE;
703		filter = PIXMAN_FILTER_BILINEAR;
704	    }
705	    else if (strchr (argv[i] + 1, 'n'))
706	    {
707		use_scaling = TRUE;
708		filter = PIXMAN_FILTER_NEAREST;
709	    }
710	}
711	else
712	{
713	    pattern = argv[i];
714	}
715    }
716
717    if (!pattern)
718    {
719	printf ("Usage: lowlevel-blt-bench [-b] [-n] pattern\n");
720	printf ("  -n : benchmark nearest scaling\n");
721	printf ("  -b : benchmark bilinear scaling\n");
722	return 1;
723    }
724
725    src = aligned_malloc (4096, BUFSIZE * 3);
726    memset (src, 0xCC, BUFSIZE * 3);
727    dst = src + (BUFSIZE / 4);
728    mask = dst + (BUFSIZE / 4);
729
730    printf ("Benchmark for a set of most commonly used functions\n");
731    printf ("---\n");
732    printf ("All results are presented in millions of pixels per second\n");
733    printf ("L1  - small Xx1 rectangle (fitting L1 cache), always blitted at the same\n");
734    printf ("      memory location with small drift in horizontal direction\n");
735    printf ("L2  - small XxY rectangle (fitting L2 cache), always blitted at the same\n");
736    printf ("      memory location with small drift in horizontal direction\n");
737    printf ("M   - large %dx%d rectangle, always blitted at the same\n",
738            WIDTH - 64, HEIGHT);
739    printf ("      memory location with small drift in horizontal direction\n");
740    printf ("HT  - random rectangles with %dx%d average size are copied from\n",
741            TILEWIDTH, TILEWIDTH);
742    printf ("      one %dx%d buffer to another, traversing from left to right\n",
743            WIDTH, HEIGHT);
744    printf ("      and from top to bottom\n");
745    printf ("VT  - random rectangles with %dx%d average size are copied from\n",
746            TILEWIDTH, TILEWIDTH);
747    printf ("      one %dx%d buffer to another, traversing from top to bottom\n",
748            WIDTH, HEIGHT);
749    printf ("      and from left to right\n");
750    printf ("R   - random rectangles with %dx%d average size are copied from\n",
751            TILEWIDTH, TILEWIDTH);
752    printf ("      random locations of one %dx%d buffer to another\n",
753            WIDTH, HEIGHT);
754    printf ("RT  - as R, but %dx%d average sized rectangles are copied\n",
755            TINYWIDTH, TINYWIDTH);
756    printf ("---\n");
757    bandwidth = x = bench_memcpy ();
758    printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n",
759            x / 1000000., x / 4000000);
760    if (use_scaling)
761    {
762	printf ("---\n");
763	if (filter == PIXMAN_FILTER_BILINEAR)
764	    printf ("BILINEAR scaling\n");
765	else if (filter == PIXMAN_FILTER_NEAREST)
766	    printf ("NEAREST scaling\n");
767	else
768	    printf ("UNKNOWN scaling\n");
769    }
770    printf ("---\n");
771
772    for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)
773    {
774	if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern))
775	{
776	    bench_composite (tests_tbl[i].testname,
777			     tests_tbl[i].src_fmt,
778			     tests_tbl[i].src_flags,
779			     tests_tbl[i].op,
780			     tests_tbl[i].mask_fmt,
781			     tests_tbl[i].mask_flags,
782			     tests_tbl[i].dst_fmt,
783			     bandwidth/8);
784	}
785    }
786
787    free (src);
788    return 0;
789}
790