1/*
2 * Copyright © 2014 RISC OS Open Ltd
3 *
4 * Permission to use, copy, modify, distribute, and sell this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * the above copyright notice appear in all copies and that both that
7 * copyright notice and this permission notice appear in supporting
8 * documentation, and that the name of the copyright holders not be used in
9 * advertising or publicity pertaining to distribution of the software without
10 * specific, written prior permission.  The copyright holders make no
11 * representations about the suitability of this software for any purpose.  It
12 * is provided "as is" without express or implied warranty.
13 *
14 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
21 * SOFTWARE.
22 *
23 * Author:  Ben Avison (bavison@riscosopen.org)
24 */
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <ctype.h>
30#include <stdint.h>
31#include "utils.h"
32
33#ifdef HAVE_GETTIMEOFDAY
34#include <sys/time.h>
35#else
36#include <time.h>
37#endif
38
/* Dimensions, in pixels, of the rectangle composited on every benchmark
 * iteration.  They also define the destination box whose transformed
 * extents determine how large the source and mask images must be.
 */
#define WIDTH  1920
#define HEIGHT 1080

/* How much data to read to flush all cached data to RAM */
#define MAX_L2CACHE_SIZE (8 * 1024 * 1024)

/* Passed as the first argument of aligned_malloc () for every pixel buffer
 * (presumably the required alignment in bytes - confirm against utils.h).
 */
#define PAGE_SIZE (4 * 1024)
46
47struct bench_info
48{
49    pixman_op_t           op;
50    pixman_transform_t    transform;
51    pixman_image_t       *src_image;
52    pixman_image_t       *mask_image;
53    pixman_image_t       *dest_image;
54    int32_t               src_x;
55    int32_t               src_y;
56};
57
58typedef struct bench_info bench_info_t;
59
60struct box_48_16
61{
62    pixman_fixed_48_16_t        x1;
63    pixman_fixed_48_16_t        y1;
64    pixman_fixed_48_16_t        x2;
65    pixman_fixed_48_16_t        y2;
66};
67
68typedef struct box_48_16 box_48_16_t;
69
/* This function is copied verbatim from pixman.c.
 *
 * Computes the bounding box, in fixed-point source coordinates, of the
 * pixel centres of the integer box `extents` after applying `transform`.
 *
 * transform   - transform to apply, or NULL to use the pixel centres of
 *               `extents` unchanged.
 * extents     - box in integer coordinates; x1/y1 are moved forward by
 *               half a pixel and x2/y2 back by half a pixel before being
 *               transformed.
 * transformed - receives the minimum and maximum transformed x and y.
 *
 * Returns TRUE on success.  Returns FALSE, leaving `transformed`
 * untouched, if pixman_transform_point () fails for any corner.
 */
static pixman_bool_t
compute_transformed_extents (pixman_transform_t   *transform,
			     const pixman_box32_t *extents,
			     box_48_16_t          *transformed)
{
    pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
    pixman_fixed_t x1, y1, x2, y2;
    int i;

    /* Centres of the first and last pixel in each direction */
    x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2;
    y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2;
    x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2;
    y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2;

    if (!transform)
    {
	transformed->x1 = x1;
	transformed->y1 = y1;
	transformed->x2 = x2;
	transformed->y2 = y2;

	return TRUE;
    }

    /* Start with an empty range so the first corner always replaces it */
    tx1 = ty1 = INT64_MAX;
    tx2 = ty2 = INT64_MIN;

    /* Bits 0 and 1 of i select which x and which y make up each corner */
    for (i = 0; i < 4; ++i)
    {
	pixman_fixed_48_16_t tx, ty;
	pixman_vector_t v;

	v.vector[0] = (i & 0x01)? x1 : x2;
	v.vector[1] = (i & 0x02)? y1 : y2;
	v.vector[2] = pixman_fixed_1;

	if (!pixman_transform_point (transform, &v))
	    return FALSE;

	tx = (pixman_fixed_48_16_t)v.vector[0];
	ty = (pixman_fixed_48_16_t)v.vector[1];

	if (tx < tx1)
	    tx1 = tx;
	if (ty < ty1)
	    ty1 = ty;
	if (tx > tx2)
	    tx2 = tx;
	if (ty > ty2)
	    ty2 = ty;
    }

    transformed->x1 = tx1;
    transformed->y1 = ty1;
    transformed->x2 = tx2;
    transformed->y2 = ty2;

    return TRUE;
}
130
131static void
132create_image (uint32_t                   width,
133              uint32_t                   height,
134              pixman_format_code_t       format,
135              pixman_filter_t            filter,
136              uint32_t                 **bits,
137              pixman_image_t           **image)
138{
139    uint32_t stride = (width * PIXMAN_FORMAT_BPP (format) + 31) / 32 * 4;
140
141    *bits = aligned_malloc (PAGE_SIZE, stride * height);
142    memset (*bits, 0xCC, stride * height);
143    *image = pixman_image_create_bits (format, width, height, *bits, stride);
144    pixman_image_set_repeat (*image, PIXMAN_REPEAT_NORMAL);
145    pixman_image_set_filter (*image, filter, NULL, 0);
146}
147
148/* This needs to match the shortest cacheline length we expect to encounter */
149#define CACHE_CLEAN_INCREMENT 32
150
151static void
152flush_cache (void)
153{
154    static const char clean_space[MAX_L2CACHE_SIZE];
155    volatile const char *x = clean_space;
156    const char *clean_end = clean_space + sizeof clean_space;
157
158    while (x < clean_end)
159    {
160        (void) *x;
161        x += CACHE_CLEAN_INCREMENT;
162    }
163}
164
/* Obtain current time in microseconds modulo 2^32.
 *
 * Callers measure intervals by subtracting two return values; unsigned
 * wrap-around makes that difference correct as long as the interval is
 * shorter than 2^32 microseconds.
 */
uint32_t
gettimei (void)
{
#ifdef HAVE_GETTIMEOFDAY
    struct timeval tv;

    gettimeofday (&tv, NULL);
    /* Do the arithmetic in uint32_t: multiplying the signed tv_sec by
     * 1000000 overflows (undefined behaviour) where long is 32 bits,
     * whereas unsigned arithmetic wraps modulo 2^32 as intended.
     */
    return (uint32_t) tv.tv_sec * 1000000u + (uint32_t) tv.tv_usec;
#else
    return (uint32_t) ((uint64_t) clock () * 1000000 / CLOCKS_PER_SEC);
#endif
}
178
179static void
180pixman_image_composite_wrapper (const pixman_composite_info_t *info)
181{
182    pixman_image_composite (info->op,
183                            info->src_image, info->mask_image, info->dest_image,
184                            info->src_x, info->src_y,
185                            info->mask_x, info->mask_y,
186                            info->dest_x, info->dest_y,
187                            info->width, info->height);
188}
189
190static void
191pixman_image_composite_empty (const pixman_composite_info_t *info)
192{
193    pixman_image_composite (info->op,
194                            info->src_image, info->mask_image, info->dest_image,
195                            info->src_x, info->src_y,
196                            info->mask_x, info->mask_y,
197                            info->dest_x, info->dest_y,
198                            1, 1);
199}
200
201static void
202bench (const bench_info_t *bi,
203       uint32_t            max_n,
204       uint32_t            max_time,
205       uint32_t           *ret_n,
206       uint32_t           *ret_time,
207       void              (*func) (const pixman_composite_info_t *info))
208{
209    uint32_t n = 0;
210    uint32_t t0;
211    uint32_t t1;
212    uint32_t x = 0;
213    pixman_transform_t t;
214    pixman_composite_info_t info;
215
216    t = bi->transform;
217    info.op = bi->op;
218    info.src_image = bi->src_image;
219    info.mask_image = bi->mask_image;
220    info.dest_image = bi->dest_image;
221    info.src_x = 0;
222    info.src_y = 0;
223    info.mask_x = 0;
224    info.mask_y = 0;
225    /* info.dest_x set below */
226    info.dest_y = 0;
227    info.width = WIDTH;
228    info.height = HEIGHT;
229
230    t0 = gettimei ();
231
232    do
233    {
234
235        if (++x >= 64)
236            x = 0;
237
238        info.dest_x = 63 - x;
239
240        t.matrix[0][2] = pixman_int_to_fixed (bi->src_x + x);
241        t.matrix[1][2] = pixman_int_to_fixed (bi->src_y);
242        pixman_image_set_transform (bi->src_image, &t);
243
244        if (bi->mask_image)
245            pixman_image_set_transform (bi->mask_image, &t);
246
247        func (&info);
248        t1 = gettimei ();
249    }
250    while (++n < max_n && (t1 - t0) < max_time);
251
252    if (ret_n)
253        *ret_n = n;
254
255    *ret_time = t1 - t0;
256}
257
258int
259parse_fixed_argument (char *arg, pixman_fixed_t *value)
260{
261    char *tailptr;
262
263    *value = pixman_double_to_fixed (strtod (arg, &tailptr));
264
265    return *tailptr == '\0';
266}
267
268int
269parse_arguments (int                   argc,
270                 char                 *argv[],
271                 pixman_transform_t   *t,
272                 pixman_op_t          *op,
273                 pixman_format_code_t *src_format,
274                 pixman_format_code_t *mask_format,
275                 pixman_format_code_t *dest_format)
276{
277    if (!parse_fixed_argument (*argv, &t->matrix[0][0]))
278        return 0;
279
280    if (*++argv == NULL)
281        return 1;
282
283    if (!parse_fixed_argument (*argv, &t->matrix[0][1]))
284        return 0;
285
286    if (*++argv == NULL)
287        return 1;
288
289    if (!parse_fixed_argument (*argv, &t->matrix[1][0]))
290        return 0;
291
292    if (*++argv == NULL)
293        return 1;
294
295    if (!parse_fixed_argument (*argv, &t->matrix[1][1]))
296        return 0;
297
298    if (*++argv == NULL)
299        return 1;
300
301    *op = operator_from_string (*argv);
302    if (*op == PIXMAN_OP_NONE)
303        return 0;
304
305    if (*++argv == NULL)
306        return 1;
307
308    *src_format = format_from_string (*argv);
309    if (*src_format == PIXMAN_null)
310        return 0;
311
312    ++argv;
313    if (argv[0] && argv[1])
314    {
315        *mask_format = format_from_string (*argv);
316        if (*mask_format == PIXMAN_null)
317            return 0;
318        ++argv;
319    }
320    if (*argv)
321    {
322        *dest_format = format_from_string (*argv);
323        if (*dest_format == PIXMAN_null)
324            return 0;
325    }
326    return 1;
327}
328
329static void
330run_benchmark (const bench_info_t *bi)
331{
332    uint32_t n;  /* number of iterations in at least 5 seconds */
333    uint32_t t1; /* time taken to do n iterations, microseconds */
334    uint32_t t2; /* calling overhead for n iterations, microseconds */
335
336    flush_cache ();
337    bench (bi, UINT32_MAX, 5000000, &n, &t1, pixman_image_composite_wrapper);
338    bench (bi, n, UINT32_MAX, NULL, &t2, pixman_image_composite_empty);
339
340    /* The result indicates the output rate in megapixels/second */
341    printf ("%6.2f\n", (double) n * WIDTH * HEIGHT / (t1 - t2));
342}
343
344
345int
346main (int argc, char *argv[])
347{
348    bench_info_t         binfo;
349    pixman_filter_t      filter      = PIXMAN_FILTER_NEAREST;
350    pixman_format_code_t src_format  = PIXMAN_a8r8g8b8;
351    pixman_format_code_t mask_format = 0;
352    pixman_format_code_t dest_format = PIXMAN_a8r8g8b8;
353    pixman_box32_t       dest_box    = { 0, 0, WIDTH, HEIGHT };
354    box_48_16_t          transformed = { 0 };
355    int32_t xmin, ymin, xmax, ymax;
356    uint32_t *src, *mask, *dest;
357
358    binfo.op         = PIXMAN_OP_SRC;
359    binfo.mask_image = NULL;
360    pixman_transform_init_identity (&binfo.transform);
361
362    ++argv;
363    if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'n')
364    {
365        filter = PIXMAN_FILTER_NEAREST;
366        ++argv;
367        --argc;
368    }
369
370    if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'b')
371    {
372        filter = PIXMAN_FILTER_BILINEAR;
373        ++argv;
374        --argc;
375    }
376
377    if (argc == 1 ||
378        !parse_arguments (argc, argv, &binfo.transform, &binfo.op,
379                          &src_format, &mask_format, &dest_format))
380    {
381        printf ("Usage: affine-bench [-n] [-b] axx [axy] [ayx] [ayy] [combine type]\n");
382        printf ("                    [src format] [mask format] [dest format]\n");
383        printf ("  -n : nearest scaling (default)\n");
384        printf ("  -b : bilinear scaling\n");
385        printf ("  axx : x_out:x_in factor\n");
386        printf ("  axy : x_out:y_in factor (default 0)\n");
387        printf ("  ayx : y_out:x_in factor (default 0)\n");
388        printf ("  ayy : y_out:y_in factor (default 1)\n");
389        printf ("  combine type : src, over, in etc (default src)\n");
390        printf ("  src format : a8r8g8b8, r5g6b5 etc (default a8r8g8b8)\n");
391        printf ("  mask format : as for src format, but no mask used if omitted\n");
392        printf ("  dest format : as for src format (default a8r8g8b8)\n");
393        printf ("The output is a single number in megapixels/second.\n");
394
395        return EXIT_FAILURE;
396    }
397
398    /* Compute required extents for source and mask image so they qualify
399     * for COVER fast paths and get the flags in pixman.c:analyze_extent().
400     * These computations are for FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,
401     * but at the same time they also allow COVER_CLIP_NEAREST.
402     */
403    compute_transformed_extents (&binfo.transform, &dest_box, &transformed);
404    xmin = pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2);
405    ymin = pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2);
406    xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2);
407    ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2);
408    /* Note:
409     * The upper limits can be reduced to the following when fetchers
410     * are guaranteed to not access pixels with zero weight. This concerns
411     * particularly all bilinear samplers.
412     *
413     * xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2 - pixman_fixed_e);
414     * ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2 - pixman_fixed_e);
415     * This is equivalent to subtracting 0.5 and rounding up, rather than
416     * subtracting 0.5, rounding down and adding 1.
417     */
418    binfo.src_x = -xmin;
419    binfo.src_y = -ymin;
420
421    /* Always over-allocate width by 64 pixels for all src, mask and dst,
422     * so that we can iterate over an x-offset 0..63 in bench ().
423     * This is similar to lowlevel-blt-bench, which uses the same method
424     * to hit different cacheline misalignments.
425     */
426    create_image (xmax - xmin + 64, ymax - ymin + 1, src_format, filter,
427                  &src, &binfo.src_image);
428
429    if (mask_format)
430    {
431        create_image (xmax - xmin + 64, ymax - ymin + 1, mask_format, filter,
432                      &mask, &binfo.mask_image);
433
434        if ((PIXMAN_FORMAT_R(mask_format) ||
435             PIXMAN_FORMAT_G(mask_format) ||
436             PIXMAN_FORMAT_B(mask_format)))
437        {
438            pixman_image_set_component_alpha (binfo.mask_image, 1);
439        }
440    }
441
442    create_image (WIDTH + 64, HEIGHT, dest_format, filter,
443                  &dest, &binfo.dest_image);
444
445    run_benchmark (&binfo);
446
447    return EXIT_SUCCESS;
448}
449