1/* 2 * Copyright © 2014 RISC OS Open Ltd 3 * 4 * Permission to use, copy, modify, distribute, and sell this software and its 5 * documentation for any purpose is hereby granted without fee, provided that 6 * the above copyright notice appear in all copies and that both that 7 * copyright notice and this permission notice appear in supporting 8 * documentation, and that the name of the copyright holders not be used in 9 * advertising or publicity pertaining to distribution of the software without 10 * specific, written prior permission. The copyright holders make no 11 * representations about the suitability of this software for any purpose. It 12 * is provided "as is" without express or implied warranty. 13 * 14 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS 15 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 16 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 17 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 18 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 19 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 20 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 21 * SOFTWARE. 22 * 23 * Author: Ben Avison (bavison@riscosopen.org) 24 */ 25 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29#include <ctype.h> 30#include <stdint.h> 31#include "utils.h" 32 33#ifdef HAVE_GETTIMEOFDAY 34#include <sys/time.h> 35#else 36#include <time.h> 37#endif 38 39#define WIDTH 1920 40#define HEIGHT 1080 41 42/* How much data to read to flush all cached data to RAM */ 43#define MAX_L2CACHE_SIZE (8 * 1024 * 1024) 44 45#define PAGE_SIZE (4 * 1024) 46 47struct bench_info 48{ 49 pixman_op_t op; 50 pixman_transform_t transform; 51 pixman_image_t *src_image; 52 pixman_image_t *mask_image; 53 pixman_image_t *dest_image; 54 int32_t src_x; 55 int32_t src_y; 56}; 57 58typedef struct bench_info bench_info_t; 59 60struct box_48_16 61{ 62 pixman_fixed_48_16_t x1; 63 pixman_fixed_48_16_t y1; 64 pixman_fixed_48_16_t x2; 65 pixman_fixed_48_16_t y2; 66}; 67 68typedef struct box_48_16 box_48_16_t; 69 70/* This function is copied verbatim from pixman.c. */ 71static pixman_bool_t 72compute_transformed_extents (pixman_transform_t *transform, 73 const pixman_box32_t *extents, 74 box_48_16_t *transformed) 75{ 76 pixman_fixed_48_16_t tx1, ty1, tx2, ty2; 77 pixman_fixed_t x1, y1, x2, y2; 78 int i; 79 80 x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2; 81 y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2; 82 x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2; 83 y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2; 84 85 if (!transform) 86 { 87 transformed->x1 = x1; 88 transformed->y1 = y1; 89 transformed->x2 = x2; 90 transformed->y2 = y2; 91 92 return TRUE; 93 } 94 95 tx1 = ty1 = INT64_MAX; 96 tx2 = ty2 = INT64_MIN; 97 98 for (i = 0; i < 4; ++i) 99 { 100 pixman_fixed_48_16_t tx, ty; 101 pixman_vector_t v; 102 103 v.vector[0] = (i & 0x01)? x1 : x2; 104 v.vector[1] = (i & 0x02)? y1 : y2; 105 v.vector[2] = pixman_fixed_1; 106 107 if (!pixman_transform_point (transform, &v)) 108 return FALSE; 109 110 tx = (pixman_fixed_48_16_t)v.vector[0]; 111 ty = (pixman_fixed_48_16_t)v.vector[1]; 112 113 if (tx < tx1) 114 tx1 = tx; 115 if (ty < ty1) 116 ty1 = ty; 117 if (tx > tx2) 118 tx2 = tx; 119 if (ty > ty2) 120 ty2 = ty; 121 } 122 123 transformed->x1 = tx1; 124 transformed->y1 = ty1; 125 transformed->x2 = tx2; 126 transformed->y2 = ty2; 127 128 return TRUE; 129} 130 131static void 132create_image (uint32_t width, 133 uint32_t height, 134 pixman_format_code_t format, 135 pixman_filter_t filter, 136 uint32_t **bits, 137 pixman_image_t **image) 138{ 139 uint32_t stride = (width * PIXMAN_FORMAT_BPP (format) + 31) / 32 * 4; 140 141 *bits = aligned_malloc (PAGE_SIZE, stride * height); 142 memset (*bits, 0xCC, stride * height); 143 *image = pixman_image_create_bits (format, width, height, *bits, stride); 144 pixman_image_set_repeat (*image, PIXMAN_REPEAT_NORMAL); 145 pixman_image_set_filter (*image, filter, NULL, 0); 146} 147 148/* This needs to match the shortest cacheline length we expect to encounter */ 149#define CACHE_CLEAN_INCREMENT 32 150 151static void 152flush_cache (void) 153{ 154 static const char clean_space[MAX_L2CACHE_SIZE]; 155 volatile const char *x = clean_space; 156 const char *clean_end = clean_space + sizeof clean_space; 157 158 while (x < clean_end) 159 { 160 (void) *x; 161 x += CACHE_CLEAN_INCREMENT; 162 } 163} 164 165/* Obtain current time in microseconds modulo 2^32 */ 166uint32_t 167gettimei (void) 168{ 169#ifdef HAVE_GETTIMEOFDAY 170 struct timeval tv; 171 172 gettimeofday (&tv, NULL); 173 return tv.tv_sec * 1000000 + tv.tv_usec; 174#else 175 return (uint64_t) clock () * 1000000 / CLOCKS_PER_SEC; 176#endif 177} 178 179static void 180pixman_image_composite_wrapper (const pixman_composite_info_t *info) 181{ 182 pixman_image_composite (info->op, 183 info->src_image, info->mask_image, info->dest_image, 184 info->src_x, info->src_y, 185 info->mask_x, info->mask_y, 186 info->dest_x, info->dest_y, 187 info->width, info->height); 188} 189 190static void 191pixman_image_composite_empty (const pixman_composite_info_t *info) 192{ 193 pixman_image_composite (info->op, 194 info->src_image, info->mask_image, info->dest_image, 195 info->src_x, info->src_y, 196 info->mask_x, info->mask_y, 197 info->dest_x, info->dest_y, 198 1, 1); 199} 200 201static void 202bench (const bench_info_t *bi, 203 uint32_t max_n, 204 uint32_t max_time, 205 uint32_t *ret_n, 206 uint32_t *ret_time, 207 void (*func) (const pixman_composite_info_t *info)) 208{ 209 uint32_t n = 0; 210 uint32_t t0; 211 uint32_t t1; 212 uint32_t x = 0; 213 pixman_transform_t t; 214 pixman_composite_info_t info; 215 216 t = bi->transform; 217 info.op = bi->op; 218 info.src_image = bi->src_image; 219 info.mask_image = bi->mask_image; 220 info.dest_image = bi->dest_image; 221 info.src_x = 0; 222 info.src_y = 0; 223 info.mask_x = 0; 224 info.mask_y = 0; 225 /* info.dest_x set below */ 226 info.dest_y = 0; 227 info.width = WIDTH; 228 info.height = HEIGHT; 229 230 t0 = gettimei (); 231 232 do 233 { 234 235 if (++x >= 64) 236 x = 0; 237 238 info.dest_x = 63 - x; 239 240 t.matrix[0][2] = pixman_int_to_fixed (bi->src_x + x); 241 t.matrix[1][2] = pixman_int_to_fixed (bi->src_y); 242 pixman_image_set_transform (bi->src_image, &t); 243 244 if (bi->mask_image) 245 pixman_image_set_transform (bi->mask_image, &t); 246 247 func (&info); 248 t1 = gettimei (); 249 } 250 while (++n < max_n && (t1 - t0) < max_time); 251 252 if (ret_n) 253 *ret_n = n; 254 255 *ret_time = t1 - t0; 256} 257 258int 259parse_fixed_argument (char *arg, pixman_fixed_t *value) 260{ 261 char *tailptr; 262 263 *value = pixman_double_to_fixed (strtod (arg, &tailptr)); 264 265 return *tailptr == '\0'; 266} 267 268int 269parse_arguments (int argc, 270 char *argv[], 271 pixman_transform_t *t, 272 pixman_op_t *op, 273 pixman_format_code_t *src_format, 274 pixman_format_code_t *mask_format, 275 pixman_format_code_t *dest_format) 276{ 277 if (!parse_fixed_argument (*argv, &t->matrix[0][0])) 278 return 0; 279 280 if (*++argv == NULL) 281 return 1; 282 283 if (!parse_fixed_argument (*argv, &t->matrix[0][1])) 284 return 0; 285 286 if (*++argv == NULL) 287 return 1; 288 289 if (!parse_fixed_argument (*argv, &t->matrix[1][0])) 290 return 0; 291 292 if (*++argv == NULL) 293 return 1; 294 295 if (!parse_fixed_argument (*argv, &t->matrix[1][1])) 296 return 0; 297 298 if (*++argv == NULL) 299 return 1; 300 301 *op = operator_from_string (*argv); 302 if (*op == PIXMAN_OP_NONE) 303 return 0; 304 305 if (*++argv == NULL) 306 return 1; 307 308 *src_format = format_from_string (*argv); 309 if (*src_format == PIXMAN_null) 310 return 0; 311 312 ++argv; 313 if (argv[0] && argv[1]) 314 { 315 *mask_format = format_from_string (*argv); 316 if (*mask_format == PIXMAN_null) 317 return 0; 318 ++argv; 319 } 320 if (*argv) 321 { 322 *dest_format = format_from_string (*argv); 323 if (*dest_format == PIXMAN_null) 324 return 0; 325 } 326 return 1; 327} 328 329static void 330run_benchmark (const bench_info_t *bi) 331{ 332 uint32_t n; /* number of iterations in at least 5 seconds */ 333 uint32_t t1; /* time taken to do n iterations, microseconds */ 334 uint32_t t2; /* calling overhead for n iterations, microseconds */ 335 336 flush_cache (); 337 bench (bi, UINT32_MAX, 5000000, &n, &t1, pixman_image_composite_wrapper); 338 bench (bi, n, UINT32_MAX, NULL, &t2, pixman_image_composite_empty); 339 340 /* The result indicates the output rate in megapixels/second */ 341 printf ("%6.2f\n", (double) n * WIDTH * HEIGHT / (t1 - t2)); 342} 343 344 345int 346main (int argc, char *argv[]) 347{ 348 bench_info_t binfo; 349 pixman_filter_t filter = PIXMAN_FILTER_NEAREST; 350 pixman_format_code_t src_format = PIXMAN_a8r8g8b8; 351 pixman_format_code_t mask_format = 0; 352 pixman_format_code_t dest_format = PIXMAN_a8r8g8b8; 353 pixman_box32_t dest_box = { 0, 0, WIDTH, HEIGHT }; 354 box_48_16_t transformed = { 0 }; 355 int32_t xmin, ymin, xmax, ymax; 356 uint32_t *src, *mask, *dest; 357 358 binfo.op = PIXMAN_OP_SRC; 359 binfo.mask_image = NULL; 360 pixman_transform_init_identity (&binfo.transform); 361 362 ++argv; 363 if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'n') 364 { 365 filter = PIXMAN_FILTER_NEAREST; 366 ++argv; 367 --argc; 368 } 369 370 if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'b') 371 { 372 filter = PIXMAN_FILTER_BILINEAR; 373 ++argv; 374 --argc; 375 } 376 377 if (argc == 1 || 378 !parse_arguments (argc, argv, &binfo.transform, &binfo.op, 379 &src_format, &mask_format, &dest_format)) 380 { 381 printf ("Usage: affine-bench [-n] [-b] axx [axy] [ayx] [ayy] [combine type]\n"); 382 printf (" [src format] [mask format] [dest format]\n"); 383 printf (" -n : nearest scaling (default)\n"); 384 printf (" -b : bilinear scaling\n"); 385 printf (" axx : x_out:x_in factor\n"); 386 printf (" axy : x_out:y_in factor (default 0)\n"); 387 printf (" ayx : y_out:x_in factor (default 0)\n"); 388 printf (" ayy : y_out:y_in factor (default 1)\n"); 389 printf (" combine type : src, over, in etc (default src)\n"); 390 printf (" src format : a8r8g8b8, r5g6b5 etc (default a8r8g8b8)\n"); 391 printf (" mask format : as for src format, but no mask used if omitted\n"); 392 printf (" dest format : as for src format (default a8r8g8b8)\n"); 393 printf ("The output is a single number in megapixels/second.\n"); 394 395 return EXIT_FAILURE; 396 } 397 398 /* Compute required extents for source and mask image so they qualify 399 * for COVER fast paths and get the flags in pixman.c:analyze_extent(). 400 * These computations are for FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, 401 * but at the same time they also allow COVER_CLIP_NEAREST. 402 */ 403 compute_transformed_extents (&binfo.transform, &dest_box, &transformed); 404 xmin = pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2); 405 ymin = pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2); 406 xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2); 407 ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2); 408 /* Note: 409 * The upper limits can be reduced to the following when fetchers 410 * are guaranteed to not access pixels with zero weight. This concerns 411 * particularly all bilinear samplers. 412 * 413 * xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2 - pixman_fixed_e); 414 * ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2 - pixman_fixed_e); 415 * This is equivalent to subtracting 0.5 and rounding up, rather than 416 * subtracting 0.5, rounding down and adding 1. 417 */ 418 binfo.src_x = -xmin; 419 binfo.src_y = -ymin; 420 421 /* Always over-allocate width by 64 pixels for all src, mask and dst, 422 * so that we can iterate over an x-offset 0..63 in bench (). 423 * This is similar to lowlevel-blt-bench, which uses the same method 424 * to hit different cacheline misalignments. 425 */ 426 create_image (xmax - xmin + 64, ymax - ymin + 1, src_format, filter, 427 &src, &binfo.src_image); 428 429 if (mask_format) 430 { 431 create_image (xmax - xmin + 64, ymax - ymin + 1, mask_format, filter, 432 &mask, &binfo.mask_image); 433 434 if ((PIXMAN_FORMAT_R(mask_format) || 435 PIXMAN_FORMAT_G(mask_format) || 436 PIXMAN_FORMAT_B(mask_format))) 437 { 438 pixman_image_set_component_alpha (binfo.mask_image, 1); 439 } 440 } 441 442 create_image (WIDTH + 64, HEIGHT, dest_format, filter, 443 &dest, &binfo.dest_image); 444 445 run_benchmark (&binfo); 446 447 return EXIT_SUCCESS; 448} 449