/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes
 */

/*
 * Shared helpers for the math debugging code: an optional cycle-counter
 * based benchmark harness (INIT_COUNTER / BEGIN_RACE / END_RACE) and two
 * small helpers for generating and comparing floating point test values.
 * Everything in this header is compiled only when DEBUG_MATH is defined.
 */

#ifndef __M_DEBUG_UTIL_H__
#define __M_DEBUG_UTIL_H__


#ifdef DEBUG_MATH  /* This code only used for debugging */


/* LONG_MAX is used by the counter macros, rand()/RAND_MAX by rnd(). */
#include <limits.h>
#include <stdlib.h>

#include "c99_math.h"


/* Comment this out to deactivate the cycle counter.
 * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher)
 * (hope, you don't try to debug Mesa on a 386 ;)
 */
#if defined(__GNUC__) && \
    ((defined(__i386__) && defined(USE_X86_ASM)) || \
     (defined(__sparc__) && defined(USE_SPARC_ASM)))
#define  RUN_DEBUG_BENCHMARK
#endif

#define TEST_COUNT           128   /* size of the tested vector array   */

#define REQUIRED_PRECISION   10    /* allow 4 bits to miss              */
#define MAX_PRECISION        24    /* max. precision possible           */


#ifdef  RUN_DEBUG_BENCHMARK
/* Overhead of profiling counter in cycles.  Automatically adjusted to
 * your machine at run time - counter initialization should give very
 * consistent results.
 */
extern long counter_overhead;

/* This is the value of the environment variable MESA_PROFILE, and is
 * used to determine if we should benchmark the functions as well as
 * verify their correctness.
 */
extern char *mesa_profile;

/* Modify the number of tests if you like.
 * We take the minimum of all results, because every error should be
 * positive (time used by other processes, task switches etc).
 * It is assumed that all calculations are done in the cache.
 */

/* Usage contract shared by every variant below:
 *
 *   BEGIN_RACE(x) opens a loop and END_RACE(x) closes it, so the two must
 *   always be used as a pair around the code being timed.  Neither macro
 *   declares the loop index: an integer variable named 'cycle_i' has to be
 *   in scope at the point of use.  After END_RACE(x), 'x' holds the
 *   smallest measured timestamp difference minus counter_overhead.
 */

#if defined(__i386__)

#if 1 /* PPro, PII, PIII version */

/* Profiling on the P6 architecture requires a little more work, due to
 * the internal out-of-order execution.  We must perform a serializing
 * 'cpuid' instruction before and after the 'rdtsc' instructions to make
 * sure no other uops are executed when we sample the timestamp counter.
 *
 * %ebx is saved and restored by hand around each 'cpuid', and only the
 * low 32 bits of the timestamp (%eax) are stored.
 */

/* Set counter_overhead to the smallest difference seen between two
 * back-to-back timestamp samples over 8 attempts.
 */
#define  INIT_COUNTER()                                                 \
   do {                                                                 \
      int cycle_i;                                                      \
      counter_overhead = LONG_MAX;                                      \
      for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) {                   \
         long cycle_tmp1 = 0, cycle_tmp2 = 0;                           \
         __asm__ __volatile__ ( "push %%ebx \n"                         \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid \n"                              \
                                "rdtsc \n"                              \
                                "mov %%eax, %0 \n"                      \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid \n"                              \
                                "pop %%ebx \n"                          \
                                "push %%ebx \n"                         \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid \n"                              \
                                "rdtsc \n"                              \
                                "mov %%eax, %1 \n"                      \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid \n"                              \
                                "pop %%ebx \n"                          \
                                : "=m" (cycle_tmp1), "=m" (cycle_tmp2)  \
                                : : "eax", "ecx", "edx" );              \
         if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {          \
            counter_overhead = cycle_tmp2 - cycle_tmp1;                 \
         }                                                              \
      }                                                                 \
   } while (0)

/* Start of a timed region: 10 passes, each beginning with a serialized
 * timestamp sample into cycle_tmp1.
 */
#define  BEGIN_RACE(x)                                                  \
   x = LONG_MAX;                                                        \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
      long cycle_tmp1 = 0, cycle_tmp2 = 0;                              \
      __asm__ __volatile__ ( "push %%ebx \n"                            \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid \n"                                 \
                             "rdtsc \n"                                 \
                             "mov %%eax, %0 \n"                         \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid \n"                                 \
                             "pop %%ebx \n"                             \
                             : "=m" (cycle_tmp1)                        \
                             : : "eax", "ecx", "edx" );

/* End of a timed region: sample the timestamp again, keep the minimum
 * difference in x, close the loop and remove the counter overhead.
 */
#define END_RACE(x)                                                     \
      __asm__ __volatile__ ( "push %%ebx \n"                            \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid \n"                                 \
                             "rdtsc \n"                                 \
                             "mov %%eax, %0 \n"                         \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid \n"                                 \
                             "pop %%ebx \n"                             \
                             : "=m" (cycle_tmp2)                        \
                             : : "eax", "ecx", "edx" );                 \
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {                            \
         x = cycle_tmp2 - cycle_tmp1;                                   \
      }                                                                 \
   }                                                                    \
   x -= counter_overhead;

#else /* PPlain, PMMX version */

/* To ensure accurate results, we stall the pipelines with the
 * non-pairable 'cdq' instruction.  This ensures all the code being
 * profiled is complete when the 'rdtsc' instruction executes.
 *
 * This variant is currently disabled by the '#if 1' above.  Note that its
 * INIT_COUNTER takes the destination as a parameter, unlike the other
 * variants in this file, which write counter_overhead directly.
 */
#define  INIT_COUNTER(x)                                                \
   do {                                                                 \
      int cycle_i;                                                      \
      x = LONG_MAX;                                                     \
      for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) {                  \
         long cycle_tmp1, cycle_tmp2, dummy;                            \
         __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );               \
         __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );               \
         __asm__ ( "cdq" );                                             \
         __asm__ ( "cdq" );                                             \
         __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );         \
         __asm__ ( "cdq" );                                             \
         __asm__ ( "cdq" );                                             \
         __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );         \
         if ( x > (cycle_tmp2 - cycle_tmp1) )                           \
            x = cycle_tmp2 - cycle_tmp1;                                \
      }                                                                 \
   } while (0)

#define  BEGIN_RACE(x)                                                  \
   x = LONG_MAX;                                                        \
   for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {                     \
      long cycle_tmp1, cycle_tmp2, dummy;                               \
      __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );                  \
      __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );                  \
      __asm__ ( "cdq" );                                                \
      __asm__ ( "cdq" );                                                \
      __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );


#define END_RACE(x)                                                     \
      __asm__ ( "cdq" );                                                \
      __asm__ ( "cdq" );                                                \
      __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );            \
      if ( x > (cycle_tmp2 - cycle_tmp1) )                              \
         x = cycle_tmp2 - cycle_tmp1;                                   \
   }                                                                    \
   x -= counter_overhead;

#endif

#elif defined(__x86_64__)

/* NOTE: the condition that defines RUN_DEBUG_BENCHMARK near the top of this
 * file only tests __i386__ and __sparc__, so this branch is reached only
 * when RUN_DEBUG_BENCHMARK is defined by some other means.
 */

/* Read the full 64-bit timestamp counter into 'val'. */
#define rdtscll(val) do {                                               \
     unsigned int a,d;                                                  \
     __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));                    \
     (val) = ((unsigned long)a) | (((unsigned long)d)<<32);             \
} while(0)

/* Copied from i386 PIII version */
#define  INIT_COUNTER()                                                 \
   do {                                                                 \
      int cycle_i;                                                      \
      counter_overhead = LONG_MAX;                                      \
      for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {                  \
         unsigned long cycle_tmp1, cycle_tmp2;                          \
         rdtscll(cycle_tmp1);                                           \
         rdtscll(cycle_tmp2);                                           \
         if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {          \
            counter_overhead = cycle_tmp2 - cycle_tmp1;                 \
         }                                                              \
      }                                                                 \
   } while (0)


#define  BEGIN_RACE(x)                                                  \
   x = LONG_MAX;                                                        \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
      unsigned long cycle_tmp1, cycle_tmp2;                             \
      rdtscll(cycle_tmp1);

#define END_RACE(x)                                                     \
      rdtscll(cycle_tmp2);                                              \
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {                            \
         x = cycle_tmp2 - cycle_tmp1;                                   \
      }                                                                 \
   }                                                                    \
   x -= counter_overhead;

#elif defined(__sparc__)

/* The overhead is not measured on this target; a fixed value is used. */
#define  INIT_COUNTER() \
   do { counter_overhead = 5; } while(0)

/* The timestamp reads are emitted as raw instruction words, with the two
 * temporaries pinned to the registers those words write (%l0 and %l1).
 */
#define  BEGIN_RACE(x)                                                        \
x = LONG_MAX;                                                                 \
for (cycle_i = 0; cycle_i <10; cycle_i++) {                                   \
   register long cycle_tmp1 __asm__("l0");                                    \
   register long cycle_tmp2 __asm__("l1");                                    \
   /* rd %tick, %l0 */                                                        \
   __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1));  /*  save timestamp   */

#define END_RACE(x)                                                           \
   /* rd %tick, %l1 */                                                        \
   __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2));             \
   if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1;              \
}                                                                             \
x -= counter_overhead;

#else
#error Your processor is not supported for RUN_DEBUG_BENCHMARK
#endif

#else

/* Benchmarking disabled: the race macros expand to nothing. */
#define BEGIN_RACE(x)
#define END_RACE(x)

#endif


/* =============================================================
 * Helper functions
 */

/* Return a pseudo-random value from rand(), scaled to nominally [-1, 1].
 * Before scaling, the [0, 1] value is truncated to a multiple of 2^-13.
 */
static GLfloat rnd( void )
{
   GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX;
   GLfloat gran = (GLfloat)(1 << 13);

   f = (GLfloat)(GLint)(f * gran) / gran;

   /* f is a multiple of 2^-13, so this is exact in single precision. */
   return f * 2.0F - 1.0F;
}

/* Estimate how many leading significand bits of 'a' and 'b' agree.
 *
 * Returns MAX_PRECISION when the values are equal, 0 when exactly one of
 * them is zero, and otherwise the smaller of the two binary exponents minus
 * the binary exponent of their difference.
 */
static int significand_match( GLfloat a, GLfloat b )
{
   GLfloat d = a - b;
   int a_ex, b_ex, d_ex;

   if ( d == 0.0F ) {
      return MAX_PRECISION;   /* Exact match */
   }

   if ( a == 0.0F || b == 0.0F ) {
      /* It would probably be better to check if the
       * non-zero number is denormalized and return
       * the index of the highest set bit here.
       */
      return 0;
   }

   /* Only the exponents are needed; the returned fractions are ignored. */
   frexpf( a, &a_ex );
   frexpf( b, &b_ex );
   frexpf( d, &d_ex );

   if ( a_ex < b_ex ) {
      return a_ex - d_ex;
   } else {
      return b_ex - d_ex;
   }
}

/* Symbolic constants for the files that include this header.
 * NOTE(review): presumably these classify expected element values in the
 * test tables (zero, one, minus one, variable) - confirm against the users.
 */
enum { NIL = 0, ONE = 1, NEG = -1, VAR = 2 };

#endif /* DEBUG_MATH */

#endif /* __M_DEBUG_UTIL_H__ */