Home | History | Annotate | Line # | Download | only in programs
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  * You may select, at your option, one of the above-listed licenses.
      9  */
     10 
     11 
     12 
     13 /* *************************************
     14 *  Includes
     15 ***************************************/
     16 #include <stdlib.h>      /* malloc, free */
     17 #include <string.h>      /* memset */
     18 #include <assert.h>      /* assert */
     19 
     20 #include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
     21 #include "benchfn.h"
     22 
     23 
     24 /* *************************************
     25 *  Constants
     26 ***************************************/
/* Duration of one second, expressed in two units.
 * SEC_TO_MICRO is not defined in this file (presumably provided by timefn.h - to be confirmed). */
#define TIMELOOP_MICROSEC     SEC_TO_MICRO      /* 1 second */
#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */

/* Postfix size multipliers : `4 KB` expands to `4 *(1 <<10)`.
 * The expansion is not parenthesized as a whole : it binds like any other multiplication. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
     33 
     34 
     35 /* *************************************
     36 *  Debug errors
     37 ***************************************/
/* DEBUGOUTPUT() :
 * printf-style trace sent to stderr when DEBUG >= 1; does nothing otherwise.
 * Both variants are wrapped in do { } while (0), so that the macro behaves as
 * a single statement and can be safely followed by `;` in any context
 * (notably as the body of an `if` that has an `else`). */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>       /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...)   do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
#else
#  define DEBUGOUTPUT(...)   do { } while (0)
#endif
     45 
     46 
/* RETURN_QUIET_ERROR() :
 * Leaves the calling function, returning `retValue`.
 * Nothing is displayed, unless debug traces are enabled, in which case
 * the source location and the printf-style message (`...`) are traced first.
 * Wrapped in do { } while (0) so that `RETURN_QUIET_ERROR(...);` is a single
 * statement, usable as the body of an `if` that has an `else`. */
#define RETURN_QUIET_ERROR(retValue, ...) do {        \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DEBUGOUTPUT("Error : ");                          \
    DEBUGOUTPUT(__VA_ARGS__);                         \
    DEBUGOUTPUT(" \n");                               \
    return retValue;                                  \
} while (0)
     55 
/* CONTROL() :
 * Abort execution (abort()) if condition `c` is not met.
 * The failed condition is traced first when debug traces are enabled.
 * Wrapped in do { } while (0) so that `CONTROL(c);` is a single statement. */
#define CONTROL(c) do { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } } while (0)
     58 
     59 
     60 /* *************************************
     61 *  Benchmarking an arbitrary function
     62 ***************************************/
     63 
     64 int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
     65 {
     66     return outcome.error_tag_never_ever_use_directly == 0;
     67 }
     68 
/* BMK_extract_runTime() :
 * @return : the timing measurement stored inside a successful `outcome`.
 * warning : this function will stop program execution (abort(), through CONTROL())
 *           if outcome carries an error !
 *           check outcome validity first, using BMK_isSuccessful_runOutcome() */
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
{
    CONTROL(outcome.error_tag_never_ever_use_directly == 0);
    return outcome.internal_never_ever_use_directly;
}
     76 
/* BMK_extract_errorResult() :
 * @return : the error value stored inside a failed `outcome`.
 * warning : this function will stop program execution (abort(), through CONTROL())
 *           if outcome does NOT carry an error. */
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
    CONTROL(outcome.error_tag_never_ever_use_directly != 0);
    return outcome.error_result_never_ever_use_directly;
}
     82 
     83 static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
     84 {
     85     BMK_runOutcome_t b;
     86     memset(&b, 0, sizeof(b));
     87     b.error_tag_never_ever_use_directly = 1;
     88     b.error_result_never_ever_use_directly = errorResult;
     89     return b;
     90 }
     91 
     92 static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
     93 {
     94     BMK_runOutcome_t outcome;
     95     outcome.error_tag_never_ever_use_directly = 0;
     96     outcome.internal_never_ever_use_directly = runTime;
     97     return outcome;
     98 }
     99 
    100 
    101 /* initFn will be measured once, benchFn will be measured `nbLoops` times */
    102 /* initFn is optional, provide NULL if none */
    103 /* benchFn must return a size_t value that errorFn can interpret */
    104 /* takes # of blocks and list of size & stuff for each. */
    105 /* can report result of benchFn for each block into blockResult. */
    106 /* blockResult is optional, provide NULL if this information is not required */
    107 /* note : time per loop can be reported as zero if run time < timer resolution */
    108 BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
    109                                    unsigned nbLoops)
    110 {
    111     nbLoops += !nbLoops;   /* minimum nbLoops is 1 */
    112 
    113     /* init */
    114     {   size_t i;
    115         for(i = 0; i < p.blockCount; i++) {
    116             memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]);  /* warm up and erase result buffer */
    117     }   }
    118 
    119     /* benchmark */
    120     {   size_t dstSize = 0;
    121         UTIL_time_t const clockStart = UTIL_getTime();
    122         unsigned loopNb, blockNb;
    123         if (p.initFn != NULL) p.initFn(p.initPayload);
    124         for (loopNb = 0; loopNb < nbLoops; loopNb++) {
    125             for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
    126                 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
    127                                    p.dstBuffers[blockNb], p.dstCapacities[blockNb],
    128                                    p.benchPayload);
    129                 if (loopNb == 0) {
    130                     if (p.blockResults != NULL) p.blockResults[blockNb] = res;
    131                     if ((p.errorFn != NULL) && (p.errorFn(res))) {
    132                         RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
    133                             "Function benchmark failed on block %u (of size %u) with error %i",
    134                             blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
    135                     }
    136                     dstSize += res;
    137             }   }
    138         }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
    139 
    140         {   PTime const totalTime = UTIL_clockSpanNano(clockStart);
    141             BMK_runTime_t rt;
    142             rt.nanoSecPerRun = (double)totalTime / nbLoops;
    143             rt.sumOfReturn = dstSize;
    144             return BMK_setValid_runTime(rt);
    145     }   }
    146 }
    147 
    148 
    149 /* ====  Benchmarking any function, providing intermediate results  ==== */
    150 
/* State carried across successive calls to BMK_benchTimedFn().
 * All fields are (re)initialized by BMK_resetTimedFnState(). */
struct BMK_timedFnState_s {
    PTime timeSpent_ns;        /* sum of the durations of all successful runs so far, in nanoseconds */
    PTime timeBudget_ns;       /* total budget, derived from total_ms; BMK_isCompleted_TimedFn() compares timeSpent_ns against it */
    PTime runBudget_ns;        /* target duration of a single run, derived from run_ms */
    BMK_runTime_t fastestRun;  /* starts as a very large sentinel; read by BMK_benchTimedFn() as its initial best run.
                                * NOTE(review): no write-back is visible in this file - confirm this is intended */
    unsigned nbLoops;          /* nb of loops requested for the next BMK_benchFunction() call; starts at 1 */
    UTIL_time_t coolTime;      /* timestamp taken at reset time; NOTE(review): not read anywhere in the code visible here */
};  /* typedef'd to BMK_timedFnState_t within bench.h */
    159 
    160 BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
    161 {
    162     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
    163     if (r == NULL) return NULL;   /* malloc() error */
    164     BMK_resetTimedFnState(r, total_ms, run_ms);
    165     return r;
    166 }
    167 
    168 void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
    169 
    170 BMK_timedFnState_t*
    171 BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
    172 {
    173     typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1];  /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
    174     typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align;  /* force tfs to be aligned at its next best position */
    175     size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
    176     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
    177     if (buffer == NULL) return NULL;
    178     if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
    179     if ((size_t)buffer % tfs_alignment) return NULL;  /* buffer must be properly aligned */
    180     BMK_resetTimedFnState(r, total_ms, run_ms);
    181     return r;
    182 }
    183 
    184 void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
    185 {
    186     if (!total_ms) total_ms = 1 ;
    187     if (!run_ms) run_ms = 1;
    188     if (run_ms > total_ms) run_ms = total_ms;
    189     timedFnState->timeSpent_ns = 0;
    190     timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
    191     timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
    192     timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;  /* hopefully large enough : must be larger than any potential measurement */
    193     timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
    194     timedFnState->nbLoops = 1;
    195     timedFnState->coolTime = UTIL_getTime();
    196 }
    197 
    198 /* Tells if nb of seconds set in timedFnState for all runs is spent.
    199  * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
    200 int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
    201 {
    202     return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
    203 }
    204 
    205 
/* MIN() : smaller of two values.
 * Any previous definition is discarded first.
 * note : the selected argument is evaluated twice : do not pass expressions with side effects. */
#undef MIN
#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )

/* NOTE(review): MINUSABLETIME is not referenced in the code visible here */
#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */
    210 
    211 BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
    212                                   BMK_benchParams_t p)
    213 {
    214     PTime const runBudget_ns = cont->runBudget_ns;
    215     PTime const runTimeMin_ns = runBudget_ns / 2;
    216     int completed = 0;
    217     BMK_runTime_t bestRunTime = cont->fastestRun;
    218 
    219     while (!completed) {
    220         BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
    221 
    222         if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
    223             return runResult;
    224         }
    225 
    226         {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
    227             double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
    228 
    229             cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
    230 
    231             /* estimate nbLoops for next run to last approximately 1 second */
    232             if (loopDuration_ns > ((double)runBudget_ns / 50)) {
    233                 double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
    234                 cont->nbLoops = (unsigned)((double)runBudget_ns / fastestRun_ns) + 1;
    235             } else {
    236                 /* previous run was too short : blindly increase workload by x multiplier */
    237                 const unsigned multiplier = 10;
    238                 assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
    239                 cont->nbLoops *= multiplier;
    240             }
    241 
    242             if(loopDuration_ns < (double)runTimeMin_ns) {
    243                 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
    244                 assert(completed == 0);
    245                 continue;
    246             } else {
    247                 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
    248                     bestRunTime = newRunTime;
    249                 }
    250                 completed = 1;
    251             }
    252         }
    253     }   /* while (!completed) */
    254 
    255     return BMK_setValid_runTime(bestRunTime);
    256 }
    257