Home | History | Annotate | Line # | Download | only in pa
#include "test/jemalloc_test.h"

#include <inttypes.h>

/* Additional includes for PA functionality */
#include "jemalloc/internal/pa.h"
#include "jemalloc/internal/tsd.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/ehooks.h"
#include "jemalloc/internal/nstime.h"
#include "jemalloc/internal/hpa.h"
#include "jemalloc/internal/sec.h"
#include "jemalloc/internal/emap.h"
#include "jemalloc/internal/psset.h"
     14  1.1  christos 
     15  1.1  christos /*
     16  1.1  christos  * PA Microbenchmark (Simplified Version)
     17  1.1  christos  *
     18  1.1  christos  * This tool reads allocation traces and simulates PA behavior
     19  1.1  christos  * for testing and understanding the allocation patterns.
     20  1.1  christos  *
     21  1.1  christos  * Features:
 * 1. Reads CSV input file with format:
 *    shard_ind,operation,size_or_alloc_index,nsecs,is_frequent
     23  1.1  christos  * 2. Simulates allocations/deallocations tracking
     24  1.1  christos  * 3. Provides basic statistics analysis
     25  1.1  christos  * 4. Validates the framework setup
     26  1.1  christos  */
     27  1.1  christos 
/* Size, in bytes, of the buffer used to read one line of the trace file. */
#define MAX_LINE_LENGTH 1024
/* Upper bound on the number of trace events loaded from a trace file. */
#define MAX_ALLOCATIONS 10000000
/* NOTE(review): presumably a cap on the shard/arena count; its use is not
 * visible in this part of the file -- confirm against main(). */
#define MAX_ARENAS 128
     31  1.1  christos 
/* Kind of operation carried by a trace event (matches the CSV encoding). */
typedef enum {
	PA_ALLOC = 0, /* Allocate through the PA */
	PA_DALLOC = 1 /* Deallocate an earlier allocation */
} pa_op_t;
     33  1.1  christos 
     34  1.1  christos typedef struct {
     35  1.1  christos 	int      shard_ind;
     36  1.1  christos 	pa_op_t  operation;
     37  1.1  christos 	size_t   size_or_alloc_index;
     38  1.1  christos 	uint64_t nsecs;
     39  1.1  christos 	int      is_frequent;
     40  1.1  christos } pa_event_t;
     41  1.1  christos 
     42  1.1  christos typedef struct {
     43  1.1  christos 	edata_t *edata;
     44  1.1  christos 	size_t   size;
     45  1.1  christos 	int      shard_ind;
     46  1.1  christos 	bool     active;
     47  1.1  christos } allocation_record_t;
     48  1.1  christos 
/* Per-shard counters maintained by the benchmark itself. */
typedef struct {
	/* Number of successful allocations on the shard. */
	uint64_t alloc_count;
	/* Number of deallocations performed on the shard. */
	uint64_t dealloc_count;
	/* Sum of requested sizes of the shard's currently live allocations. */
	uint64_t bytes_allocated;
} shard_stats_t;
     55  1.1  christos 
     56  1.1  christos /* Structure to group per-shard PA infrastructure */
     57  1.1  christos typedef struct {
     58  1.1  christos 	base_t          *base;        /* Base allocator */
     59  1.1  christos 	emap_t           emap;        /* Extent map */
     60  1.1  christos 	pa_shard_t       pa_shard;    /* PA shard */
     61  1.1  christos 	pa_shard_stats_t shard_stats; /* PA shard statistics */
     62  1.1  christos 	malloc_mutex_t   stats_mtx;   /* Statistics mutex */
     63  1.1  christos } shard_infrastructure_t;
     64  1.1  christos 
     65  1.1  christos static FILE                *g_stats_output = NULL; /* Output file for stats */
     66  1.1  christos static size_t               g_alloc_counter = 0; /* Global allocation counter */
     67  1.1  christos static allocation_record_t *g_alloc_records =
     68  1.1  christos     NULL;                     /* Global allocation tracking */
     69  1.1  christos static bool g_use_sec = true; /* Global flag for SEC vs HPA-only */
     70  1.1  christos 
     71  1.1  christos /* Refactored arrays using structures */
     72  1.1  christos static shard_stats_t *g_shard_stats = NULL; /* Per-shard tracking statistics */
     73  1.1  christos static shard_infrastructure_t *g_shard_infra =
     74  1.1  christos     NULL;                         /* Per-shard PA infrastructure */
     75  1.1  christos static pa_central_t g_pa_central; /* Global PA central */
     76  1.1  christos 
     77  1.1  christos /* Override for curtime */
     78  1.1  christos static hpa_hooks_t hpa_hooks_override;
     79  1.1  christos static nstime_t    cur_time_clock;
     80  1.1  christos 
     81  1.1  christos void
     82  1.1  christos curtime(nstime_t *r_time, bool first_reading) {
     83  1.1  christos 	if (first_reading) {
     84  1.1  christos 		nstime_init_zero(r_time);
     85  1.1  christos 	}
     86  1.1  christos 	*r_time = cur_time_clock;
     87  1.1  christos }
     88  1.1  christos 
     89  1.1  christos static void
     90  1.1  christos set_clock(uint64_t nsecs) {
     91  1.1  christos 	nstime_init(&cur_time_clock, nsecs);
     92  1.1  christos }
     93  1.1  christos 
     94  1.1  christos static void
     95  1.1  christos init_hpa_hooks() {
     96  1.1  christos 	hpa_hooks_override = hpa_hooks_default;
     97  1.1  christos 	hpa_hooks_override.curtime = curtime;
     98  1.1  christos }
     99  1.1  christos 
    100  1.1  christos static void cleanup_pa_infrastructure(int num_shards);
    101  1.1  christos 
    102  1.1  christos static bool
    103  1.1  christos initialize_pa_infrastructure(int num_shards) {
    104  1.1  christos 	/*
    105  1.1  christos 	 * Note when we call malloc, it resolves to je_malloc, while internal
    106  1.1  christos 	 * functions like base_new resolve to jet_malloc.  This is because this
    107  1.1  christos 	 * file is compiled with -DJEMALLOC_JET as a test.  This allows us to
    108  1.1  christos 	 * completely isolate the PA infrastructure benchmark from the rest of
    109  1.1  christos 	 * the jemalloc usage.
    110  1.1  christos 	*/
    111  1.1  christos 	void *dummy_jet = jet_malloc(16);
    112  1.1  christos 	if (dummy_jet == NULL) {
    113  1.1  christos 		fprintf(stderr, "Failed to initialize JET jemalloc\n");
    114  1.1  christos 		return 1;
    115  1.1  christos 	}
    116  1.1  christos 
    117  1.1  christos 	/* Force JET system to be fully initialized */
    118  1.1  christos 	if (jet_mallctl("epoch", NULL, NULL, NULL, 0) != 0) {
    119  1.1  christos 		fprintf(stderr, "Failed to initialize JET system fully\n");
    120  1.1  christos 		jet_free(dummy_jet);
    121  1.1  christos 		return 1;
    122  1.1  christos 	}
    123  1.1  christos 	jet_free(dummy_jet);
    124  1.1  christos 
    125  1.1  christos 	/* Allocate shard tracking statistics */
    126  1.1  christos 	g_shard_stats = calloc(num_shards, sizeof(shard_stats_t));
    127  1.1  christos 	if (g_shard_stats == NULL) {
    128  1.1  christos 		printf("DEBUG: Failed to allocate shard stats\n");
    129  1.1  christos 		return true;
    130  1.1  christos 	}
    131  1.1  christos 
    132  1.1  christos 	/* Allocate shard infrastructure */
    133  1.1  christos 	g_shard_infra = calloc(num_shards, sizeof(shard_infrastructure_t));
    134  1.1  christos 	if (g_shard_infra == NULL) {
    135  1.1  christos 		printf("DEBUG: Failed to allocate shard infrastructure\n");
    136  1.1  christos 		free(g_shard_stats);
    137  1.1  christos 		return true;
    138  1.1  christos 	}
    139  1.1  christos 
    140  1.1  christos 	/* Initialize one base allocator for PA central */
    141  1.1  christos 	base_t *central_base = base_new(tsd_tsdn(tsd_fetch()), 0 /* ind */,
    142  1.1  christos 	    (extent_hooks_t *)&ehooks_default_extent_hooks,
    143  1.1  christos 	    /* metadata_use_hooks */ true);
    144  1.1  christos 	if (central_base == NULL) {
    145  1.1  christos 		printf("DEBUG: Failed to create central_base\n");
    146  1.1  christos 		free(g_shard_stats);
    147  1.1  christos 		free(g_shard_infra);
    148  1.1  christos 		return true;
    149  1.1  christos 	}
    150  1.1  christos 
    151  1.1  christos 	/* Initialize PA central with HPA enabled */
    152  1.1  christos 	init_hpa_hooks();
    153  1.1  christos 	if (pa_central_init(&g_pa_central, central_base, true /* hpa */,
    154  1.1  christos 	        &hpa_hooks_override)) {
    155  1.1  christos 		printf("DEBUG: Failed to initialize PA central\n");
    156  1.1  christos 		base_delete(tsd_tsdn(tsd_fetch()), central_base);
    157  1.1  christos 		free(g_shard_stats);
    158  1.1  christos 		free(g_shard_infra);
    159  1.1  christos 		return true;
    160  1.1  christos 	}
    161  1.1  christos 
    162  1.1  christos 	for (int i = 0; i < num_shards; i++) {
    163  1.1  christos 		/* Create a separate base allocator for each shard */
    164  1.1  christos 		g_shard_infra[i].base = base_new(tsd_tsdn(tsd_fetch()),
    165  1.1  christos 		    i /* ind */, (extent_hooks_t *)&ehooks_default_extent_hooks,
    166  1.1  christos 		    /* metadata_use_hooks */ true);
    167  1.1  christos 		if (g_shard_infra[i].base == NULL) {
    168  1.1  christos 			printf("DEBUG: Failed to create base %d\n", i);
    169  1.1  christos 			/* Clean up partially initialized shards */
    170  1.1  christos 			cleanup_pa_infrastructure(num_shards);
    171  1.1  christos 			return true;
    172  1.1  christos 		}
    173  1.1  christos 
    174  1.1  christos 		/* Initialize emap for this shard */
    175  1.1  christos 		if (emap_init(&g_shard_infra[i].emap, g_shard_infra[i].base,
    176  1.1  christos 		        /* zeroed */ false)) {
    177  1.1  christos 			printf("DEBUG: Failed to initialize emap %d\n", i);
    178  1.1  christos 			/* Clean up partially initialized shards */
    179  1.1  christos 			cleanup_pa_infrastructure(num_shards);
    180  1.1  christos 			return true;
    181  1.1  christos 		}
    182  1.1  christos 
    183  1.1  christos 		/* Initialize stats mutex */
    184  1.1  christos 		if (malloc_mutex_init(&g_shard_infra[i].stats_mtx,
    185  1.1  christos 		        "pa_shard_stats", WITNESS_RANK_OMIT,
    186  1.1  christos 		        malloc_mutex_rank_exclusive)) {
    187  1.1  christos 			printf(
    188  1.1  christos 			    "DEBUG: Failed to initialize stats mutex %d\n", i);
    189  1.1  christos 			/* Clean up partially initialized shards */
    190  1.1  christos 			cleanup_pa_infrastructure(num_shards);
    191  1.1  christos 			return true;
    192  1.1  christos 		}
    193  1.1  christos 
    194  1.1  christos 		/* Initialize PA shard */
    195  1.1  christos 		nstime_t cur_time;
    196  1.1  christos 		nstime_init_zero(&cur_time);
    197  1.1  christos 
    198  1.1  christos 		if (pa_shard_init(tsd_tsdn(tsd_fetch()),
    199  1.1  christos 		        &g_shard_infra[i].pa_shard, &g_pa_central,
    200  1.1  christos 		        &g_shard_infra[i].emap /* emap */,
    201  1.1  christos 		        g_shard_infra[i].base, i /* ind */,
    202  1.1  christos 		        &g_shard_infra[i].shard_stats /* stats */,
    203  1.1  christos 		        &g_shard_infra[i].stats_mtx /* stats_mtx */,
    204  1.1  christos 		        &cur_time /* cur_time */,
    205  1.1  christos 		        SIZE_MAX /* oversize_threshold */,
    206  1.1  christos 		        -1 /* dirty_decay_ms */, -1 /* muzzy_decay_ms */)) {
    207  1.1  christos 			printf("DEBUG: Failed to initialize PA shard %d\n", i);
    208  1.1  christos 			/* Clean up partially initialized shards */
    209  1.1  christos 			cleanup_pa_infrastructure(num_shards);
    210  1.1  christos 			return true;
    211  1.1  christos 		}
    212  1.1  christos 
    213  1.1  christos 		/* Enable HPA for this shard with proper configuration */
    214  1.1  christos 		hpa_shard_opts_t hpa_opts = HPA_SHARD_OPTS_DEFAULT;
    215  1.1  christos 		hpa_opts.deferral_allowed =
    216  1.1  christos 		    false; /* No background threads in microbench */
    217  1.1  christos 
    218  1.1  christos 		sec_opts_t sec_opts = SEC_OPTS_DEFAULT;
    219  1.1  christos 		if (!g_use_sec) {
    220  1.1  christos 			/* Disable SEC by setting nshards to 0 */
    221  1.1  christos 			sec_opts.nshards = 0;
    222  1.1  christos 		}
    223  1.1  christos 
    224  1.1  christos 		if (pa_shard_enable_hpa(tsd_tsdn(tsd_fetch()),
    225  1.1  christos 		        &g_shard_infra[i].pa_shard, &hpa_opts, &sec_opts)) {
    226  1.1  christos 			fprintf(
    227  1.1  christos 			    stderr, "Failed to enable HPA on shard %d\n", i);
    228  1.1  christos 			/* Clean up partially initialized shards */
    229  1.1  christos 			cleanup_pa_infrastructure(num_shards);
    230  1.1  christos 			return true;
    231  1.1  christos 		}
    232  1.1  christos 	}
    233  1.1  christos 
    234  1.1  christos 	printf("PA infrastructure configured: HPA=enabled, SEC=%s\n",
    235  1.1  christos 	    g_use_sec ? "enabled" : "disabled");
    236  1.1  christos 
    237  1.1  christos 	return false;
    238  1.1  christos }
    239  1.1  christos 
    240  1.1  christos static void
    241  1.1  christos cleanup_pa_infrastructure(int num_shards) {
    242  1.1  christos 	if (g_shard_infra != NULL) {
    243  1.1  christos 		for (int i = 0; i < num_shards; i++) {
    244  1.1  christos 			pa_shard_destroy(
    245  1.1  christos 			    tsd_tsdn(tsd_fetch()), &g_shard_infra[i].pa_shard);
    246  1.1  christos 			if (g_shard_infra[i].base != NULL) {
    247  1.1  christos 				base_delete(tsd_tsdn(tsd_fetch()),
    248  1.1  christos 				    g_shard_infra[i].base);
    249  1.1  christos 			}
    250  1.1  christos 		}
    251  1.1  christos 		free(g_shard_infra);
    252  1.1  christos 		g_shard_infra = NULL;
    253  1.1  christos 	}
    254  1.1  christos 
    255  1.1  christos 	if (g_shard_stats != NULL) {
    256  1.1  christos 		free(g_shard_stats);
    257  1.1  christos 		g_shard_stats = NULL;
    258  1.1  christos 	}
    259  1.1  christos }
    260  1.1  christos 
    261  1.1  christos static bool
    262  1.1  christos parse_csv_line(const char *line, pa_event_t *event) {
    263  1.1  christos 	/* Expected format: shard_ind,operation,size_or_alloc_index,is_frequent */
    264  1.1  christos 	int operation;
    265  1.1  christos 	int fields = sscanf(line, "%d,%d,%zu,%lu,%d", &event->shard_ind,
    266  1.1  christos 	    &operation, &event->size_or_alloc_index, &event->nsecs,
    267  1.1  christos 	    &event->is_frequent);
    268  1.1  christos 
    269  1.1  christos 	if (fields < 4) { /* is_frequent is optional */
    270  1.1  christos 		return false;
    271  1.1  christos 	}
    272  1.1  christos 
    273  1.1  christos 	if (fields == 4) {
    274  1.1  christos 		event->is_frequent = 0; /* Default value */
    275  1.1  christos 	}
    276  1.1  christos 
    277  1.1  christos 	if (operation == 0) {
    278  1.1  christos 		event->operation = PA_ALLOC;
    279  1.1  christos 	} else if (operation == 1) {
    280  1.1  christos 		event->operation = PA_DALLOC;
    281  1.1  christos 	} else {
    282  1.1  christos 		return false;
    283  1.1  christos 	}
    284  1.1  christos 
    285  1.1  christos 	return true;
    286  1.1  christos }
    287  1.1  christos 
    288  1.1  christos static size_t
    289  1.1  christos load_trace_file(const char *filename, pa_event_t **events, int *max_shard_id) {
    290  1.1  christos 	FILE *file = fopen(filename, "r");
    291  1.1  christos 	if (!file) {
    292  1.1  christos 		fprintf(stderr, "Failed to open trace file: %s\n", filename);
    293  1.1  christos 		return 0;
    294  1.1  christos 	}
    295  1.1  christos 
    296  1.1  christos 	*events = malloc(MAX_ALLOCATIONS * sizeof(pa_event_t));
    297  1.1  christos 	if (!*events) {
    298  1.1  christos 		fclose(file);
    299  1.1  christos 		return 0;
    300  1.1  christos 	}
    301  1.1  christos 
    302  1.1  christos 	char   line[MAX_LINE_LENGTH];
    303  1.1  christos 	size_t count = 0;
    304  1.1  christos 	*max_shard_id = 0;
    305  1.1  christos 
    306  1.1  christos 	/* Skip header line */
    307  1.1  christos 	if (fgets(line, sizeof(line), file) == NULL) {
    308  1.1  christos 		fclose(file);
    309  1.1  christos 		free(*events);
    310  1.1  christos 		return 0;
    311  1.1  christos 	}
    312  1.1  christos 
    313  1.1  christos 	while (fgets(line, sizeof(line), file) && count < MAX_ALLOCATIONS) {
    314  1.1  christos 		if (parse_csv_line(line, &(*events)[count])) {
    315  1.1  christos 			if ((*events)[count].shard_ind > *max_shard_id) {
    316  1.1  christos 				*max_shard_id = (*events)[count].shard_ind;
    317  1.1  christos 			}
    318  1.1  christos 			count++;
    319  1.1  christos 		}
    320  1.1  christos 	}
    321  1.1  christos 
    322  1.1  christos 	fclose(file);
    323  1.1  christos 	printf("Loaded %zu events from %s\n", count, filename);
    324  1.1  christos 	printf("Maximum shard ID found: %d\n", *max_shard_id);
    325  1.1  christos 	return count;
    326  1.1  christos }
    327  1.1  christos 
    328  1.1  christos static void
    329  1.1  christos collect_hpa_stats(int shard_id, hpa_shard_stats_t *hpa_stats_out) {
    330  1.1  christos 	/* Get tsdn for statistics collection */
    331  1.1  christos 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
    332  1.1  christos 
    333  1.1  christos 	/* Clear the output structure */
    334  1.1  christos 	memset(hpa_stats_out, 0, sizeof(hpa_shard_stats_t));
    335  1.1  christos 
    336  1.1  christos 	/* Check if this shard has HPA enabled */
    337  1.1  christos 	if (!g_shard_infra[shard_id].pa_shard.ever_used_hpa) {
    338  1.1  christos 		return;
    339  1.1  christos 	}
    340  1.1  christos 
    341  1.1  christos 	/* Merge HPA statistics from the shard */
    342  1.1  christos 	hpa_shard_stats_merge(
    343  1.1  christos 	    tsdn, &g_shard_infra[shard_id].pa_shard.hpa_shard, hpa_stats_out);
    344  1.1  christos }
    345  1.1  christos 
    346  1.1  christos static void
    347  1.1  christos print_shard_stats(int shard_id, size_t operation_count) {
    348  1.1  christos 	if (!g_stats_output) {
    349  1.1  christos 		return;
    350  1.1  christos 	}
    351  1.1  christos 
    352  1.1  christos 	/* Collect HPA statistics */
    353  1.1  christos 	hpa_shard_stats_t hpa_stats;
    354  1.1  christos 	collect_hpa_stats(shard_id, &hpa_stats);
    355  1.1  christos 	psset_stats_t *psset_stats = &hpa_stats.psset_stats;
    356  1.1  christos 
    357  1.1  christos 	/* Total pageslabs */
    358  1.1  christos 	size_t total_pageslabs = psset_stats->merged.npageslabs;
    359  1.1  christos 
    360  1.1  christos 	/* Full pageslabs breakdown by hugification */
    361  1.1  christos 	size_t full_pageslabs_non_huge =
    362  1.1  christos 	    psset_stats->full_slabs[0].npageslabs; /* [0] = non-hugified */
    363  1.1  christos 	size_t full_pageslabs_huge =
    364  1.1  christos 	    psset_stats->full_slabs[1].npageslabs; /* [1] = hugified */
    365  1.1  christos 	size_t full_pageslabs_total = full_pageslabs_non_huge
    366  1.1  christos 	    + full_pageslabs_huge;
    367  1.1  christos 
    368  1.1  christos 	/* Empty pageslabs breakdown by hugification */
    369  1.1  christos 	size_t empty_pageslabs_non_huge =
    370  1.1  christos 	    psset_stats->empty_slabs[0].npageslabs; /* [0] = non-hugified */
    371  1.1  christos 	size_t empty_pageslabs_huge =
    372  1.1  christos 	    psset_stats->empty_slabs[1].npageslabs; /* [1] = hugified */
    373  1.1  christos 	size_t empty_pageslabs_total = empty_pageslabs_non_huge
    374  1.1  christos 	    + empty_pageslabs_huge;
    375  1.1  christos 
    376  1.1  christos 	/* Hugified pageslabs (full + empty + partial) */
    377  1.1  christos 	size_t hugified_pageslabs = full_pageslabs_huge + empty_pageslabs_huge;
    378  1.1  christos 	/* Add hugified partial slabs */
    379  1.1  christos 	for (int i = 0; i < PSSET_NPSIZES; i++) {
    380  1.1  christos 		hugified_pageslabs +=
    381  1.1  christos 		    psset_stats->nonfull_slabs[i][1].npageslabs;
    382  1.1  christos 	}
    383  1.1  christos 
    384  1.1  christos 	/* Dirty bytes */
    385  1.1  christos 	size_t   dirty_bytes = psset_stats->merged.ndirty * PAGE;
    386  1.1  christos 	uint64_t npurge_passes = hpa_stats.nonderived_stats.npurge_passes;
    387  1.1  christos 	uint64_t npurges = hpa_stats.nonderived_stats.npurges;
    388  1.1  christos 
    389  1.1  christos 	assert(g_use_sec
    390  1.1  christos 	    || psset_stats->merged.nactive * PAGE
    391  1.1  christos 	        == g_shard_stats[shard_id].bytes_allocated);
    392  1.1  christos 	/* Output enhanced stats with detailed breakdown */
    393  1.1  christos 	fprintf(g_stats_output,
    394  1.1  christos 	    "%zu,%d,%lu,%lu,%lu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%lu,%lu,%lu"
    395  1.1  christos 	    ",%lu,%lu\n",
    396  1.1  christos 	    operation_count, shard_id, g_shard_stats[shard_id].alloc_count,
    397  1.1  christos 	    g_shard_stats[shard_id].dealloc_count,
    398  1.1  christos 	    g_shard_stats[shard_id].bytes_allocated, total_pageslabs,
    399  1.1  christos 	    full_pageslabs_total, empty_pageslabs_total, hugified_pageslabs,
    400  1.1  christos 	    full_pageslabs_non_huge, full_pageslabs_huge,
    401  1.1  christos 	    empty_pageslabs_non_huge, empty_pageslabs_huge, dirty_bytes,
    402  1.1  christos 	    hpa_stats.nonderived_stats.nhugifies,
    403  1.1  christos 	    hpa_stats.nonderived_stats.nhugify_failures,
    404  1.1  christos 	    hpa_stats.nonderived_stats.ndehugifies, npurge_passes, npurges);
    405  1.1  christos 	fflush(g_stats_output);
    406  1.1  christos }
    407  1.1  christos 
    408  1.1  christos static void
    409  1.1  christos simulate_trace(
    410  1.1  christos     int num_shards, pa_event_t *events, size_t count, size_t stats_interval) {
    411  1.1  christos 	uint64_t total_allocs = 0, total_deallocs = 0;
    412  1.1  christos 	uint64_t total_allocated_bytes = 0;
    413  1.1  christos 
    414  1.1  christos 	printf("Starting simulation with %zu events across %d shards...\n",
    415  1.1  christos 	    count, num_shards);
    416  1.1  christos 
    417  1.1  christos 	for (size_t i = 0; i < count; i++) {
    418  1.1  christos 		pa_event_t *event = &events[i];
    419  1.1  christos 
    420  1.1  christos 		/* Validate shard index */
    421  1.1  christos 		if (event->shard_ind >= num_shards) {
    422  1.1  christos 			fprintf(stderr,
    423  1.1  christos 			    "Warning: Invalid shard index %d (max %d)\n",
    424  1.1  christos 			    event->shard_ind, num_shards - 1);
    425  1.1  christos 			continue;
    426  1.1  christos 		}
    427  1.1  christos 
    428  1.1  christos 		set_clock(event->nsecs);
    429  1.1  christos 		switch (event->operation) {
    430  1.1  christos 		case PA_ALLOC: {
    431  1.1  christos 			size_t size = event->size_or_alloc_index;
    432  1.1  christos 
    433  1.1  christos 			/* Get tsdn and calculate parameters for PA */
    434  1.1  christos 			tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
    435  1.1  christos 			szind_t szind = sz_size2index(size);
    436  1.1  christos 			bool    slab =
    437  1.1  christos 			    event
    438  1.1  christos 			        ->is_frequent; /* Use frequent_reuse for slab */
    439  1.1  christos 			bool deferred_work_generated = false;
    440  1.1  christos 
    441  1.1  christos 			/* Allocate using PA allocator */
    442  1.1  christos 			edata_t *edata = pa_alloc(tsdn,
    443  1.1  christos 			    &g_shard_infra[event->shard_ind].pa_shard, size,
    444  1.1  christos 			    PAGE /* alignment */, slab, szind, false /* zero */,
    445  1.1  christos 			    false /* guarded */, &deferred_work_generated);
    446  1.1  christos 
    447  1.1  christos 			if (edata != NULL) {
    448  1.1  christos 				/* Store allocation record */
    449  1.1  christos 				g_alloc_records[g_alloc_counter].edata = edata;
    450  1.1  christos 				g_alloc_records[g_alloc_counter].size = size;
    451  1.1  christos 				g_alloc_records[g_alloc_counter].shard_ind =
    452  1.1  christos 				    event->shard_ind;
    453  1.1  christos 				g_alloc_records[g_alloc_counter].active = true;
    454  1.1  christos 				g_alloc_counter++;
    455  1.1  christos 
    456  1.1  christos 				/* Update shard-specific stats */
    457  1.1  christos 				g_shard_stats[event->shard_ind].alloc_count++;
    458  1.1  christos 				g_shard_stats[event->shard_ind]
    459  1.1  christos 				    .bytes_allocated += size;
    460  1.1  christos 
    461  1.1  christos 				total_allocs++;
    462  1.1  christos 				total_allocated_bytes += size;
    463  1.1  christos 			}
    464  1.1  christos 			break;
    465  1.1  christos 		}
    466  1.1  christos 		case PA_DALLOC: {
    467  1.1  christos 			size_t alloc_index = event->size_or_alloc_index;
    468  1.1  christos 			if (alloc_index < g_alloc_counter
    469  1.1  christos 			    && g_alloc_records[alloc_index].active
    470  1.1  christos 			    && g_alloc_records[alloc_index].shard_ind
    471  1.1  christos 			        == event->shard_ind) {
    472  1.1  christos 				/* Get tsdn for PA */
    473  1.1  christos 				tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
    474  1.1  christos 				bool    deferred_work_generated = false;
    475  1.1  christos 
    476  1.1  christos 				/* Deallocate using PA allocator */
    477  1.1  christos 				pa_dalloc(tsdn,
    478  1.1  christos 				    &g_shard_infra[event->shard_ind].pa_shard,
    479  1.1  christos 				    g_alloc_records[alloc_index].edata,
    480  1.1  christos 				    &deferred_work_generated);
    481  1.1  christos 
    482  1.1  christos 				/* Update shard-specific stats */
    483  1.1  christos 				g_shard_stats[event->shard_ind].dealloc_count++;
    484  1.1  christos 				g_shard_stats[event->shard_ind]
    485  1.1  christos 				    .bytes_allocated -=
    486  1.1  christos 				    g_alloc_records[alloc_index].size;
    487  1.1  christos 
    488  1.1  christos 				g_alloc_records[alloc_index].active = false;
    489  1.1  christos 				total_deallocs++;
    490  1.1  christos 			}
    491  1.1  christos 			break;
    492  1.1  christos 		}
    493  1.1  christos 		}
    494  1.1  christos 
    495  1.1  christos 		/* Periodic stats output and progress reporting */
    496  1.1  christos 		if (stats_interval > 0 && (i + 1) % stats_interval == 0) {
    497  1.1  christos 			/* Print stats for all shards */
    498  1.1  christos 			for (int j = 0; j < num_shards; j++) {
    499  1.1  christos 				print_shard_stats(j, i + 1);
    500  1.1  christos 			}
    501  1.1  christos 		}
    502  1.1  christos 	}
    503  1.1  christos 
    504  1.1  christos 	printf("\nSimulation completed:\n");
    505  1.1  christos 	printf("  Total allocations: %lu\n", total_allocs);
    506  1.1  christos 	printf("  Total deallocations: %lu\n", total_deallocs);
    507  1.1  christos 	printf("  Total allocated: %lu bytes\n", total_allocated_bytes);
    508  1.1  christos 	printf("  Active allocations: %lu\n", g_alloc_counter - total_deallocs);
    509  1.1  christos 
    510  1.1  christos 	/* Print final stats for all shards */
    511  1.1  christos 	printf("\nFinal shard statistics:\n");
    512  1.1  christos 	for (int i = 0; i < num_shards; i++) {
    513  1.1  christos 		printf(
    514  1.1  christos 		    "  Shard %d: Allocs=%lu, Deallocs=%lu, Active Bytes=%lu\n",
    515  1.1  christos 		    i, g_shard_stats[i].alloc_count,
    516  1.1  christos 		    g_shard_stats[i].dealloc_count,
    517  1.1  christos 		    g_shard_stats[i].bytes_allocated);
    518  1.1  christos 
    519  1.1  christos 		/* Final stats to file */
    520  1.1  christos 		print_shard_stats(i, count);
    521  1.1  christos 	}
    522  1.1  christos }
    523  1.1  christos 
    524  1.1  christos static void
    525  1.1  christos cleanup_remaining_allocations(int num_shards) {
    526  1.1  christos 	size_t cleaned_up = 0;
    527  1.1  christos 
    528  1.1  christos 	printf("Cleaning up remaining allocations...\n");
    529  1.1  christos 
    530  1.1  christos 	for (size_t i = 0; i < g_alloc_counter; i++) {
    531  1.1  christos 		if (g_alloc_records[i].active) {
    532  1.1  christos 			int shard_ind = g_alloc_records[i].shard_ind;
    533  1.1  christos 			if (shard_ind < num_shards) {
    534  1.1  christos 				tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
    535  1.1  christos 				bool    deferred_work_generated = false;
    536  1.1  christos 
    537  1.1  christos 				pa_dalloc(tsdn,
    538  1.1  christos 				    &g_shard_infra[shard_ind].pa_shard,
    539  1.1  christos 				    g_alloc_records[i].edata,
    540  1.1  christos 				    &deferred_work_generated);
    541  1.1  christos 
    542  1.1  christos 				g_alloc_records[i].active = false;
    543  1.1  christos 				cleaned_up++;
    544  1.1  christos 			}
    545  1.1  christos 		}
    546  1.1  christos 	}
    547  1.1  christos 
    548  1.1  christos 	printf("Cleaned up %zu remaining allocations\n", cleaned_up);
    549  1.1  christos }
    550  1.1  christos 
/*
 * Print the command line help to stdout.
 *
 * program: name to show in the usage line (normally argv[0]).
 *
 * The trace format shown here lists the nsecs column: the parser reads it
 * as the fourth field and rejects lines that have fewer than four fields.
 */
static void
print_usage(const char *program) {
	printf("Usage: %s [options] <trace_file.csv>\n", program);
	printf("Options:\n");
	printf("  -h, --help           Show this help message\n");
	printf(
	    "  -o, --output FILE    Output file for statistics (default: stdout)\n");
	printf("  -s, --sec            Use SEC (default)\n");
	printf("  -p, --hpa-only       Use HPA only (no SEC)\n");
	printf(
	    "  -i, --interval N     Stats print interval (default: 100000, 0=disable)\n");
	printf(
	    "\nTrace file format: shard_ind,operation,size_or_alloc_index,nsecs,is_frequent\n");
	printf("  - operation: 0=alloc, 1=dealloc\n");
	printf("  - nsecs: event time in nanoseconds\n");
	printf("  - is_frequent: optional column\n");
}
    567  1.1  christos 
    568  1.1  christos int
    569  1.1  christos main(int argc, char *argv[]) {
    570  1.1  christos 	const char *trace_file = NULL;
    571  1.1  christos 	const char *stats_output_file = NULL;
    572  1.1  christos 	size_t      stats_interval = 100000; /* Default stats print interval */
    573  1.1  christos 	/* Parse command line arguments */
    574  1.1  christos 	for (int i = 1; i < argc; i++) {
    575  1.1  christos 		if (strcmp(argv[i], "-h") == 0
    576  1.1  christos 		    || strcmp(argv[i], "--help") == 0) {
    577  1.1  christos 			print_usage(argv[0]);
    578  1.1  christos 			return 0;
    579  1.1  christos 		} else if (strcmp(argv[i], "-o") == 0
    580  1.1  christos 		    || strcmp(argv[i], "--output") == 0) {
    581  1.1  christos 			if (i + 1 >= argc) {
    582  1.1  christos 				fprintf(stderr,
    583  1.1  christos 				    "Error: %s requires an argument\n",
    584  1.1  christos 				    argv[i]);
    585  1.1  christos 				return 1;
    586  1.1  christos 			}
    587  1.1  christos 			stats_output_file = argv[++i];
    588  1.1  christos 		} else if (strcmp(argv[i], "-s") == 0
    589  1.1  christos 		    || strcmp(argv[i], "--sec") == 0) {
    590  1.1  christos 			g_use_sec = true;
    591  1.1  christos 		} else if (strcmp(argv[i], "-p") == 0
    592  1.1  christos 		    || strcmp(argv[i], "--hpa-only") == 0) {
    593  1.1  christos 			g_use_sec = false;
    594  1.1  christos 		} else if (strcmp(argv[i], "-i") == 0
    595  1.1  christos 		    || strcmp(argv[i], "--interval") == 0) {
    596  1.1  christos 			if (i + 1 >= argc) {
    597  1.1  christos 				fprintf(stderr,
    598  1.1  christos 				    "Error: %s requires an argument\n",
    599  1.1  christos 				    argv[i]);
    600  1.1  christos 				return 1;
    601  1.1  christos 			}
    602  1.1  christos 			stats_interval = (size_t)atol(argv[++i]);
    603  1.1  christos 		} else if (argv[i][0] != '-') {
    604  1.1  christos 			trace_file = argv[i];
    605  1.1  christos 		} else {
    606  1.1  christos 			fprintf(stderr, "Unknown option: %s\n", argv[i]);
    607  1.1  christos 			print_usage(argv[0]);
    608  1.1  christos 			return 1;
    609  1.1  christos 		}
    610  1.1  christos 	}
    611  1.1  christos 
    612  1.1  christos 	if (!trace_file) {
    613  1.1  christos 		fprintf(stderr, "Error: No trace file specified\n");
    614  1.1  christos 		print_usage(argv[0]);
    615  1.1  christos 		return 1;
    616  1.1  christos 	}
    617  1.1  christos 
    618  1.1  christos 	printf("Trace file: %s\n", trace_file);
    619  1.1  christos 	printf("Mode: %s\n", g_use_sec ? "PA with SEC" : "HPA only");
    620  1.1  christos 
    621  1.1  christos 	/* Open stats output file */
    622  1.1  christos 	if (stats_output_file) {
    623  1.1  christos 		g_stats_output = fopen(stats_output_file, "w");
    624  1.1  christos 		if (!g_stats_output) {
    625  1.1  christos 			fprintf(stderr,
    626  1.1  christos 			    "Failed to open stats output file: %s\n",
    627  1.1  christos 			    stats_output_file);
    628  1.1  christos 			return 1;
    629  1.1  christos 		}
    630  1.1  christos 		printf("Stats output: %s\n", stats_output_file);
    631  1.1  christos 
    632  1.1  christos 		/* Write CSV header */
    633  1.1  christos 		fprintf(g_stats_output,
    634  1.1  christos 		    "operation_count,shard_id,alloc_count,dealloc_count,active_bytes,"
    635  1.1  christos 		    "total_pageslabs,full_pageslabs_total,empty_pageslabs_total,hugified_pageslabs,"
    636  1.1  christos 		    "full_pageslabs_non_huge,full_pageslabs_huge,"
    637  1.1  christos 		    "empty_pageslabs_non_huge,empty_pageslabs_huge,"
    638  1.1  christos 		    "dirty_bytes,nhugifies,nhugify_failures,ndehugifies,"
    639  1.1  christos 		    "npurge_passes,npurges\n");
    640  1.1  christos 	}
    641  1.1  christos 
    642  1.1  christos 	/* Load trace data and determine max number of arenas */
    643  1.1  christos 	pa_event_t *events;
    644  1.1  christos 	int         max_shard_id;
    645  1.1  christos 	size_t      event_count = load_trace_file(
    646  1.1  christos             trace_file, &events, &max_shard_id);
    647  1.1  christos 	if (event_count == 0) {
    648  1.1  christos 		if (g_stats_output)
    649  1.1  christos 			fclose(g_stats_output);
    650  1.1  christos 		return 1;
    651  1.1  christos 	}
    652  1.1  christos 
    653  1.1  christos 	int num_shards = max_shard_id + 1; /* shard IDs are 0-based */
    654  1.1  christos 	if (num_shards > MAX_ARENAS) {
    655  1.1  christos 		fprintf(stderr, "Error: Too many arenas required (%d > %d)\n",
    656  1.1  christos 		    num_shards, MAX_ARENAS);
    657  1.1  christos 		free(events);
    658  1.1  christos 		if (g_stats_output)
    659  1.1  christos 			fclose(g_stats_output);
    660  1.1  christos 		return 1;
    661  1.1  christos 	}
    662  1.1  christos 
    663  1.1  christos 	/* Allocate allocation tracking array */
    664  1.1  christos 	g_alloc_records = malloc(event_count * sizeof(allocation_record_t));
    665  1.1  christos 
    666  1.1  christos 	if (!g_alloc_records) {
    667  1.1  christos 		fprintf(
    668  1.1  christos 		    stderr, "Failed to allocate allocation tracking array\n");
    669  1.1  christos 		free(events);
    670  1.1  christos 		if (g_stats_output) {
    671  1.1  christos 			fclose(g_stats_output);
    672  1.1  christos 		}
    673  1.1  christos 		return 1;
    674  1.1  christos 	}
    675  1.1  christos 
    676  1.1  christos 	/* Initialize PA infrastructure */
    677  1.1  christos 	if (initialize_pa_infrastructure(num_shards)) {
    678  1.1  christos 		fprintf(stderr, "Failed to initialize PA infrastructure\n");
    679  1.1  christos 		free(events);
    680  1.1  christos 		free(g_alloc_records);
    681  1.1  christos 		if (g_stats_output) {
    682  1.1  christos 			fclose(g_stats_output);
    683  1.1  christos 		}
    684  1.1  christos 		return 1;
    685  1.1  christos 	}
    686  1.1  christos 
    687  1.1  christos 	/* Run simulation */
    688  1.1  christos 	simulate_trace(num_shards, events, event_count, stats_interval);
    689  1.1  christos 
    690  1.1  christos 	/* Clean up remaining allocations */
    691  1.1  christos 	cleanup_remaining_allocations(num_shards);
    692  1.1  christos 
    693  1.1  christos 	/* Cleanup PA infrastructure */
    694  1.1  christos 	cleanup_pa_infrastructure(num_shards);
    695  1.1  christos 
    696  1.1  christos 	/* Cleanup */
    697  1.1  christos 	free(g_alloc_records);
    698  1.1  christos 	free(events);
    699  1.1  christos 
    700  1.1  christos 	if (g_stats_output) {
    701  1.1  christos 		fclose(g_stats_output);
    702  1.1  christos 		printf("Statistics written to: %s\n", stats_output_file);
    703  1.1  christos 	}
    704  1.1  christos 
    705  1.1  christos 	return 0;
    706  1.1  christos }
    707