1 1.9 riastrad /* $NetBSD: cdbw.c,v 1.9 2023/08/08 10:34:08 riastradh Exp $ */ 2 1.1 joerg /*- 3 1.6 alnsn * Copyright (c) 2009, 2010, 2015 The NetBSD Foundation, Inc. 4 1.1 joerg * All rights reserved. 5 1.1 joerg * 6 1.1 joerg * This code is derived from software contributed to The NetBSD Foundation 7 1.6 alnsn * by Joerg Sonnenberger and Alexander Nasonov. 8 1.1 joerg * 9 1.1 joerg * Redistribution and use in source and binary forms, with or without 10 1.1 joerg * modification, are permitted provided that the following conditions 11 1.1 joerg * are met: 12 1.1 joerg * 13 1.1 joerg * 1. Redistributions of source code must retain the above copyright 14 1.1 joerg * notice, this list of conditions and the following disclaimer. 15 1.1 joerg * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 joerg * notice, this list of conditions and the following disclaimer in 17 1.1 joerg * the documentation and/or other materials provided with the 18 1.1 joerg * distribution. 19 1.1 joerg * 20 1.1 joerg * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 1.1 joerg * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 1.1 joerg * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 1.1 joerg * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 1.1 joerg * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 1.1 joerg * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 1.1 joerg * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 1.1 joerg * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 1.1 joerg * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 1.1 joerg * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 1.1 joerg * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 1.1 joerg * SUCH DAMAGE. 32 1.1 joerg */ 33 1.1 joerg 34 1.1 joerg #if HAVE_NBTOOL_CONFIG_H 35 1.1 joerg #include "nbtool_config.h" 36 1.1 joerg #endif 37 1.1 joerg 38 1.1 joerg #include <sys/cdefs.h> 39 1.9 riastrad __RCSID("$NetBSD: cdbw.c,v 1.9 2023/08/08 10:34:08 riastradh Exp $"); 40 1.1 joerg 41 1.1 joerg #include "namespace.h" 42 1.1 joerg 43 1.4 joerg #if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H 44 1.1 joerg #include <sys/endian.h> 45 1.4 joerg #endif 46 1.1 joerg #include <sys/queue.h> 47 1.1 joerg #include <cdbw.h> 48 1.1 joerg #include <stdlib.h> 49 1.1 joerg #include <string.h> 50 1.1 joerg #include <unistd.h> 51 1.1 joerg 52 1.7 joerg #if !HAVE_NBTOOL_CONFIG_H 53 1.7 joerg #include <sys/bitops.h> 54 1.7 joerg #else 55 1.7 joerg static inline int 56 1.7 joerg my_fls32(uint32_t n) 57 1.7 joerg { 58 1.7 joerg int v; 59 1.7 joerg 60 1.7 joerg if (!n) 61 1.7 joerg return 0; 62 1.7 joerg 63 1.7 joerg v = 32; 64 1.7 joerg if ((n & 0xFFFF0000U) == 0) { 65 1.7 joerg n <<= 16; 66 1.7 joerg v -= 16; 67 1.7 joerg } 68 1.7 joerg if ((n & 0xFF000000U) == 0) { 69 1.7 joerg n <<= 8; 70 1.7 joerg v -= 8; 71 1.7 joerg } 72 1.7 joerg if ((n & 0xF0000000U) == 0) { 73 1.7 joerg n <<= 4; 74 1.7 joerg v -= 4; 75 1.7 joerg } 76 1.7 joerg if ((n & 0xC0000000U) == 0) { 77 1.7 joerg n <<= 2; 78 1.7 joerg v -= 2; 79 1.7 joerg } 80 1.7 joerg if ((n & 0x80000000U) == 0) { 81 1.7 joerg n <<= 1; 82 1.7 joerg v -= 1; 83 1.7 joerg } 84 1.7 joerg return v; 85 1.7 joerg } 86 1.7 joerg 87 1.7 joerg static inline void 88 1.7 joerg fast_divide32_prepare(uint32_t div, uint32_t * m, 89 1.7 joerg uint8_t *s1, uint8_t *s2) 90 1.7 joerg { 91 1.7 joerg uint64_t mt; 92 1.7 joerg int l; 93 1.7 joerg 94 1.7 joerg l = my_fls32(div - 1); 95 1.7 joerg mt = (uint64_t)(0x100000000ULL * ((1ULL << l) - div)); 96 1.7 joerg *m = (uint32_t)(mt / div + 1); 97 1.7 joerg *s1 = (l > 1) ? 1U : (uint8_t)l; 98 1.7 joerg *s2 = (l == 0) ? 0 : (uint8_t)(l - 1); 99 1.7 joerg } 100 1.7 joerg 101 1.7 joerg static inline uint32_t 102 1.7 joerg fast_divide32(uint32_t v, uint32_t div, uint32_t m, uint8_t s1, 103 1.7 joerg uint8_t s2) 104 1.7 joerg { 105 1.7 joerg uint32_t t; 106 1.7 joerg 107 1.7 joerg t = (uint32_t)(((uint64_t)v * m) >> 32); 108 1.7 joerg return (t + ((v - t) >> s1)) >> s2; 109 1.7 joerg } 110 1.7 joerg 111 1.7 joerg static inline uint32_t 112 1.7 joerg fast_remainder32(uint32_t v, uint32_t div, uint32_t m, uint8_t s1, 113 1.7 joerg uint8_t s2) 114 1.7 joerg { 115 1.7 joerg 116 1.7 joerg return v - div * fast_divide32(v, div, m, s1, s2); 117 1.7 joerg } 118 1.7 joerg #endif 119 1.7 joerg 120 1.1 joerg #ifdef __weak_alias 121 1.1 joerg __weak_alias(cdbw_close,_cdbw_close) 122 1.1 joerg __weak_alias(cdbw_open,_cdbw_open) 123 1.1 joerg __weak_alias(cdbw_output,_cdbw_output) 124 1.1 joerg __weak_alias(cdbw_put,_cdbw_put) 125 1.1 joerg __weak_alias(cdbw_put_data,_cdbw_put_data) 126 1.1 joerg __weak_alias(cdbw_put_key,_cdbw_put_key) 127 1.1 joerg #endif 128 1.1 joerg 129 1.1 joerg struct key_hash { 130 1.1 joerg SLIST_ENTRY(key_hash) link; 131 1.1 joerg uint32_t hashes[3]; 132 1.1 joerg uint32_t idx; 133 1.1 joerg void *key; 134 1.1 joerg size_t keylen; 135 1.1 joerg }; 136 1.1 joerg 137 1.1 joerg SLIST_HEAD(key_hash_head, key_hash); 138 1.1 joerg 139 1.1 joerg struct cdbw { 140 1.1 joerg size_t data_counter; 141 1.1 joerg size_t data_allocated; 142 1.1 joerg size_t data_size; 143 1.1 joerg size_t *data_len; 144 1.1 joerg void **data_ptr; 145 1.1 joerg 146 1.1 joerg size_t hash_size; 147 1.1 joerg struct key_hash_head *hash; 148 1.1 joerg size_t key_counter; 149 1.1 joerg }; 150 1.1 joerg 151 1.1 joerg /* Max. data counter that allows the index size to be 32bit. */ 152 1.1 joerg static const uint32_t max_data_counter = 0xccccccccU; 153 1.1 joerg 154 1.1 joerg struct cdbw * 155 1.1 joerg cdbw_open(void) 156 1.1 joerg { 157 1.1 joerg struct cdbw *cdbw; 158 1.1 joerg size_t i; 159 1.1 joerg 160 1.1 joerg cdbw = calloc(sizeof(*cdbw), 1); 161 1.1 joerg if (cdbw == NULL) 162 1.1 joerg return NULL; 163 1.1 joerg 164 1.1 joerg cdbw->hash_size = 1024; 165 1.1 joerg cdbw->hash = calloc(cdbw->hash_size, sizeof(*cdbw->hash)); 166 1.1 joerg if (cdbw->hash == NULL) { 167 1.1 joerg free(cdbw); 168 1.1 joerg return NULL; 169 1.1 joerg } 170 1.1 joerg 171 1.1 joerg for (i = 0; i < cdbw->hash_size; ++i) 172 1.1 joerg SLIST_INIT(cdbw->hash + i); 173 1.1 joerg 174 1.1 joerg return cdbw; 175 1.1 joerg } 176 1.1 joerg 177 1.1 joerg int 178 1.1 joerg cdbw_put(struct cdbw *cdbw, const void *key, size_t keylen, 179 1.1 joerg const void *data, size_t datalen) 180 1.1 joerg { 181 1.1 joerg uint32_t idx; 182 1.1 joerg int rv; 183 1.1 joerg 184 1.1 joerg rv = cdbw_put_data(cdbw, data, datalen, &idx); 185 1.1 joerg if (rv) 186 1.1 joerg return rv; 187 1.1 joerg rv = cdbw_put_key(cdbw, key, keylen, idx); 188 1.1 joerg if (rv) { 189 1.1 joerg --cdbw->data_counter; 190 1.1 joerg free(cdbw->data_ptr[cdbw->data_counter]); 191 1.1 joerg cdbw->data_size -= datalen; 192 1.1 joerg return rv; 193 1.1 joerg } 194 1.1 joerg return 0; 195 1.1 joerg } 196 1.1 joerg 197 1.1 joerg int 198 1.1 joerg cdbw_put_data(struct cdbw *cdbw, const void *data, size_t datalen, 199 1.1 joerg uint32_t *idx) 200 1.1 joerg { 201 1.1 joerg 202 1.1 joerg if (cdbw->data_counter == max_data_counter) 203 1.1 joerg return -1; 204 1.1 joerg 205 1.1 joerg if (cdbw->data_size + datalen < cdbw->data_size || 206 1.1 joerg cdbw->data_size + datalen > 0xffffffffU) 207 1.1 joerg return -1; /* Overflow */ 208 1.1 joerg 209 1.1 joerg if (cdbw->data_allocated == cdbw->data_counter) { 210 1.1 joerg void **new_data_ptr; 211 1.1 joerg size_t *new_data_len; 212 1.1 joerg size_t new_allocated; 213 1.1 joerg 214 1.1 joerg if (cdbw->data_allocated == 0) 215 1.1 joerg new_allocated = 256; 216 1.1 joerg else 217 1.1 joerg new_allocated = cdbw->data_allocated * 2; 218 1.1 joerg 219 1.1 joerg new_data_ptr = realloc(cdbw->data_ptr, 220 1.1 joerg sizeof(*cdbw->data_ptr) * new_allocated); 221 1.1 joerg if (new_data_ptr == NULL) 222 1.1 joerg return -1; 223 1.1 joerg cdbw->data_ptr = new_data_ptr; 224 1.1 joerg 225 1.1 joerg new_data_len = realloc(cdbw->data_len, 226 1.1 joerg sizeof(*cdbw->data_len) * new_allocated); 227 1.1 joerg if (new_data_len == NULL) 228 1.1 joerg return -1; 229 1.1 joerg cdbw->data_len = new_data_len; 230 1.1 joerg 231 1.1 joerg cdbw->data_allocated = new_allocated; 232 1.1 joerg } 233 1.1 joerg 234 1.1 joerg cdbw->data_ptr[cdbw->data_counter] = malloc(datalen); 235 1.1 joerg if (cdbw->data_ptr[cdbw->data_counter] == NULL) 236 1.1 joerg return -1; 237 1.1 joerg memcpy(cdbw->data_ptr[cdbw->data_counter], data, datalen); 238 1.1 joerg cdbw->data_len[cdbw->data_counter] = datalen; 239 1.1 joerg cdbw->data_size += datalen; 240 1.3 joerg *idx = cdbw->data_counter++; 241 1.1 joerg return 0; 242 1.1 joerg } 243 1.1 joerg 244 1.1 joerg int 245 1.1 joerg cdbw_put_key(struct cdbw *cdbw, const void *key, size_t keylen, uint32_t idx) 246 1.1 joerg { 247 1.1 joerg uint32_t hashes[3]; 248 1.1 joerg struct key_hash_head *head, *head2, *new_head; 249 1.1 joerg struct key_hash *key_hash; 250 1.1 joerg size_t new_hash_size, i; 251 1.1 joerg 252 1.1 joerg if (idx >= cdbw->data_counter || 253 1.1 joerg cdbw->key_counter == max_data_counter) 254 1.1 joerg return -1; 255 1.1 joerg 256 1.1 joerg mi_vector_hash(key, keylen, 0, hashes); 257 1.1 joerg 258 1.1 joerg head = cdbw->hash + (hashes[0] & (cdbw->hash_size - 1)); 259 1.1 joerg SLIST_FOREACH(key_hash, head, link) { 260 1.1 joerg if (key_hash->keylen != keylen) 261 1.1 joerg continue; 262 1.1 joerg if (key_hash->hashes[0] != hashes[0]) 263 1.1 joerg continue; 264 1.1 joerg if (key_hash->hashes[1] != hashes[1]) 265 1.1 joerg continue; 266 1.1 joerg if (key_hash->hashes[2] != hashes[2]) 267 1.1 joerg continue; 268 1.1 joerg if (memcmp(key, key_hash->key, keylen)) 269 1.1 joerg continue; 270 1.1 joerg return -1; 271 1.1 joerg } 272 1.1 joerg key_hash = malloc(sizeof(*key_hash)); 273 1.1 joerg if (key_hash == NULL) 274 1.1 joerg return -1; 275 1.1 joerg key_hash->key = malloc(keylen); 276 1.1 joerg if (key_hash->key == NULL) { 277 1.1 joerg free(key_hash); 278 1.1 joerg return -1; 279 1.1 joerg } 280 1.1 joerg memcpy(key_hash->key, key, keylen); 281 1.1 joerg key_hash->hashes[0] = hashes[0]; 282 1.1 joerg key_hash->hashes[1] = hashes[1]; 283 1.1 joerg key_hash->hashes[2] = hashes[2]; 284 1.1 joerg key_hash->keylen = keylen; 285 1.1 joerg key_hash->idx = idx; 286 1.1 joerg SLIST_INSERT_HEAD(head, key_hash, link); 287 1.1 joerg ++cdbw->key_counter; 288 1.1 joerg 289 1.1 joerg if (cdbw->key_counter <= cdbw->hash_size) 290 1.1 joerg return 0; 291 1.1 joerg 292 1.1 joerg /* Try to resize the hash table, but ignore errors. */ 293 1.1 joerg new_hash_size = cdbw->hash_size * 2; 294 1.1 joerg new_head = calloc(sizeof(*new_head), new_hash_size); 295 1.1 joerg if (new_head == NULL) 296 1.1 joerg return 0; 297 1.1 joerg 298 1.1 joerg head = &cdbw->hash[hashes[0] & (cdbw->hash_size - 1)]; 299 1.1 joerg for (i = 0; i < new_hash_size; ++i) 300 1.1 joerg SLIST_INIT(new_head + i); 301 1.1 joerg 302 1.1 joerg for (i = 0; i < cdbw->hash_size; ++i) { 303 1.1 joerg head = cdbw->hash + i; 304 1.1 joerg 305 1.1 joerg while ((key_hash = SLIST_FIRST(head)) != NULL) { 306 1.1 joerg SLIST_REMOVE_HEAD(head, link); 307 1.1 joerg head2 = new_head + 308 1.1 joerg (key_hash->hashes[0] & (new_hash_size - 1)); 309 1.1 joerg SLIST_INSERT_HEAD(head2, key_hash, link); 310 1.1 joerg } 311 1.1 joerg } 312 1.1 joerg free(cdbw->hash); 313 1.1 joerg cdbw->hash_size = new_hash_size; 314 1.1 joerg cdbw->hash = new_head; 315 1.1 joerg 316 1.1 joerg return 0; 317 1.1 joerg } 318 1.1 joerg 319 1.1 joerg void 320 1.1 joerg cdbw_close(struct cdbw *cdbw) 321 1.1 joerg { 322 1.1 joerg struct key_hash_head *head; 323 1.1 joerg struct key_hash *key_hash; 324 1.1 joerg size_t i; 325 1.1 joerg 326 1.1 joerg for (i = 0; i < cdbw->hash_size; ++i) { 327 1.1 joerg head = cdbw->hash + i; 328 1.1 joerg while ((key_hash = SLIST_FIRST(head)) != NULL) { 329 1.1 joerg SLIST_REMOVE_HEAD(head, link); 330 1.1 joerg free(key_hash->key); 331 1.1 joerg free(key_hash); 332 1.1 joerg } 333 1.1 joerg } 334 1.1 joerg 335 1.1 joerg for (i = 0; i < cdbw->data_counter; ++i) 336 1.1 joerg free(cdbw->data_ptr[i]); 337 1.1 joerg free(cdbw->data_ptr); 338 1.1 joerg free(cdbw->data_len); 339 1.1 joerg free(cdbw->hash); 340 1.1 joerg free(cdbw); 341 1.1 joerg } 342 1.1 joerg 343 1.4 joerg uint32_t 344 1.4 joerg cdbw_stable_seeder(void) 345 1.4 joerg { 346 1.4 joerg return 0; 347 1.4 joerg } 348 1.4 joerg 349 1.6 alnsn /* 350 1.7 joerg * For each vertex in the 3-graph, the incidence lists needs to be kept. 351 1.7 joerg * Avoid storing the full list by just XORing the indices of the still 352 1.7 joerg * incident edges and remember the number of such edges as that's all 353 1.7 joerg * the peeling computation needs. This is inspired by: 354 1.7 joerg * Cache-Oblivious Peeling of Random Hypergraphs by Djamal Belazzougui, 355 1.7 joerg * Paolo Boldi, Giuseppe Ottaviano, Rossano Venturini, and Sebastiano 356 1.7 joerg * Vigna. https://arxiv.org/abs/1312.0526 357 1.7 joerg * 358 1.7 joerg * Unlike in the paper, we don't care about external storage and have 359 1.7 joerg * the edge list at hand all the time. As such, no ordering is necessary 360 1.7 joerg * and the vertices of the edge don't have to be copied. 361 1.7 joerg * 362 1.7 joerg * The core observation of the paper above is that for a degree of one, 363 1.7 joerg * the incident edge can be obtained directly. 364 1.6 alnsn */ 365 1.7 joerg struct vertex { 366 1.7 joerg uint32_t degree; 367 1.7 joerg uint32_t edges; 368 1.1 joerg }; 369 1.1 joerg 370 1.1 joerg struct edge { 371 1.7 joerg uint32_t vertices[3]; 372 1.1 joerg uint32_t idx; 373 1.1 joerg }; 374 1.1 joerg 375 1.1 joerg struct state { 376 1.1 joerg uint32_t data_entries; 377 1.1 joerg uint32_t entries; 378 1.1 joerg uint32_t keys; 379 1.1 joerg uint32_t seed; 380 1.1 joerg 381 1.1 joerg uint32_t *g; 382 1.1 joerg char *visited; 383 1.1 joerg 384 1.7 joerg struct vertex *vertices; 385 1.1 joerg struct edge *edges; 386 1.1 joerg uint32_t output_index; 387 1.1 joerg uint32_t *output_order; 388 1.1 joerg }; 389 1.1 joerg 390 1.6 alnsn /* 391 1.7 joerg * Add (delta == 1) or remove (delta == -1) the edge e 392 1.7 joerg * from the incidence lists. 393 1.6 alnsn */ 394 1.6 alnsn static inline void 395 1.7 joerg change_edge(struct state *state, int delta, uint32_t e) 396 1.1 joerg { 397 1.7 joerg int i; 398 1.7 joerg struct vertex *v; 399 1.7 joerg struct edge *e_ = &state->edges[e]; 400 1.7 joerg 401 1.7 joerg for (i = 0; i < 3; ++i) { 402 1.7 joerg v = &state->vertices[e_->vertices[i]]; 403 1.7 joerg v->edges ^= e; 404 1.7 joerg v->degree += delta; 405 1.7 joerg } 406 1.6 alnsn } 407 1.1 joerg 408 1.6 alnsn static inline void 409 1.7 joerg remove_vertex(struct state *state, uint32_t v) 410 1.6 alnsn { 411 1.7 joerg struct vertex *v_ = &state->vertices[v]; 412 1.7 joerg uint32_t e; 413 1.1 joerg 414 1.7 joerg if (v_->degree == 1) { 415 1.7 joerg e = v_->edges; 416 1.6 alnsn state->output_order[--state->output_index] = e; 417 1.7 joerg change_edge(state, -1, e); 418 1.6 alnsn } 419 1.1 joerg } 420 1.1 joerg 421 1.1 joerg static int 422 1.1 joerg build_graph(struct cdbw *cdbw, struct state *state) 423 1.1 joerg { 424 1.1 joerg struct key_hash_head *head; 425 1.1 joerg struct key_hash *key_hash; 426 1.1 joerg struct edge *e; 427 1.7 joerg uint32_t entries_m; 428 1.7 joerg uint8_t entries_s1, entries_s2; 429 1.3 joerg uint32_t hashes[3]; 430 1.3 joerg size_t i; 431 1.7 joerg int j; 432 1.1 joerg 433 1.7 joerg memset(state->vertices, 0, sizeof(*state->vertices) * state->entries); 434 1.6 alnsn 435 1.1 joerg e = state->edges; 436 1.7 joerg fast_divide32_prepare(state->entries, &entries_m, &entries_s1, 437 1.7 joerg &entries_s2); 438 1.7 joerg 439 1.1 joerg for (i = 0; i < cdbw->hash_size; ++i) { 440 1.1 joerg head = &cdbw->hash[i]; 441 1.1 joerg SLIST_FOREACH(key_hash, head, link) { 442 1.1 joerg mi_vector_hash(key_hash->key, key_hash->keylen, 443 1.1 joerg state->seed, hashes); 444 1.1 joerg 445 1.7 joerg for (j = 0; j < 3; ++j) { 446 1.7 joerg e->vertices[j] = fast_remainder32(hashes[j], 447 1.7 joerg state->entries, entries_m, entries_s1, 448 1.7 joerg entries_s2); 449 1.7 joerg } 450 1.7 joerg 451 1.7 joerg if (e->vertices[0] == e->vertices[1]) 452 1.5 joerg return -1; 453 1.7 joerg if (e->vertices[0] == e->vertices[2]) 454 1.5 joerg return -1; 455 1.7 joerg if (e->vertices[1] == e->vertices[2]) 456 1.5 joerg return -1; 457 1.7 joerg e->idx = key_hash->idx; 458 1.1 joerg ++e; 459 1.1 joerg } 460 1.1 joerg } 461 1.1 joerg 462 1.7 joerg /* 463 1.7 joerg * Do the edge processing separately as there is a good chance 464 1.7 joerg * the degraded edge case above will happen; this avoid 465 1.7 joerg *unnecessary work. 466 1.7 joerg */ 467 1.7 joerg for (i = 0; i < state->keys; ++i) 468 1.7 joerg change_edge(state, 1, i); 469 1.7 joerg 470 1.1 joerg state->output_index = state->keys; 471 1.1 joerg for (i = 0; i < state->entries; ++i) 472 1.6 alnsn remove_vertex(state, i); 473 1.1 joerg 474 1.1 joerg i = state->keys; 475 1.1 joerg while (i > 0 && i > state->output_index) { 476 1.1 joerg --i; 477 1.1 joerg e = state->edges + state->output_order[i]; 478 1.7 joerg for (j = 0; j < 3; ++j) 479 1.7 joerg remove_vertex(state, e->vertices[j]); 480 1.1 joerg } 481 1.1 joerg 482 1.1 joerg return state->output_index == 0 ? 0 : -1; 483 1.1 joerg } 484 1.1 joerg 485 1.1 joerg static void 486 1.1 joerg assign_nodes(struct state *state) 487 1.1 joerg { 488 1.1 joerg struct edge *e; 489 1.1 joerg size_t i; 490 1.1 joerg 491 1.7 joerg uint32_t v0, v1, v2, entries_m; 492 1.7 joerg uint8_t entries_s1, entries_s2; 493 1.7 joerg 494 1.7 joerg fast_divide32_prepare(state->data_entries, &entries_m, &entries_s1, 495 1.7 joerg &entries_s2); 496 1.7 joerg 497 1.1 joerg for (i = 0; i < state->keys; ++i) { 498 1.1 joerg e = state->edges + state->output_order[i]; 499 1.7 joerg if (!state->visited[e->vertices[0]]) { 500 1.7 joerg v0 = e->vertices[0]; 501 1.7 joerg v1 = e->vertices[1]; 502 1.7 joerg v2 = e->vertices[2]; 503 1.7 joerg } else if (!state->visited[e->vertices[1]]) { 504 1.7 joerg v0 = e->vertices[1]; 505 1.7 joerg v1 = e->vertices[0]; 506 1.7 joerg v2 = e->vertices[2]; 507 1.1 joerg } else { 508 1.7 joerg v0 = e->vertices[2]; 509 1.7 joerg v1 = e->vertices[0]; 510 1.7 joerg v2 = e->vertices[1]; 511 1.1 joerg } 512 1.7 joerg state->g[v0] = 513 1.7 joerg fast_remainder32((2 * state->data_entries + e->idx 514 1.7 joerg - state->g[v1] - state->g[v2]), 515 1.7 joerg state->data_entries, entries_m, 516 1.7 joerg entries_s1, entries_s2); 517 1.7 joerg state->visited[v0] = 1; 518 1.7 joerg state->visited[v1] = 1; 519 1.7 joerg state->visited[v2] = 1; 520 1.1 joerg } 521 1.1 joerg } 522 1.1 joerg 523 1.1 joerg static size_t 524 1.1 joerg compute_size(uint32_t size) 525 1.1 joerg { 526 1.1 joerg if (size < 0x100) 527 1.1 joerg return 1; 528 1.1 joerg else if (size < 0x10000) 529 1.1 joerg return 2; 530 1.1 joerg else 531 1.1 joerg return 4; 532 1.1 joerg } 533 1.1 joerg 534 1.1 joerg #define COND_FLUSH_BUFFER(n) do { \ 535 1.1 joerg if (__predict_false(cur_pos + (n) >= sizeof(buf))) { \ 536 1.1 joerg ret = write(fd, buf, cur_pos); \ 537 1.1 joerg if (ret == -1 || (size_t)ret != cur_pos) \ 538 1.1 joerg return -1; \ 539 1.1 joerg cur_pos = 0; \ 540 1.1 joerg } \ 541 1.8 rillig } while (0) 542 1.1 joerg 543 1.1 joerg static int 544 1.1 joerg print_hash(struct cdbw *cdbw, struct state *state, int fd, const char *descr) 545 1.1 joerg { 546 1.1 joerg uint32_t data_size; 547 1.1 joerg uint8_t buf[90000]; 548 1.1 joerg size_t i, size, size2, cur_pos; 549 1.1 joerg ssize_t ret; 550 1.1 joerg 551 1.1 joerg memcpy(buf, "NBCDB\n\0", 7); 552 1.1 joerg buf[7] = 1; 553 1.1 joerg strncpy((char *)buf + 8, descr, 16); 554 1.3 joerg le32enc(buf + 24, cdbw->data_size); 555 1.3 joerg le32enc(buf + 28, cdbw->data_counter); 556 1.1 joerg le32enc(buf + 32, state->entries); 557 1.1 joerg le32enc(buf + 36, state->seed); 558 1.1 joerg cur_pos = 40; 559 1.1 joerg 560 1.1 joerg size = compute_size(state->entries); 561 1.1 joerg for (i = 0; i < state->entries; ++i) { 562 1.1 joerg COND_FLUSH_BUFFER(4); 563 1.1 joerg le32enc(buf + cur_pos, state->g[i]); 564 1.1 joerg cur_pos += size; 565 1.1 joerg } 566 1.3 joerg size2 = compute_size(cdbw->data_size); 567 1.1 joerg size = size * state->entries % size2; 568 1.1 joerg if (size != 0) { 569 1.1 joerg size = size2 - size; 570 1.1 joerg COND_FLUSH_BUFFER(4); 571 1.1 joerg le32enc(buf + cur_pos, 0); 572 1.1 joerg cur_pos += size; 573 1.1 joerg } 574 1.1 joerg for (data_size = 0, i = 0; i < cdbw->data_counter; ++i) { 575 1.1 joerg COND_FLUSH_BUFFER(4); 576 1.1 joerg le32enc(buf + cur_pos, data_size); 577 1.1 joerg cur_pos += size2; 578 1.3 joerg data_size += cdbw->data_len[i]; 579 1.1 joerg } 580 1.1 joerg COND_FLUSH_BUFFER(4); 581 1.1 joerg le32enc(buf + cur_pos, data_size); 582 1.1 joerg cur_pos += size2; 583 1.1 joerg 584 1.1 joerg for (i = 0; i < cdbw->data_counter; ++i) { 585 1.1 joerg COND_FLUSH_BUFFER(cdbw->data_len[i]); 586 1.1 joerg if (cdbw->data_len[i] < sizeof(buf)) { 587 1.1 joerg memcpy(buf + cur_pos, cdbw->data_ptr[i], 588 1.1 joerg cdbw->data_len[i]); 589 1.1 joerg cur_pos += cdbw->data_len[i]; 590 1.1 joerg } else { 591 1.1 joerg ret = write(fd, cdbw->data_ptr[i], cdbw->data_len[i]); 592 1.1 joerg if (ret == -1 || (size_t)ret != cdbw->data_len[i]) 593 1.1 joerg return -1; 594 1.1 joerg } 595 1.1 joerg } 596 1.1 joerg if (cur_pos != 0) { 597 1.1 joerg ret = write(fd, buf, cur_pos); 598 1.1 joerg if (ret == -1 || (size_t)ret != cur_pos) 599 1.1 joerg return -1; 600 1.1 joerg } 601 1.1 joerg return 0; 602 1.1 joerg } 603 1.1 joerg 604 1.1 joerg int 605 1.9 riastrad cdbw_output(struct cdbw *cdbw, int fd, const char *descr, 606 1.1 joerg uint32_t (*seedgen)(void)) 607 1.1 joerg { 608 1.1 joerg struct state state; 609 1.1 joerg int rv; 610 1.1 joerg 611 1.1 joerg if (cdbw->data_counter == 0 || cdbw->key_counter == 0) { 612 1.1 joerg state.entries = 0; 613 1.1 joerg state.seed = 0; 614 1.1 joerg print_hash(cdbw, &state, fd, descr); 615 1.1 joerg return 0; 616 1.1 joerg } 617 1.1 joerg 618 1.4 joerg #if HAVE_NBTOOL_CONFIG_H 619 1.4 joerg if (seedgen == NULL) 620 1.4 joerg seedgen = cdbw_stable_seeder; 621 1.4 joerg #else 622 1.1 joerg if (seedgen == NULL) 623 1.1 joerg seedgen = arc4random; 624 1.4 joerg #endif 625 1.1 joerg 626 1.1 joerg rv = 0; 627 1.1 joerg 628 1.3 joerg state.keys = cdbw->key_counter; 629 1.3 joerg state.data_entries = cdbw->data_counter; 630 1.1 joerg state.entries = state.keys + (state.keys + 3) / 4; 631 1.1 joerg if (state.entries < 10) 632 1.1 joerg state.entries = 10; 633 1.1 joerg 634 1.1 joerg #define NALLOC(var, n) var = calloc(sizeof(*var), n) 635 1.1 joerg NALLOC(state.g, state.entries); 636 1.1 joerg NALLOC(state.visited, state.entries); 637 1.7 joerg NALLOC(state.vertices, state.entries); 638 1.6 alnsn NALLOC(state.edges, state.keys); 639 1.1 joerg NALLOC(state.output_order, state.keys); 640 1.1 joerg #undef NALLOC 641 1.1 joerg 642 1.7 joerg if (state.g == NULL || state.visited == NULL || state.edges == NULL || 643 1.7 joerg state.vertices == NULL || state.output_order == NULL) { 644 1.1 joerg rv = -1; 645 1.1 joerg goto release; 646 1.1 joerg } 647 1.1 joerg 648 1.4 joerg state.seed = 0; 649 1.1 joerg do { 650 1.4 joerg if (seedgen == cdbw_stable_seeder) 651 1.4 joerg ++state.seed; 652 1.4 joerg else 653 1.4 joerg state.seed = (*seedgen)(); 654 1.1 joerg } while (build_graph(cdbw, &state)); 655 1.1 joerg 656 1.1 joerg assign_nodes(&state); 657 1.1 joerg rv = print_hash(cdbw, &state, fd, descr); 658 1.1 joerg 659 1.1 joerg release: 660 1.1 joerg free(state.g); 661 1.1 joerg free(state.visited); 662 1.7 joerg free(state.vertices); 663 1.1 joerg free(state.edges); 664 1.1 joerg free(state.output_order); 665 1.1 joerg 666 1.1 joerg return rv; 667 1.1 joerg } 668