1 1.1 haad /* 2 1.1 haad * CDDL HEADER START 3 1.1 haad * 4 1.1 haad * The contents of this file are subject to the terms of the 5 1.1 haad * Common Development and Distribution License (the "License"). 6 1.1 haad * You may not use this file except in compliance with the License. 7 1.1 haad * 8 1.1 haad * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 1.1 haad * or http://www.opensolaris.org/os/licensing. 10 1.1 haad * See the License for the specific language governing permissions 11 1.1 haad * and limitations under the License. 12 1.1 haad * 13 1.1 haad * When distributing Covered Code, include this CDDL HEADER in each 14 1.1 haad * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 1.1 haad * If applicable, add the following below this CDDL HEADER, with the 16 1.1 haad * fields enclosed by brackets "[]" replaced with your own identifying 17 1.1 haad * information: Portions Copyright [yyyy] [name of copyright owner] 18 1.1 haad * 19 1.1 haad * CDDL HEADER END 20 1.1 haad */ 21 1.1 haad /* 22 1.4 chs * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 1.4 chs * Copyright (c) 2011, 2016 by Delphix. All rights reserved. 24 1.4 chs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 25 1.4 chs * Copyright (c) 2014 Integros [integros.com] 26 1.1 haad */ 27 1.1 haad 28 1.3 haad #include <sys/zio.h> 29 1.1 haad #include <sys/spa.h> 30 1.1 haad #include <sys/dmu.h> 31 1.1 haad #include <sys/zfs_context.h> 32 1.1 haad #include <sys/zap.h> 33 1.1 haad #include <sys/refcount.h> 34 1.1 haad #include <sys/zap_impl.h> 35 1.1 haad #include <sys/zap_leaf.h> 36 1.1 haad #include <sys/avl.h> 37 1.4 chs #include <sys/arc.h> 38 1.4 chs #include <sys/dmu_objset.h> 39 1.1 haad 40 1.1 haad #ifdef _KERNEL 41 1.1 haad #include <sys/sunddi.h> 42 1.1 haad #endif 43 1.1 haad 44 1.4 chs extern inline mzap_phys_t *zap_m_phys(zap_t *zap); 45 1.4 chs 46 1.4 chs static int mzap_upgrade(zap_t **zapp, 47 1.4 chs void *tag, dmu_tx_t *tx, zap_flags_t flags); 48 1.1 haad 49 1.3 haad uint64_t 50 1.3 haad zap_getflags(zap_t *zap) 51 1.3 haad { 52 1.3 haad if (zap->zap_ismicro) 53 1.3 haad return (0); 54 1.4 chs return (zap_f_phys(zap)->zap_flags); 55 1.3 haad } 56 1.3 haad 57 1.3 haad int 58 1.3 haad zap_hashbits(zap_t *zap) 59 1.3 haad { 60 1.3 haad if (zap_getflags(zap) & ZAP_FLAG_HASH64) 61 1.3 haad return (48); 62 1.3 haad else 63 1.3 haad return (28); 64 1.3 haad } 65 1.3 haad 66 1.3 haad uint32_t 67 1.3 haad zap_maxcd(zap_t *zap) 68 1.3 haad { 69 1.3 haad if (zap_getflags(zap) & ZAP_FLAG_HASH64) 70 1.3 haad return ((1<<16)-1); 71 1.3 haad else 72 1.3 haad return (-1U); 73 1.3 haad } 74 1.1 haad 75 1.1 haad static uint64_t 76 1.3 haad zap_hash(zap_name_t *zn) 77 1.1 haad { 78 1.3 haad zap_t *zap = zn->zn_zap; 79 1.3 haad uint64_t h = 0; 80 1.3 haad 81 1.3 haad if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { 82 1.3 haad ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); 83 1.3 haad h = *(uint64_t *)zn->zn_key_orig; 84 1.3 haad } else { 85 1.3 haad h = zap->zap_salt; 86 1.3 haad ASSERT(h != 0); 87 1.3 haad ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 88 1.3 haad 89 1.3 haad if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { 90 1.3 haad int i; 91 1.3 haad const uint64_t *wp = zn->zn_key_norm; 92 1.3 haad 93 1.3 haad ASSERT(zn->zn_key_intlen == 8); 94 1.3 haad for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { 95 1.3 haad int j; 96 1.3 haad uint64_t word = *wp; 97 1.3 haad 98 1.3 haad for (j = 0; j < zn->zn_key_intlen; j++) { 99 1.3 haad h = (h >> 8) ^ 100 1.3 haad zfs_crc64_table[(h ^ word) & 0xFF]; 101 1.3 haad word >>= NBBY; 102 1.3 haad } 103 1.3 haad } 104 1.3 haad } else { 105 1.3 haad int i, len; 106 1.3 haad const uint8_t *cp = zn->zn_key_norm; 107 1.3 haad 108 1.3 haad /* 109 1.3 haad * We previously stored the terminating null on 110 1.3 haad * disk, but didn't hash it, so we need to 111 1.3 haad * continue to not hash it. (The 112 1.3 haad * zn_key_*_numints includes the terminating 113 1.3 haad * null for non-binary keys.) 114 1.3 haad */ 115 1.3 haad len = zn->zn_key_norm_numints - 1; 116 1.3 haad 117 1.3 haad ASSERT(zn->zn_key_intlen == 1); 118 1.3 haad for (i = 0; i < len; cp++, i++) { 119 1.3 haad h = (h >> 8) ^ 120 1.3 haad zfs_crc64_table[(h ^ *cp) & 0xFF]; 121 1.3 haad } 122 1.3 haad } 123 1.1 haad } 124 1.1 haad /* 125 1.3 haad * Don't use all 64 bits, since we need some in the cookie for 126 1.3 haad * the collision differentiator. We MUST use the high bits, 127 1.3 haad * since those are the ones that we first pay attention to when 128 1.1 haad * chosing the bucket. 129 1.1 haad */ 130 1.3 haad h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); 131 1.1 haad 132 1.3 haad return (h); 133 1.1 haad } 134 1.1 haad 135 1.1 haad static int 136 1.1 haad zap_normalize(zap_t *zap, const char *name, char *namenorm) 137 1.1 haad { 138 1.1 haad size_t inlen, outlen; 139 1.1 haad int err; 140 1.1 haad 141 1.3 haad ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); 142 1.3 haad 143 1.1 haad inlen = strlen(name) + 1; 144 1.1 haad outlen = ZAP_MAXNAMELEN; 145 1.1 haad 146 1.1 haad err = 0; 147 1.1 haad (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, 148 1.3 haad zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | 149 1.3 haad U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); 150 1.1 haad 151 1.1 haad return (err); 152 1.1 haad } 153 1.1 haad 154 1.1 haad boolean_t 155 1.1 haad zap_match(zap_name_t *zn, const char *matchname) 156 1.1 haad { 157 1.3 haad ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); 158 1.3 haad 159 1.1 haad if (zn->zn_matchtype == MT_FIRST) { 160 1.1 haad char norm[ZAP_MAXNAMELEN]; 161 1.1 haad 162 1.1 haad if (zap_normalize(zn->zn_zap, matchname, norm) != 0) 163 1.1 haad return (B_FALSE); 164 1.1 haad 165 1.3 haad return (strcmp(zn->zn_key_norm, norm) == 0); 166 1.1 haad } else { 167 1.1 haad /* MT_BEST or MT_EXACT */ 168 1.3 haad return (strcmp(zn->zn_key_orig, matchname) == 0); 169 1.1 haad } 170 1.1 haad } 171 1.1 haad 172 1.1 haad void 173 1.1 haad zap_name_free(zap_name_t *zn) 174 1.1 haad { 175 1.1 haad kmem_free(zn, sizeof (zap_name_t)); 176 1.1 haad } 177 1.1 haad 178 1.1 haad zap_name_t * 179 1.3 haad zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) 180 1.1 haad { 181 1.1 haad zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 182 1.1 haad 183 1.1 haad zn->zn_zap = zap; 184 1.3 haad zn->zn_key_intlen = sizeof (*key); 185 1.3 haad zn->zn_key_orig = key; 186 1.3 haad zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; 187 1.1 haad zn->zn_matchtype = mt; 188 1.1 haad if (zap->zap_normflags) { 189 1.3 haad if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { 190 1.1 haad zap_name_free(zn); 191 1.1 haad return (NULL); 192 1.1 haad } 193 1.3 haad zn->zn_key_norm = zn->zn_normbuf; 194 1.3 haad zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; 195 1.1 haad } else { 196 1.1 haad if (mt != MT_EXACT) { 197 1.1 haad zap_name_free(zn); 198 1.1 haad return (NULL); 199 1.1 haad } 200 1.3 haad zn->zn_key_norm = zn->zn_key_orig; 201 1.3 haad zn->zn_key_norm_numints = zn->zn_key_orig_numints; 202 1.1 haad } 203 1.1 haad 204 1.3 haad zn->zn_hash = zap_hash(zn); 205 1.3 haad return (zn); 206 1.3 haad } 207 1.3 haad 208 1.3 haad zap_name_t * 209 1.3 haad zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) 210 1.3 haad { 211 1.3 haad zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 212 1.3 haad 213 1.3 haad ASSERT(zap->zap_normflags == 0); 214 1.3 haad zn->zn_zap = zap; 215 1.3 haad zn->zn_key_intlen = sizeof (*key); 216 1.3 haad zn->zn_key_orig = zn->zn_key_norm = key; 217 1.3 haad zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; 218 1.3 haad zn->zn_matchtype = MT_EXACT; 219 1.3 haad 220 1.3 haad zn->zn_hash = zap_hash(zn); 221 1.1 haad return (zn); 222 1.1 haad } 223 1.1 haad 224 1.1 haad static void 225 1.1 haad mzap_byteswap(mzap_phys_t *buf, size_t size) 226 1.1 haad { 227 1.1 haad int i, max; 228 1.1 haad buf->mz_block_type = BSWAP_64(buf->mz_block_type); 229 1.1 haad buf->mz_salt = BSWAP_64(buf->mz_salt); 230 1.1 haad buf->mz_normflags = BSWAP_64(buf->mz_normflags); 231 1.1 haad max = (size / MZAP_ENT_LEN) - 1; 232 1.1 haad for (i = 0; i < max; i++) { 233 1.1 haad buf->mz_chunk[i].mze_value = 234 1.1 haad BSWAP_64(buf->mz_chunk[i].mze_value); 235 1.1 haad buf->mz_chunk[i].mze_cd = 236 1.1 haad BSWAP_32(buf->mz_chunk[i].mze_cd); 237 1.1 haad } 238 1.1 haad } 239 1.1 haad 240 1.1 haad void 241 1.1 haad zap_byteswap(void *buf, size_t size) 242 1.1 haad { 243 1.1 haad uint64_t block_type; 244 1.1 haad 245 1.1 haad block_type = *(uint64_t *)buf; 246 1.1 haad 247 1.1 haad if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { 248 1.1 haad /* ASSERT(magic == ZAP_LEAF_MAGIC); */ 249 1.1 haad mzap_byteswap(buf, size); 250 1.1 haad } else { 251 1.1 haad fzap_byteswap(buf, size); 252 1.1 haad } 253 1.1 haad } 254 1.1 haad 255 1.1 haad static int 256 1.1 haad mze_compare(const void *arg1, const void *arg2) 257 1.1 haad { 258 1.1 haad const mzap_ent_t *mze1 = arg1; 259 1.1 haad const mzap_ent_t *mze2 = arg2; 260 1.1 haad 261 1.1 haad if (mze1->mze_hash > mze2->mze_hash) 262 1.1 haad return (+1); 263 1.1 haad if (mze1->mze_hash < mze2->mze_hash) 264 1.1 haad return (-1); 265 1.4 chs if (mze1->mze_cd > mze2->mze_cd) 266 1.1 haad return (+1); 267 1.4 chs if (mze1->mze_cd < mze2->mze_cd) 268 1.1 haad return (-1); 269 1.1 haad return (0); 270 1.1 haad } 271 1.1 haad 272 1.4 chs static int 273 1.4 chs mze_insert(zap_t *zap, int chunkid, uint64_t hash) 274 1.1 haad { 275 1.1 haad mzap_ent_t *mze; 276 1.4 chs avl_index_t idx; 277 1.1 haad 278 1.1 haad ASSERT(zap->zap_ismicro); 279 1.1 haad ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 280 1.1 haad 281 1.1 haad mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); 282 1.1 haad mze->mze_chunkid = chunkid; 283 1.1 haad mze->mze_hash = hash; 284 1.4 chs mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; 285 1.4 chs ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); 286 1.4 chs if (avl_find(&zap->zap_m.zap_avl, mze, &idx) != NULL) { 287 1.4 chs kmem_free(mze, sizeof (mzap_ent_t)); 288 1.4 chs return (EEXIST); 289 1.4 chs } 290 1.4 chs avl_insert(&zap->zap_m.zap_avl, mze, idx); 291 1.4 chs return (0); 292 1.1 haad } 293 1.1 haad 294 1.1 haad static mzap_ent_t * 295 1.1 haad mze_find(zap_name_t *zn) 296 1.1 haad { 297 1.1 haad mzap_ent_t mze_tofind; 298 1.1 haad mzap_ent_t *mze; 299 1.1 haad avl_index_t idx; 300 1.1 haad avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; 301 1.1 haad 302 1.1 haad ASSERT(zn->zn_zap->zap_ismicro); 303 1.1 haad ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); 304 1.1 haad 305 1.1 haad mze_tofind.mze_hash = zn->zn_hash; 306 1.4 chs mze_tofind.mze_cd = 0; 307 1.1 haad 308 1.1 haad again: 309 1.1 haad mze = avl_find(avl, &mze_tofind, &idx); 310 1.1 haad if (mze == NULL) 311 1.1 haad mze = avl_nearest(avl, idx, AVL_AFTER); 312 1.1 haad for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { 313 1.4 chs ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); 314 1.4 chs if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) 315 1.1 haad return (mze); 316 1.1 haad } 317 1.1 haad if (zn->zn_matchtype == MT_BEST) { 318 1.1 haad zn->zn_matchtype = MT_FIRST; 319 1.1 haad goto again; 320 1.1 haad } 321 1.1 haad return (NULL); 322 1.1 haad } 323 1.1 haad 324 1.1 haad static uint32_t 325 1.1 haad mze_find_unused_cd(zap_t *zap, uint64_t hash) 326 1.1 haad { 327 1.1 haad mzap_ent_t mze_tofind; 328 1.1 haad mzap_ent_t *mze; 329 1.1 haad avl_index_t idx; 330 1.1 haad avl_tree_t *avl = &zap->zap_m.zap_avl; 331 1.1 haad uint32_t cd; 332 1.1 haad 333 1.1 haad ASSERT(zap->zap_ismicro); 334 1.1 haad ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 335 1.1 haad 336 1.1 haad mze_tofind.mze_hash = hash; 337 1.4 chs mze_tofind.mze_cd = 0; 338 1.1 haad 339 1.1 haad cd = 0; 340 1.1 haad for (mze = avl_find(avl, &mze_tofind, &idx); 341 1.1 haad mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 342 1.4 chs if (mze->mze_cd != cd) 343 1.1 haad break; 344 1.1 haad cd++; 345 1.1 haad } 346 1.1 haad 347 1.1 haad return (cd); 348 1.1 haad } 349 1.1 haad 350 1.1 haad static void 351 1.1 haad mze_remove(zap_t *zap, mzap_ent_t *mze) 352 1.1 haad { 353 1.1 haad ASSERT(zap->zap_ismicro); 354 1.1 haad ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 355 1.1 haad 356 1.1 haad avl_remove(&zap->zap_m.zap_avl, mze); 357 1.1 haad kmem_free(mze, sizeof (mzap_ent_t)); 358 1.1 haad } 359 1.1 haad 360 1.1 haad static void 361 1.1 haad mze_destroy(zap_t *zap) 362 1.1 haad { 363 1.1 haad mzap_ent_t *mze; 364 1.1 haad void *avlcookie = NULL; 365 1.1 haad 366 1.1 haad while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)) 367 1.1 haad kmem_free(mze, sizeof (mzap_ent_t)); 368 1.1 haad avl_destroy(&zap->zap_m.zap_avl); 369 1.1 haad } 370 1.1 haad 371 1.1 haad static zap_t * 372 1.1 haad mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) 373 1.1 haad { 374 1.1 haad zap_t *winner; 375 1.1 haad zap_t *zap; 376 1.1 haad int i; 377 1.4 chs uint64_t *zap_hdr = (uint64_t *)db->db_data; 378 1.4 chs uint64_t zap_block_type = zap_hdr[0]; 379 1.4 chs uint64_t zap_magic = zap_hdr[1]; 380 1.1 haad 381 1.1 haad ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); 382 1.1 haad 383 1.1 haad zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); 384 1.1 haad rw_init(&zap->zap_rwlock, 0, 0, 0); 385 1.1 haad rw_enter(&zap->zap_rwlock, RW_WRITER); 386 1.1 haad zap->zap_objset = os; 387 1.1 haad zap->zap_object = obj; 388 1.1 haad zap->zap_dbuf = db; 389 1.1 haad 390 1.4 chs if (zap_block_type != ZBT_MICRO) { 391 1.1 haad mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 392 1.4 chs zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1; 393 1.4 chs if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) { 394 1.4 chs winner = NULL; /* No actual winner here... */ 395 1.4 chs goto handle_winner; 396 1.4 chs } 397 1.1 haad } else { 398 1.1 haad zap->zap_ismicro = TRUE; 399 1.1 haad } 400 1.1 haad 401 1.1 haad /* 402 1.1 haad * Make sure that zap_ismicro is set before we let others see 403 1.1 haad * it, because zap_lockdir() checks zap_ismicro without the lock 404 1.1 haad * held. 405 1.1 haad */ 406 1.4 chs dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf); 407 1.4 chs winner = dmu_buf_set_user(db, &zap->zap_dbu); 408 1.1 haad 409 1.4 chs if (winner != NULL) 410 1.4 chs goto handle_winner; 411 1.1 haad 412 1.1 haad if (zap->zap_ismicro) { 413 1.4 chs zap->zap_salt = zap_m_phys(zap)->mz_salt; 414 1.4 chs zap->zap_normflags = zap_m_phys(zap)->mz_normflags; 415 1.1 haad zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; 416 1.1 haad avl_create(&zap->zap_m.zap_avl, mze_compare, 417 1.1 haad sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); 418 1.1 haad 419 1.1 haad for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 420 1.1 haad mzap_ent_phys_t *mze = 421 1.4 chs &zap_m_phys(zap)->mz_chunk[i]; 422 1.1 haad if (mze->mze_name[0]) { 423 1.1 haad zap_name_t *zn; 424 1.1 haad 425 1.1 haad zn = zap_name_alloc(zap, mze->mze_name, 426 1.1 haad MT_EXACT); 427 1.4 chs if (mze_insert(zap, i, zn->zn_hash) == 0) 428 1.4 chs zap->zap_m.zap_num_entries++; 429 1.4 chs else { 430 1.4 chs printf("ZFS WARNING: Duplicated ZAP " 431 1.4 chs "entry detected (%s).\n", 432 1.4 chs mze->mze_name); 433 1.4 chs } 434 1.1 haad zap_name_free(zn); 435 1.1 haad } 436 1.1 haad } 437 1.1 haad } else { 438 1.4 chs zap->zap_salt = zap_f_phys(zap)->zap_salt; 439 1.4 chs zap->zap_normflags = zap_f_phys(zap)->zap_normflags; 440 1.1 haad 441 1.1 haad ASSERT3U(sizeof (struct zap_leaf_header), ==, 442 1.1 haad 2*ZAP_LEAF_CHUNKSIZE); 443 1.1 haad 444 1.1 haad /* 445 1.1 haad * The embedded pointer table should not overlap the 446 1.1 haad * other members. 447 1.1 haad */ 448 1.1 haad ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, 449 1.4 chs &zap_f_phys(zap)->zap_salt); 450 1.1 haad 451 1.1 haad /* 452 1.1 haad * The embedded pointer table should end at the end of 453 1.1 haad * the block 454 1.1 haad */ 455 1.1 haad ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 456 1.1 haad 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - 457 1.4 chs (uintptr_t)zap_f_phys(zap), ==, 458 1.1 haad zap->zap_dbuf->db_size); 459 1.1 haad } 460 1.1 haad rw_exit(&zap->zap_rwlock); 461 1.1 haad return (zap); 462 1.4 chs 463 1.4 chs handle_winner: 464 1.4 chs rw_exit(&zap->zap_rwlock); 465 1.4 chs rw_destroy(&zap->zap_rwlock); 466 1.4 chs if (!zap->zap_ismicro) 467 1.4 chs mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 468 1.4 chs kmem_free(zap, sizeof (zap_t)); 469 1.4 chs return (winner); 470 1.1 haad } 471 1.1 haad 472 1.4 chs static int 473 1.4 chs zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx, 474 1.1 haad krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) 475 1.1 haad { 476 1.1 haad zap_t *zap; 477 1.1 haad krw_t lt; 478 1.4 chs 479 1.4 chs ASSERT0(db->db_offset); 480 1.4 chs objset_t *os = dmu_buf_get_objset(db); 481 1.4 chs uint64_t obj = db->db_object; 482 1.1 haad 483 1.1 haad *zapp = NULL; 484 1.1 haad 485 1.1 haad #ifdef ZFS_DEBUG 486 1.1 haad { 487 1.1 haad dmu_object_info_t doi; 488 1.1 haad dmu_object_info_from_db(db, &doi); 489 1.4 chs ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 490 1.1 haad } 491 1.1 haad #endif 492 1.1 haad 493 1.1 haad zap = dmu_buf_get_user(db); 494 1.4 chs if (zap == NULL) { 495 1.1 haad zap = mzap_open(os, obj, db); 496 1.4 chs if (zap == NULL) { 497 1.4 chs /* 498 1.4 chs * mzap_open() didn't like what it saw on-disk. 499 1.4 chs * Check for corruption! 500 1.4 chs */ 501 1.4 chs return (SET_ERROR(EIO)); 502 1.4 chs } 503 1.4 chs } 504 1.1 haad 505 1.1 haad /* 506 1.1 haad * We're checking zap_ismicro without the lock held, in order to 507 1.1 haad * tell what type of lock we want. Once we have some sort of 508 1.1 haad * lock, see if it really is the right type. In practice this 509 1.1 haad * can only be different if it was upgraded from micro to fat, 510 1.1 haad * and micro wanted WRITER but fat only needs READER. 511 1.1 haad */ 512 1.1 haad lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; 513 1.1 haad rw_enter(&zap->zap_rwlock, lt); 514 1.1 haad if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { 515 1.1 haad /* it was upgraded, now we only need reader */ 516 1.1 haad ASSERT(lt == RW_WRITER); 517 1.1 haad ASSERT(RW_READER == 518 1.1 haad (!zap->zap_ismicro && fatreader) ? RW_READER : lti); 519 1.1 haad rw_downgrade(&zap->zap_rwlock); 520 1.1 haad lt = RW_READER; 521 1.1 haad } 522 1.1 haad 523 1.1 haad zap->zap_objset = os; 524 1.1 haad 525 1.1 haad if (lt == RW_WRITER) 526 1.1 haad dmu_buf_will_dirty(db, tx); 527 1.1 haad 528 1.1 haad ASSERT3P(zap->zap_dbuf, ==, db); 529 1.1 haad 530 1.1 haad ASSERT(!zap->zap_ismicro || 531 1.1 haad zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); 532 1.1 haad if (zap->zap_ismicro && tx && adding && 533 1.1 haad zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { 534 1.1 haad uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; 535 1.1 haad if (newsz > MZAP_MAX_BLKSZ) { 536 1.1 haad dprintf("upgrading obj %llu: num_entries=%u\n", 537 1.1 haad obj, zap->zap_m.zap_num_entries); 538 1.1 haad *zapp = zap; 539 1.4 chs int err = mzap_upgrade(zapp, tag, tx, 0); 540 1.4 chs if (err != 0) 541 1.4 chs rw_exit(&zap->zap_rwlock); 542 1.4 chs return (err); 543 1.1 haad } 544 1.4 chs VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx)); 545 1.1 haad zap->zap_m.zap_num_chunks = 546 1.1 haad db->db_size / MZAP_ENT_LEN - 1; 547 1.1 haad } 548 1.1 haad 549 1.1 haad *zapp = zap; 550 1.1 haad return (0); 551 1.1 haad } 552 1.1 haad 553 1.4 chs static int 554 1.4 chs zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx, 555 1.4 chs krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp) 556 1.4 chs { 557 1.4 chs dmu_buf_t *db; 558 1.4 chs int err; 559 1.4 chs 560 1.4 chs err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH); 561 1.4 chs if (err != 0) { 562 1.4 chs return (err); 563 1.4 chs } 564 1.4 chs err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); 565 1.4 chs if (err != 0) { 566 1.4 chs dmu_buf_rele(db, tag); 567 1.4 chs } 568 1.4 chs return (err); 569 1.4 chs } 570 1.4 chs 571 1.4 chs int 572 1.4 chs zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, 573 1.4 chs krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp) 574 1.4 chs { 575 1.4 chs dmu_buf_t *db; 576 1.4 chs int err; 577 1.4 chs 578 1.4 chs err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH); 579 1.4 chs if (err != 0) 580 1.4 chs return (err); 581 1.4 chs err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); 582 1.4 chs if (err != 0) 583 1.4 chs dmu_buf_rele(db, tag); 584 1.4 chs return (err); 585 1.4 chs } 586 1.4 chs 587 1.1 haad void 588 1.4 chs zap_unlockdir(zap_t *zap, void *tag) 589 1.1 haad { 590 1.1 haad rw_exit(&zap->zap_rwlock); 591 1.4 chs dmu_buf_rele(zap->zap_dbuf, tag); 592 1.1 haad } 593 1.1 haad 594 1.1 haad static int 595 1.4 chs mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags) 596 1.1 haad { 597 1.1 haad mzap_phys_t *mzp; 598 1.3 haad int i, sz, nchunks; 599 1.3 haad int err = 0; 600 1.1 haad zap_t *zap = *zapp; 601 1.1 haad 602 1.1 haad ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 603 1.1 haad 604 1.1 haad sz = zap->zap_dbuf->db_size; 605 1.4 chs mzp = zio_buf_alloc(sz); 606 1.1 haad bcopy(zap->zap_dbuf->db_data, mzp, sz); 607 1.1 haad nchunks = zap->zap_m.zap_num_chunks; 608 1.1 haad 609 1.3 haad if (!flags) { 610 1.3 haad err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 611 1.3 haad 1ULL << fzap_default_block_shift, 0, tx); 612 1.3 haad if (err) { 613 1.4 chs zio_buf_free(mzp, sz); 614 1.3 haad return (err); 615 1.3 haad } 616 1.1 haad } 617 1.1 haad 618 1.1 haad dprintf("upgrading obj=%llu with %u chunks\n", 619 1.1 haad zap->zap_object, nchunks); 620 1.1 haad /* XXX destroy the avl later, so we can use the stored hash value */ 621 1.1 haad mze_destroy(zap); 622 1.1 haad 623 1.3 haad fzap_upgrade(zap, tx, flags); 624 1.1 haad 625 1.1 haad for (i = 0; i < nchunks; i++) { 626 1.1 haad mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; 627 1.1 haad zap_name_t *zn; 628 1.1 haad if (mze->mze_name[0] == 0) 629 1.1 haad continue; 630 1.1 haad dprintf("adding %s=%llu\n", 631 1.1 haad mze->mze_name, mze->mze_value); 632 1.1 haad zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); 633 1.4 chs err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, 634 1.4 chs tag, tx); 635 1.1 haad zap = zn->zn_zap; /* fzap_add_cd() may change zap */ 636 1.1 haad zap_name_free(zn); 637 1.1 haad if (err) 638 1.1 haad break; 639 1.1 haad } 640 1.4 chs zio_buf_free(mzp, sz); 641 1.1 haad *zapp = zap; 642 1.1 haad return (err); 643 1.1 haad } 644 1.1 haad 645 1.4 chs void 646 1.3 haad mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, 647 1.3 haad dmu_tx_t *tx) 648 1.1 haad { 649 1.1 haad dmu_buf_t *db; 650 1.1 haad mzap_phys_t *zp; 651 1.1 haad 652 1.4 chs VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); 653 1.1 haad 654 1.1 haad #ifdef ZFS_DEBUG 655 1.1 haad { 656 1.1 haad dmu_object_info_t doi; 657 1.1 haad dmu_object_info_from_db(db, &doi); 658 1.4 chs ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 659 1.1 haad } 660 1.1 haad #endif 661 1.1 haad 662 1.1 haad dmu_buf_will_dirty(db, tx); 663 1.1 haad zp = db->db_data; 664 1.1 haad zp->mz_block_type = ZBT_MICRO; 665 1.1 haad zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; 666 1.1 haad zp->mz_normflags = normflags; 667 1.1 haad dmu_buf_rele(db, FTAG); 668 1.3 haad 669 1.3 haad if (flags != 0) { 670 1.3 haad zap_t *zap; 671 1.3 haad /* Only fat zap supports flags; upgrade immediately. */ 672 1.3 haad VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, 673 1.4 chs B_FALSE, B_FALSE, FTAG, &zap)); 674 1.4 chs VERIFY3U(0, ==, mzap_upgrade(&zap, FTAG, tx, flags)); 675 1.4 chs zap_unlockdir(zap, FTAG); 676 1.3 haad } 677 1.1 haad } 678 1.1 haad 679 1.1 haad int 680 1.1 haad zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, 681 1.1 haad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 682 1.1 haad { 683 1.1 haad return (zap_create_claim_norm(os, obj, 684 1.1 haad 0, ot, bonustype, bonuslen, tx)); 685 1.1 haad } 686 1.1 haad 687 1.1 haad int 688 1.1 haad zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, 689 1.1 haad dmu_object_type_t ot, 690 1.1 haad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 691 1.1 haad { 692 1.1 haad int err; 693 1.1 haad 694 1.1 haad err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); 695 1.1 haad if (err != 0) 696 1.1 haad return (err); 697 1.3 haad mzap_create_impl(os, obj, normflags, 0, tx); 698 1.1 haad return (0); 699 1.1 haad } 700 1.1 haad 701 1.1 haad uint64_t 702 1.1 haad zap_create(objset_t *os, dmu_object_type_t ot, 703 1.1 haad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 704 1.1 haad { 705 1.1 haad return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); 706 1.1 haad } 707 1.1 haad 708 1.1 haad uint64_t 709 1.1 haad zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, 710 1.1 haad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 711 1.1 haad { 712 1.1 haad uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 713 1.1 haad 714 1.3 haad mzap_create_impl(os, obj, normflags, 0, tx); 715 1.3 haad return (obj); 716 1.3 haad } 717 1.3 haad 718 1.3 haad uint64_t 719 1.3 haad zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, 720 1.3 haad dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 721 1.3 haad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 722 1.3 haad { 723 1.3 haad uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 724 1.3 haad 725 1.3 haad ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && 726 1.4 chs leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT && 727 1.3 haad indirect_blockshift >= SPA_MINBLOCKSHIFT && 728 1.4 chs indirect_blockshift <= SPA_OLD_MAXBLOCKSHIFT); 729 1.3 haad 730 1.3 haad VERIFY(dmu_object_set_blocksize(os, obj, 731 1.3 haad 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); 732 1.3 haad 733 1.3 haad mzap_create_impl(os, obj, normflags, flags, tx); 734 1.1 haad return (obj); 735 1.1 haad } 736 1.1 haad 737 1.1 haad int 738 1.1 haad zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) 739 1.1 haad { 740 1.1 haad /* 741 1.1 haad * dmu_object_free will free the object number and free the 742 1.1 haad * data. Freeing the data will cause our pageout function to be 743 1.1 haad * called, which will destroy our data (zap_leaf_t's and zap_t). 744 1.1 haad */ 745 1.1 haad 746 1.1 haad return (dmu_object_free(os, zapobj, tx)); 747 1.1 haad } 748 1.1 haad 749 1.1 haad void 750 1.4 chs zap_evict_sync(void *dbu) 751 1.1 haad { 752 1.4 chs zap_t *zap = dbu; 753 1.1 haad 754 1.1 haad rw_destroy(&zap->zap_rwlock); 755 1.1 haad 756 1.1 haad if (zap->zap_ismicro) 757 1.1 haad mze_destroy(zap); 758 1.1 haad else 759 1.1 haad mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 760 1.1 haad 761 1.1 haad kmem_free(zap, sizeof (zap_t)); 762 1.1 haad } 763 1.1 haad 764 1.1 haad int 765 1.1 haad zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) 766 1.1 haad { 767 1.1 haad zap_t *zap; 768 1.1 haad int err; 769 1.1 haad 770 1.4 chs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 771 1.1 haad if (err) 772 1.1 haad return (err); 773 1.1 haad if (!zap->zap_ismicro) { 774 1.1 haad err = fzap_count(zap, count); 775 1.1 haad } else { 776 1.1 haad *count = zap->zap_m.zap_num_entries; 777 1.1 haad } 778 1.4 chs zap_unlockdir(zap, FTAG); 779 1.1 haad return (err); 780 1.1 haad } 781 1.1 haad 782 1.1 haad /* 783 1.1 haad * zn may be NULL; if not specified, it will be computed if needed. 784 1.1 haad * See also the comment above zap_entry_normalization_conflict(). 785 1.1 haad */ 786 1.1 haad static boolean_t 787 1.1 haad mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) 788 1.1 haad { 789 1.1 haad mzap_ent_t *other; 790 1.1 haad int direction = AVL_BEFORE; 791 1.1 haad boolean_t allocdzn = B_FALSE; 792 1.1 haad 793 1.1 haad if (zap->zap_normflags == 0) 794 1.1 haad return (B_FALSE); 795 1.1 haad 796 1.1 haad again: 797 1.1 haad for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); 798 1.1 haad other && other->mze_hash == mze->mze_hash; 799 1.1 haad other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { 800 1.1 haad 801 1.1 haad if (zn == NULL) { 802 1.4 chs zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, 803 1.1 haad MT_FIRST); 804 1.1 haad allocdzn = B_TRUE; 805 1.1 haad } 806 1.4 chs if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { 807 1.1 haad if (allocdzn) 808 1.1 haad zap_name_free(zn); 809 1.1 haad return (B_TRUE); 810 1.1 haad } 811 1.1 haad } 812 1.1 haad 813 1.1 haad if (direction == AVL_BEFORE) { 814 1.1 haad direction = AVL_AFTER; 815 1.1 haad goto again; 816 1.1 haad } 817 1.1 haad 818 1.1 haad if (allocdzn) 819 1.1 haad zap_name_free(zn); 820 1.1 haad return (B_FALSE); 821 1.1 haad } 822 1.1 haad 823 1.1 haad /* 824 1.1 haad * Routines for manipulating attributes. 825 1.1 haad */ 826 1.1 haad 827 1.1 haad int 828 1.1 haad zap_lookup(objset_t *os, uint64_t zapobj, const char *name, 829 1.1 haad uint64_t integer_size, uint64_t num_integers, void *buf) 830 1.1 haad { 831 1.1 haad return (zap_lookup_norm(os, zapobj, name, integer_size, 832 1.1 haad num_integers, buf, MT_EXACT, NULL, 0, NULL)); 833 1.1 haad } 834 1.1 haad 835 1.4 chs static int 836 1.4 chs zap_lookup_impl(zap_t *zap, const char *name, 837 1.1 haad uint64_t integer_size, uint64_t num_integers, void *buf, 838 1.1 haad matchtype_t mt, char *realname, int rn_len, 839 1.1 haad boolean_t *ncp) 840 1.1 haad { 841 1.4 chs int err = 0; 842 1.1 haad mzap_ent_t *mze; 843 1.1 haad zap_name_t *zn; 844 1.1 haad 845 1.1 haad zn = zap_name_alloc(zap, name, mt); 846 1.4 chs if (zn == NULL) 847 1.4 chs return (SET_ERROR(ENOTSUP)); 848 1.1 haad 849 1.1 haad if (!zap->zap_ismicro) { 850 1.1 haad err = fzap_lookup(zn, integer_size, num_integers, buf, 851 1.1 haad realname, rn_len, ncp); 852 1.1 haad } else { 853 1.1 haad mze = mze_find(zn); 854 1.1 haad if (mze == NULL) { 855 1.4 chs err = SET_ERROR(ENOENT); 856 1.1 haad } else { 857 1.1 haad if (num_integers < 1) { 858 1.4 chs err = SET_ERROR(EOVERFLOW); 859 1.1 haad } else if (integer_size != 8) { 860 1.4 chs err = SET_ERROR(EINVAL); 861 1.1 haad } else { 862 1.4 chs *(uint64_t *)buf = 863 1.4 chs MZE_PHYS(zap, mze)->mze_value; 864 1.2 haad if (realname != NULL) 865 1.2 haad (void) strlcpy(realname, 866 1.4 chs MZE_PHYS(zap, mze)->mze_name, rn_len); 867 1.1 haad if (ncp) { 868 1.1 haad *ncp = mzap_normalization_conflict(zap, 869 1.1 haad zn, mze); 870 1.1 haad } 871 1.1 haad } 872 1.1 haad } 873 1.1 haad } 874 1.1 haad zap_name_free(zn); 875 1.4 chs return (err); 876 1.4 chs } 877 1.4 chs 878 1.4 chs int 879 1.4 chs zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, 880 1.4 chs uint64_t integer_size, uint64_t num_integers, void *buf, 881 1.4 chs matchtype_t mt, char *realname, int rn_len, 882 1.4 chs boolean_t *ncp) 883 1.4 chs { 884 1.4 chs zap_t *zap; 885 1.4 chs int err; 886 1.4 chs 887 1.4 chs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 888 1.4 chs if (err != 0) 889 1.4 chs return (err); 890 1.4 chs err = zap_lookup_impl(zap, name, integer_size, 891 1.4 chs num_integers, buf, mt, realname, rn_len, ncp); 892 1.4 chs zap_unlockdir(zap, FTAG); 893 1.4 chs return (err); 894 1.4 chs } 895 1.4 chs 896 1.4 chs int 897 1.4 chs zap_lookup_by_dnode(dnode_t *dn, const char *name, 898 1.4 chs uint64_t integer_size, uint64_t num_integers, void *buf) 899 1.4 chs { 900 1.4 chs return (zap_lookup_norm_by_dnode(dn, name, integer_size, 901 1.4 chs num_integers, buf, MT_EXACT, NULL, 0, NULL)); 902 1.4 chs } 903 1.4 chs 904 1.4 chs int 905 1.4 chs zap_lookup_norm_by_dnode(dnode_t *dn, const char *name, 906 1.4 chs uint64_t integer_size, uint64_t num_integers, void *buf, 907 1.4 chs matchtype_t mt, char *realname, int rn_len, 908 1.4 chs boolean_t *ncp) 909 1.4 chs { 910 1.4 chs zap_t *zap; 911 1.4 chs int err; 912 1.4 chs 913 1.4 chs err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, 914 1.4 chs FTAG, &zap); 915 1.4 chs if (err != 0) 916 1.4 chs return (err); 917 1.4 chs err = zap_lookup_impl(zap, name, integer_size, 918 1.4 chs num_integers, buf, mt, realname, rn_len, ncp); 919 1.4 chs zap_unlockdir(zap, FTAG); 920 1.4 chs return (err); 921 1.4 chs } 922 1.4 chs 923 1.4 chs int 924 1.4 chs zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 925 1.4 chs int key_numints) 926 1.4 chs { 927 1.4 chs zap_t *zap; 928 1.4 chs int err; 929 1.4 chs zap_name_t *zn; 930 1.4 chs 931 1.4 chs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 932 1.4 chs if (err) 933 1.4 chs return (err); 934 1.4 chs zn = zap_name_alloc_uint64(zap, key, key_numints); 935 1.4 chs if (zn == NULL) { 936 1.4 chs zap_unlockdir(zap, FTAG); 937 1.4 chs return (SET_ERROR(ENOTSUP)); 938 1.4 chs } 939 1.4 chs 940 1.4 chs fzap_prefetch(zn); 941 1.4 chs zap_name_free(zn); 942 1.4 chs zap_unlockdir(zap, FTAG); 943 1.1 haad return (err); 944 1.1 haad } 945 1.1 haad 946 1.1 haad int 947 1.3 haad zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 948 1.3 haad int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) 949 1.3 haad { 950 1.3 haad zap_t *zap; 951 1.3 haad int err; 952 1.3 haad zap_name_t *zn; 953 1.3 haad 954 1.4 chs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 955 1.3 haad if (err) 956 1.3 haad return (err); 957 1.3 haad zn = zap_name_alloc_uint64(zap, key, key_numints); 958 1.3 haad if (zn == NULL) { 959 1.4 chs zap_unlockdir(zap, FTAG); 960 1.4 chs return (SET_ERROR(ENOTSUP)); 961 1.3 haad } 962 1.3 haad 963 1.3 haad err = fzap_lookup(zn, integer_size, num_integers, buf, 964 1.3 haad NULL, 0, NULL); 965 1.3 haad zap_name_free(zn); 966 1.4 chs zap_unlockdir(zap, FTAG); 967 1.3 haad return (err); 968 1.3 haad } 969 1.3 haad 970 1.3 haad int 971 1.3 haad zap_contains(objset_t *os, uint64_t zapobj, const char *name) 972 1.3 haad { 973 1.4 chs int err = zap_lookup_norm(os, zapobj, name, 0, 974 1.4 chs 0, NULL, MT_EXACT, NULL, 0, NULL); 975 1.3 haad if (err == EOVERFLOW || err == EINVAL) 976 1.3 haad err = 0; /* found, but skipped reading the value */ 977 1.3 haad return (err); 978 1.3 haad } 979 1.3 haad 980 1.3 haad int 981 1.1 haad zap_length(objset_t *os, uint64_t zapobj, const char *name, 982 1.1 haad uint64_t *integer_size, uint64_t *num_integers) 983 1.1 haad { 984 1.1 haad zap_t *zap; 985 1.1 haad int err; 986 1.1 haad mzap_ent_t *mze; 987 1.1 haad zap_name_t *zn; 988 1.1 haad 989 1.4 chs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 990 1.1 haad if (err) 991 1.1 haad return (err); 992 1.1 haad zn = zap_name_alloc(zap, name, MT_EXACT); 993 1.1 haad if (zn == NULL) { 994 1.4 chs zap_unlockdir(zap, FTAG); 995 1.4 chs return (SET_ERROR(ENOTSUP)); 996 1.1 haad } 997 1.1 haad if (!zap->zap_ismicro) { 998 1.1 haad err = fzap_length(zn, integer_size, num_integers); 999 1.1 haad } else { 1000 1.1 haad mze = mze_find(zn); 1001 1.1 haad if (mze == NULL) { 1002 1.4 chs err = SET_ERROR(ENOENT); 1003 1.1 haad } else { 1004 1.1 haad if (integer_size) 1005 1.1 haad *integer_size = 8; 1006 1.1 haad if (num_integers) 1007 1.1 haad *num_integers = 1; 1008 1.1 haad } 1009 1.1 haad } 1010 1.1 haad zap_name_free(zn); 1011 1.4 chs zap_unlockdir(zap, FTAG); 1012 1.1 haad return (err); 1013 1.1 haad } 1014 1.1 haad 1015 1.3 haad int 1016 1.3 haad zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1017 1.3 haad int key_numints, uint64_t *integer_size, uint64_t *num_integers) 1018 1.3 haad { 1019 1.3 haad zap_t *zap; 1020 1.3 haad int err; 1021 1.3 haad zap_name_t *zn; 1022 1.3 haad 1023 1.4 chs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 1024 1.3 haad if (err) 1025 1.3 haad return (err); 1026 1.3 haad zn = zap_name_alloc_uint64(zap, key, key_numints); 1027 1.3 haad if (zn == NULL) { 1028 1.4 chs zap_unlockdir(zap, FTAG); 1029 1.4 chs return (SET_ERROR(ENOTSUP)); 1030 1.3 haad } 1031 1.3 haad err = fzap_length(zn, integer_size, num_integers); 1032 1.3 haad zap_name_free(zn); 1033 1.4 chs zap_unlockdir(zap, FTAG); 1034 1.3 haad return (err); 1035 1.3 haad } 1036 1.3 haad 1037 1.1 haad static void 1038 1.1 haad mzap_addent(zap_name_t *zn, uint64_t value) 1039 1.1 haad { 1040 1.1 haad int i; 1041 1.1 haad zap_t *zap = zn->zn_zap; 1042 1.1 haad int start = zap->zap_m.zap_alloc_next; 1043 1.1 haad uint32_t cd; 1044 1.1 haad 1045 1.1 haad ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 1046 1.1 haad 1047 1.1 haad #ifdef ZFS_DEBUG 1048 1.1 haad for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 1049 1.4 chs mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; 1050 1.3 haad ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); 1051 1.1 haad } 1052 1.1 haad #endif 1053 1.1 haad 1054 1.1 haad cd = mze_find_unused_cd(zap, zn->zn_hash); 1055 1.1 haad /* given the limited size of the microzap, this can't happen */ 1056 1.3 haad ASSERT(cd < zap_maxcd(zap)); 1057 1.1 haad 1058 1.1 haad again: 1059 1.1 haad for (i = start; i < zap->zap_m.zap_num_chunks; i++) { 1060 1.4 chs mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; 1061 1.1 haad if (mze->mze_name[0] == 0) { 1062 1.1 haad mze->mze_value = value; 1063 1.1 haad mze->mze_cd = cd; 1064 1.3 haad (void) strcpy(mze->mze_name, zn->zn_key_orig); 1065 1.1 haad zap->zap_m.zap_num_entries++; 1066 1.1 haad zap->zap_m.zap_alloc_next = i+1; 1067 1.1 haad if (zap->zap_m.zap_alloc_next == 1068 1.1 haad zap->zap_m.zap_num_chunks) 1069 1.1 haad zap->zap_m.zap_alloc_next = 0; 1070 1.4 chs VERIFY(0 == mze_insert(zap, i, zn->zn_hash)); 1071 1.1 haad return; 1072 1.1 haad } 1073 1.1 haad } 1074 1.1 haad if (start != 0) { 1075 1.1 haad start = 0; 1076 1.1 haad goto again; 1077 1.1 haad } 1078 1.1 haad ASSERT(!"out of entries!"); 1079 1.1 haad } 1080 1.1 haad 1081 1.1 haad int 1082 1.3 haad zap_add(objset_t *os, uint64_t zapobj, const char *key, 1083 1.1 haad int integer_size, uint64_t num_integers, 1084 1.1 haad const void *val, dmu_tx_t *tx) 1085 1.1 haad { 1086 1.1 haad zap_t *zap; 1087 1.1 haad int err; 1088 1.1 haad mzap_ent_t *mze; 1089 1.1 haad const uint64_t *intval = val; 1090 1.1 haad zap_name_t *zn; 1091 1.1 haad 1092 1.4 chs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); 1093 1.1 haad if (err) 1094 1.1 haad return (err); 1095 1.3 haad zn = zap_name_alloc(zap, key, MT_EXACT); 1096 1.1 haad if (zn == NULL) { 1097 1.4 chs zap_unlockdir(zap, FTAG); 1098 1.4 chs return (SET_ERROR(ENOTSUP)); 1099 1.1 haad } 1100 1.1 haad if (!zap->zap_ismicro) { 1101 1.4 chs err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx); 1102 1.1 haad zap = zn->zn_zap; /* fzap_add() may change zap */ 1103 1.1 haad } else if (integer_size != 8 || num_integers != 1 || 1104 1.3 haad strlen(key) >= MZAP_NAME_LEN) { 1105 1.4 chs err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0); 1106 1.4 chs if (err == 0) { 1107 1.4 chs err = fzap_add(zn, integer_size, num_integers, val, 1108 1.4 chs FTAG, tx); 1109 1.4 chs } 1110 1.1 haad zap = zn->zn_zap; /* fzap_add() may change zap */ 1111 1.1 haad } else { 1112 1.1 haad mze = mze_find(zn); 1113 1.1 haad if (mze != NULL) { 1114 1.4 chs err = SET_ERROR(EEXIST); 1115 1.1 haad } else { 1116 1.1 haad mzap_addent(zn, *intval); 1117 1.1 haad } 1118 1.1 haad } 1119 1.1 haad ASSERT(zap == zn->zn_zap); 1120 1.1 haad zap_name_free(zn); 1121 1.1 haad if (zap != NULL) /* may be NULL if fzap_add() failed */ 1122 1.4 chs zap_unlockdir(zap, FTAG); 1123 1.1 haad return (err); 1124 1.1 haad } 1125 1.1 haad 1126 1.1 haad int 1127 1.3 haad zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1128 1.3 haad int key_numints, int integer_size, uint64_t num_integers, 1129 1.3 haad const void *val, dmu_tx_t *tx) 1130 1.3 haad { 1131 1.3 haad zap_t *zap; 1132 1.3 haad int err; 1133 1.3 haad zap_name_t *zn; 1134 1.3 haad 1135 1.4 chs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); 1136 1.3 haad if (err) 1137 1.3 haad return (err); 1138 1.3 haad zn = zap_name_alloc_uint64(zap, key, key_numints); 1139 1.3 haad if (zn == NULL) { 1140 1.4 chs zap_unlockdir(zap, FTAG); 1141 1.4 chs return (SET_ERROR(ENOTSUP)); 1142 1.3 haad } 1143 1.4 chs err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx); 1144 1.3 haad zap = zn->zn_zap; /* fzap_add() may change zap */ 1145 1.3 haad zap_name_free(zn); 1146 1.3 haad if (zap != NULL) /* may be NULL if fzap_add() failed */ 1147 1.4 chs zap_unlockdir(zap, FTAG); 1148 1.3 haad return (err); 1149 1.3 haad } 1150 1.3 haad 1151 1.3 haad int 1152 1.1 haad zap_update(objset_t *os, uint64_t zapobj, const char *name, 1153 1.1 haad int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1154 1.1 haad { 1155 1.1 haad zap_t *zap; 1156 1.1 haad mzap_ent_t *mze; 1157 1.4 chs uint64_t oldval; 1158 1.1 haad const uint64_t *intval = val; 1159 1.1 haad zap_name_t *zn; 1160 1.1 haad int err; 1161 1.1 haad 1162 1.4 chs #ifdef ZFS_DEBUG 1163 1.4 chs /* 1164 1.4 chs * If there is an old value, it shouldn't change across the 1165 1.4 chs * lockdir (eg, due to bprewrite's xlation). 1166 1.4 chs */ 1167 1.4 chs if (integer_size == 8 && num_integers == 1) 1168 1.4 chs (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); 1169 1.4 chs #endif 1170 1.4 chs 1171 1.4 chs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); 1172 1.1 haad if (err) 1173 1.1 haad return (err); 1174 1.1 haad zn = zap_name_alloc(zap, name, MT_EXACT); 1175 1.1 haad if (zn == NULL) { 1176 1.4 chs zap_unlockdir(zap, FTAG); 1177 1.4 chs return (SET_ERROR(ENOTSUP)); 1178 1.1 haad } 1179 1.1 haad if (!zap->zap_ismicro) { 1180 1.4 chs err = fzap_update(zn, integer_size, num_integers, val, 1181 1.4 chs FTAG, tx); 1182 1.1 haad zap = zn->zn_zap; /* fzap_update() may change zap */ 1183 1.1 haad } else if (integer_size != 8 || num_integers != 1 || 1184 1.1 haad strlen(name) >= MZAP_NAME_LEN) { 1185 1.1 haad dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 1186 1.1 haad zapobj, integer_size, num_integers, name); 1187 1.4 chs err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0); 1188 1.4 chs if (err == 0) { 1189 1.1 haad err = fzap_update(zn, integer_size, num_integers, 1190 1.4 chs val, FTAG, tx); 1191 1.4 chs } 1192 1.1 haad zap = zn->zn_zap; /* fzap_update() may change zap */ 1193 1.1 haad } else { 1194 1.1 haad mze = mze_find(zn); 1195 1.1 haad if (mze != NULL) { 1196 1.4 chs ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); 1197 1.4 chs MZE_PHYS(zap, mze)->mze_value = *intval; 1198 1.1 haad } else { 1199 1.1 haad mzap_addent(zn, *intval); 1200 1.1 haad } 1201 1.1 haad } 1202 1.1 haad ASSERT(zap == zn->zn_zap); 1203 1.1 haad zap_name_free(zn); 1204 1.1 haad if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1205 1.4 chs zap_unlockdir(zap, FTAG); 1206 1.1 haad return (err); 1207 1.1 haad } 1208 1.1 haad 1209 1.1 haad int 1210 1.3 haad zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1211 1.3 haad int key_numints, 1212 1.3 haad int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1213 1.3 haad { 1214 1.3 haad zap_t *zap; 1215 1.3 haad zap_name_t *zn; 1216 1.3 haad int err; 1217 1.3 haad 1218 1.4 chs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); 1219 1.3 haad if (err) 1220 1.3 haad return (err); 1221 1.3 haad zn = zap_name_alloc_uint64(zap, key, key_numints); 1222 1.3 haad if (zn == NULL) { 1223 1.4 chs zap_unlockdir(zap, FTAG); 1224 1.4 chs return (SET_ERROR(ENOTSUP)); 1225 1.3 haad } 1226 1.4 chs err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx); 1227 1.3 haad zap = zn->zn_zap; /* fzap_update() may change zap */ 1228 1.3 haad zap_name_free(zn); 1229 1.3 haad if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1230 1.4 chs zap_unlockdir(zap, FTAG); 1231 1.3 haad return (err); 1232 1.3 haad } 1233 1.3 haad 1234 1.3 haad int 1235 1.1 haad zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) 1236 1.1 haad { 1237 1.1 haad return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); 1238 1.1 haad } 1239 1.1 haad 1240 1.1 haad int 1241 1.1 haad zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, 1242 1.1 haad matchtype_t mt, dmu_tx_t *tx) 1243 1.1 haad { 1244 1.1 haad zap_t *zap; 1245 1.1 haad int err; 1246 1.1 haad mzap_ent_t *mze; 1247 1.1 haad zap_name_t *zn; 1248 1.1 haad 1249 1.4 chs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap); 1250 1.1 haad if (err) 1251 1.1 haad return (err); 1252 1.1 haad zn = zap_name_alloc(zap, name, mt); 1253 1.1 haad if (zn == NULL) { 1254 1.4 chs zap_unlockdir(zap, FTAG); 1255 1.4 chs return (SET_ERROR(ENOTSUP)); 1256 1.1 haad } 1257 1.1 haad if (!zap->zap_ismicro) { 1258 1.1 haad err = fzap_remove(zn, tx); 1259 1.1 haad } else { 1260 1.1 haad mze = mze_find(zn); 1261 1.1 haad if (mze == NULL) { 1262 1.4 chs err = SET_ERROR(ENOENT); 1263 1.1 haad } else { 1264 1.1 haad zap->zap_m.zap_num_entries--; 1265 1.4 chs bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], 1266 1.1 haad sizeof (mzap_ent_phys_t)); 1267 1.1 haad mze_remove(zap, mze); 1268 1.1 haad } 1269 1.1 haad } 1270 1.1 haad zap_name_free(zn); 1271 1.4 chs zap_unlockdir(zap, FTAG); 1272 1.1 haad return (err); 1273 1.1 haad } 1274 1.1 haad 1275 1.3 haad int 1276 1.3 haad zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1277 1.3 haad int key_numints, dmu_tx_t *tx) 1278 1.3 haad { 1279 1.3 haad zap_t *zap; 1280 1.3 haad int err; 1281 1.3 haad zap_name_t *zn; 1282 1.3 haad 1283 1.4 chs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap); 1284 1.3 haad if (err) 1285 1.3 haad return (err); 1286 1.3 haad zn = zap_name_alloc_uint64(zap, key, key_numints); 1287 1.3 haad if (zn == NULL) { 1288 1.4 chs zap_unlockdir(zap, FTAG); 1289 1.4 chs return (SET_ERROR(ENOTSUP)); 1290 1.3 haad } 1291 1.3 haad err = fzap_remove(zn, tx); 1292 1.3 haad zap_name_free(zn); 1293 1.4 chs zap_unlockdir(zap, FTAG); 1294 1.3 haad return (err); 1295 1.3 haad } 1296 1.3 haad 1297 1.1 haad /* 1298 1.1 haad * Routines for iterating over the attributes. 1299 1.1 haad */ 1300 1.1 haad 1301 1.1 haad void 1302 1.1 haad zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, 1303 1.1 haad uint64_t serialized) 1304 1.1 haad { 1305 1.1 haad zc->zc_objset = os; 1306 1.1 haad zc->zc_zap = NULL; 1307 1.1 haad zc->zc_leaf = NULL; 1308 1.1 haad zc->zc_zapobj = zapobj; 1309 1.3 haad zc->zc_serialized = serialized; 1310 1.3 haad zc->zc_hash = 0; 1311 1.3 haad zc->zc_cd = 0; 1312 1.1 haad } 1313 1.1 haad 1314 1.1 haad void 1315 1.1 haad zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) 1316 1.1 haad { 1317 1.1 haad zap_cursor_init_serialized(zc, os, zapobj, 0); 1318 1.1 haad } 1319 1.1 haad 1320 1.1 haad void 1321 1.1 haad zap_cursor_fini(zap_cursor_t *zc) 1322 1.1 haad { 1323 1.1 haad if (zc->zc_zap) { 1324 1.1 haad rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1325 1.4 chs zap_unlockdir(zc->zc_zap, NULL); 1326 1.1 haad zc->zc_zap = NULL; 1327 1.1 haad } 1328 1.1 haad if (zc->zc_leaf) { 1329 1.1 haad rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 1330 1.1 haad zap_put_leaf(zc->zc_leaf); 1331 1.1 haad zc->zc_leaf = NULL; 1332 1.1 haad } 1333 1.1 haad zc->zc_objset = NULL; 1334 1.1 haad } 1335 1.1 haad 1336 1.1 haad uint64_t 1337 1.1 haad zap_cursor_serialize(zap_cursor_t *zc) 1338 1.1 haad { 1339 1.1 haad if (zc->zc_hash == -1ULL) 1340 1.1 haad return (-1ULL); 1341 1.3 haad if (zc->zc_zap == NULL) 1342 1.3 haad return (zc->zc_serialized); 1343 1.3 haad ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); 1344 1.3 haad ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); 1345 1.3 haad 1346 1.3 haad /* 1347 1.3 haad * We want to keep the high 32 bits of the cursor zero if we can, so 1348 1.3 haad * that 32-bit programs can access this. So usually use a small 1349 1.3 haad * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits 1350 1.3 haad * of the cursor. 1351 1.3 haad * 1352 1.3 haad * [ collision differentiator | zap_hashbits()-bit hash value ] 1353 1.3 haad */ 1354 1.3 haad return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | 1355 1.3 haad ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); 1356 1.1 haad } 1357 1.1 haad 1358 1.1 haad int 1359 1.1 haad zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) 1360 1.1 haad { 1361 1.1 haad int err; 1362 1.1 haad avl_index_t idx; 1363 1.1 haad mzap_ent_t mze_tofind; 1364 1.1 haad mzap_ent_t *mze; 1365 1.1 haad 1366 1.1 haad if (zc->zc_hash == -1ULL) 1367 1.4 chs return (SET_ERROR(ENOENT)); 1368 1.1 haad 1369 1.1 haad if (zc->zc_zap == NULL) { 1370 1.3 haad int hb; 1371 1.1 haad err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 1372 1.4 chs RW_READER, TRUE, FALSE, NULL, &zc->zc_zap); 1373 1.1 haad if (err) 1374 1.1 haad return (err); 1375 1.3 haad 1376 1.3 haad /* 1377 1.3 haad * To support zap_cursor_init_serialized, advance, retrieve, 1378 1.3 haad * we must add to the existing zc_cd, which may already 1379 1.3 haad * be 1 due to the zap_cursor_advance. 1380 1.3 haad */ 1381 1.3 haad ASSERT(zc->zc_hash == 0); 1382 1.3 haad hb = zap_hashbits(zc->zc_zap); 1383 1.3 haad zc->zc_hash = zc->zc_serialized << (64 - hb); 1384 1.3 haad zc->zc_cd += zc->zc_serialized >> hb; 1385 1.3 haad if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ 1386 1.3 haad zc->zc_cd = 0; 1387 1.1 haad } else { 1388 1.1 haad rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1389 1.1 haad } 1390 1.1 haad if (!zc->zc_zap->zap_ismicro) { 1391 1.1 haad err = fzap_cursor_retrieve(zc->zc_zap, zc, za); 1392 1.1 haad } else { 1393 1.1 haad mze_tofind.mze_hash = zc->zc_hash; 1394 1.4 chs mze_tofind.mze_cd = zc->zc_cd; 1395 1.1 haad 1396 1.1 haad mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); 1397 1.1 haad if (mze == NULL) { 1398 1.1 haad mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, 1399 1.1 haad idx, AVL_AFTER); 1400 1.1 haad } 1401 1.1 haad if (mze) { 1402 1.4 chs mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); 1403 1.4 chs ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); 1404 1.1 haad za->za_normalization_conflict = 1405 1.1 haad mzap_normalization_conflict(zc->zc_zap, NULL, mze); 1406 1.1 haad za->za_integer_length = 8; 1407 1.1 haad za->za_num_integers = 1; 1408 1.4 chs za->za_first_integer = mzep->mze_value; 1409 1.4 chs (void) strcpy(za->za_name, mzep->mze_name); 1410 1.1 haad zc->zc_hash = mze->mze_hash; 1411 1.4 chs zc->zc_cd = mze->mze_cd; 1412 1.1 haad err = 0; 1413 1.1 haad } else { 1414 1.1 haad zc->zc_hash = -1ULL; 1415 1.4 chs err = SET_ERROR(ENOENT); 1416 1.1 haad } 1417 1.1 haad } 1418 1.1 haad rw_exit(&zc->zc_zap->zap_rwlock); 1419 1.1 haad return (err); 1420 1.1 haad } 1421 1.1 haad 1422 1.1 haad void 1423 1.1 haad zap_cursor_advance(zap_cursor_t *zc) 1424 1.1 haad { 1425 1.1 haad if (zc->zc_hash == -1ULL) 1426 1.1 haad return; 1427 1.1 haad zc->zc_cd++; 1428 1.3 haad } 1429 1.3 haad 1430 1.3 haad int 1431 1.3 haad zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) 1432 1.3 haad { 1433 1.3 haad int err = 0; 1434 1.3 haad mzap_ent_t *mze; 1435 1.3 haad zap_name_t *zn; 1436 1.3 haad 1437 1.3 haad if (zc->zc_zap == NULL) { 1438 1.3 haad err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 1439 1.4 chs RW_READER, TRUE, FALSE, FTAG, &zc->zc_zap); 1440 1.3 haad if (err) 1441 1.3 haad return (err); 1442 1.3 haad } else { 1443 1.3 haad rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1444 1.3 haad } 1445 1.3 haad 1446 1.3 haad zn = zap_name_alloc(zc->zc_zap, name, mt); 1447 1.3 haad if (zn == NULL) { 1448 1.3 haad rw_exit(&zc->zc_zap->zap_rwlock); 1449 1.4 chs return (SET_ERROR(ENOTSUP)); 1450 1.3 haad } 1451 1.3 haad 1452 1.3 haad if (!zc->zc_zap->zap_ismicro) { 1453 1.3 haad err = fzap_cursor_move_to_key(zc, zn); 1454 1.3 haad } else { 1455 1.3 haad mze = mze_find(zn); 1456 1.3 haad if (mze == NULL) { 1457 1.4 chs err = SET_ERROR(ENOENT); 1458 1.3 haad goto out; 1459 1.3 haad } 1460 1.3 haad zc->zc_hash = mze->mze_hash; 1461 1.4 chs zc->zc_cd = mze->mze_cd; 1462 1.1 haad } 1463 1.3 haad 1464 1.3 haad out: 1465 1.3 haad zap_name_free(zn); 1466 1.3 haad rw_exit(&zc->zc_zap->zap_rwlock); 1467 1.3 haad return (err); 1468 1.1 haad } 1469 1.1 haad 1470 1.1 haad int 1471 1.1 haad zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) 1472 1.1 haad { 1473 1.1 haad int err; 1474 1.1 haad zap_t *zap; 1475 1.1 haad 1476 1.4 chs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 1477 1.1 haad if (err) 1478 1.1 haad return (err); 1479 1.1 haad 1480 1.1 haad bzero(zs, sizeof (zap_stats_t)); 1481 1.1 haad 1482 1.1 haad if (zap->zap_ismicro) { 1483 1.1 haad zs->zs_blocksize = zap->zap_dbuf->db_size; 1484 1.1 haad zs->zs_num_entries = zap->zap_m.zap_num_entries; 1485 1.1 haad zs->zs_num_blocks = 1; 1486 1.1 haad } else { 1487 1.1 haad fzap_get_stats(zap, zs); 1488 1.1 haad } 1489 1.4 chs zap_unlockdir(zap, FTAG); 1490 1.1 haad return (0); 1491 1.1 haad } 1492 1.3 haad 1493 1.3 haad int 1494 1.4 chs zap_count_write_by_dnode(dnode_t *dn, const char *name, int add, 1495 1.4 chs refcount_t *towrite, refcount_t *tooverwrite) 1496 1.3 haad { 1497 1.3 haad zap_t *zap; 1498 1.3 haad int err = 0; 1499 1.3 haad 1500 1.3 haad /* 1501 1.3 haad * Since, we don't have a name, we cannot figure out which blocks will 1502 1.3 haad * be affected in this operation. So, account for the worst case : 1503 1.3 haad * - 3 blocks overwritten: target leaf, ptrtbl block, header block 1504 1.3 haad * - 4 new blocks written if adding: 1505 1.4 chs * - 2 blocks for possibly split leaves, 1506 1.4 chs * - 2 grown ptrtbl blocks 1507 1.3 haad * 1508 1.4 chs * This also accommodates the case where an add operation to a fairly 1509 1.3 haad * large microzap results in a promotion to fatzap. 1510 1.3 haad */ 1511 1.3 haad if (name == NULL) { 1512 1.4 chs (void) refcount_add_many(towrite, 1513 1.4 chs (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG); 1514 1.3 haad return (err); 1515 1.3 haad } 1516 1.3 haad 1517 1.3 haad /* 1518 1.4 chs * We lock the zap with adding == FALSE. Because, if we pass 1519 1.3 haad * the actual value of add, it could trigger a mzap_upgrade(). 1520 1.3 haad * At present we are just evaluating the possibility of this operation 1521 1.4 chs * and hence we do not want to trigger an upgrade. 1522 1.3 haad */ 1523 1.4 chs err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, 1524 1.4 chs FTAG, &zap); 1525 1.4 chs if (err != 0) 1526 1.3 haad return (err); 1527 1.3 haad 1528 1.3 haad if (!zap->zap_ismicro) { 1529 1.3 haad zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); 1530 1.3 haad if (zn) { 1531 1.3 haad err = fzap_count_write(zn, add, towrite, 1532 1.3 haad tooverwrite); 1533 1.3 haad zap_name_free(zn); 1534 1.3 haad } else { 1535 1.3 haad /* 1536 1.3 haad * We treat this case as similar to (name == NULL) 1537 1.3 haad */ 1538 1.4 chs (void) refcount_add_many(towrite, 1539 1.4 chs (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG); 1540 1.3 haad } 1541 1.3 haad } else { 1542 1.3 haad /* 1543 1.3 haad * We are here if (name != NULL) and this is a micro-zap. 1544 1.3 haad * We account for the header block depending on whether it 1545 1.3 haad * is freeable. 1546 1.3 haad * 1547 1.3 haad * Incase of an add-operation it is hard to find out 1548 1.3 haad * if this add will promote this microzap to fatzap. 1549 1.3 haad * Hence, we consider the worst case and account for the 1550 1.3 haad * blocks assuming this microzap would be promoted to a 1551 1.3 haad * fatzap. 1552 1.3 haad * 1553 1.3 haad * 1 block overwritten : header block 1554 1.3 haad * 4 new blocks written : 2 new split leaf, 2 grown 1555 1.3 haad * ptrtbl blocks 1556 1.3 haad */ 1557 1.4 chs if (dmu_buf_freeable(zap->zap_dbuf)) { 1558 1.4 chs (void) refcount_add_many(tooverwrite, 1559 1.4 chs MZAP_MAX_BLKSZ, FTAG); 1560 1.4 chs } else { 1561 1.4 chs (void) refcount_add_many(towrite, 1562 1.4 chs MZAP_MAX_BLKSZ, FTAG); 1563 1.4 chs } 1564 1.3 haad 1565 1.3 haad if (add) { 1566 1.4 chs (void) refcount_add_many(towrite, 1567 1.4 chs 4 * MZAP_MAX_BLKSZ, FTAG); 1568 1.3 haad } 1569 1.3 haad } 1570 1.3 haad 1571 1.4 chs zap_unlockdir(zap, FTAG); 1572 1.3 haad return (err); 1573 1.3 haad } 1574