Home | History | Annotate | Line # | Download | only in zfs
      1  1.1  haad /*
      2  1.1  haad  * CDDL HEADER START
      3  1.1  haad  *
      4  1.1  haad  * The contents of this file are subject to the terms of the
      5  1.1  haad  * Common Development and Distribution License (the "License").
      6  1.1  haad  * You may not use this file except in compliance with the License.
      7  1.1  haad  *
      8  1.1  haad  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  1.1  haad  * or http://www.opensolaris.org/os/licensing.
     10  1.1  haad  * See the License for the specific language governing permissions
     11  1.1  haad  * and limitations under the License.
     12  1.1  haad  *
     13  1.1  haad  * When distributing Covered Code, include this CDDL HEADER in each
     14  1.1  haad  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  1.1  haad  * If applicable, add the following below this CDDL HEADER, with the
     16  1.1  haad  * fields enclosed by brackets "[]" replaced with your own identifying
     17  1.1  haad  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  1.1  haad  *
     19  1.1  haad  * CDDL HEADER END
     20  1.1  haad  */
     21  1.1  haad /*
     22  1.4   chs  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
     23  1.4   chs  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
     24  1.4   chs  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
     25  1.4   chs  * Copyright (c) 2014 Integros [integros.com]
     26  1.1  haad  */
     27  1.1  haad 
     28  1.3  haad #include <sys/zio.h>
     29  1.1  haad #include <sys/spa.h>
     30  1.1  haad #include <sys/dmu.h>
     31  1.1  haad #include <sys/zfs_context.h>
     32  1.1  haad #include <sys/zap.h>
     33  1.1  haad #include <sys/refcount.h>
     34  1.1  haad #include <sys/zap_impl.h>
     35  1.1  haad #include <sys/zap_leaf.h>
     36  1.1  haad #include <sys/avl.h>
     37  1.4   chs #include <sys/arc.h>
     38  1.4   chs #include <sys/dmu_objset.h>
     39  1.1  haad 
     40  1.1  haad #ifdef _KERNEL
     41  1.1  haad #include <sys/sunddi.h>
     42  1.1  haad #endif
     43  1.1  haad 
/*
 * Non-inline declaration of the zap_m_phys() accessor.
 * NOTE(review): the inline definition is presumably provided by
 * <sys/zap_impl.h>; this declaration would then make this file the one
 * that emits its external definition -- confirm against the header.
 */
extern inline mzap_phys_t *zap_m_phys(zap_t *zap);

/*
 * Forward declaration: mzap_upgrade() is called from zap_lockdir_impl()
 * before its definition appears later in this file.
 */
static int mzap_upgrade(zap_t **zapp,
    void *tag, dmu_tx_t *tx, zap_flags_t flags);
     48  1.1  haad 
     49  1.3  haad uint64_t
     50  1.3  haad zap_getflags(zap_t *zap)
     51  1.3  haad {
     52  1.3  haad 	if (zap->zap_ismicro)
     53  1.3  haad 		return (0);
     54  1.4   chs 	return (zap_f_phys(zap)->zap_flags);
     55  1.3  haad }
     56  1.3  haad 
     57  1.3  haad int
     58  1.3  haad zap_hashbits(zap_t *zap)
     59  1.3  haad {
     60  1.3  haad 	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
     61  1.3  haad 		return (48);
     62  1.3  haad 	else
     63  1.3  haad 		return (28);
     64  1.3  haad }
     65  1.3  haad 
     66  1.3  haad uint32_t
     67  1.3  haad zap_maxcd(zap_t *zap)
     68  1.3  haad {
     69  1.3  haad 	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
     70  1.3  haad 		return ((1<<16)-1);
     71  1.3  haad 	else
     72  1.3  haad 		return (-1U);
     73  1.3  haad }
     74  1.1  haad 
     75  1.1  haad static uint64_t
     76  1.3  haad zap_hash(zap_name_t *zn)
     77  1.1  haad {
     78  1.3  haad 	zap_t *zap = zn->zn_zap;
     79  1.3  haad 	uint64_t h = 0;
     80  1.3  haad 
     81  1.3  haad 	if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
     82  1.3  haad 		ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
     83  1.3  haad 		h = *(uint64_t *)zn->zn_key_orig;
     84  1.3  haad 	} else {
     85  1.3  haad 		h = zap->zap_salt;
     86  1.3  haad 		ASSERT(h != 0);
     87  1.3  haad 		ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
     88  1.3  haad 
     89  1.3  haad 		if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
     90  1.3  haad 			int i;
     91  1.3  haad 			const uint64_t *wp = zn->zn_key_norm;
     92  1.3  haad 
     93  1.3  haad 			ASSERT(zn->zn_key_intlen == 8);
     94  1.3  haad 			for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) {
     95  1.3  haad 				int j;
     96  1.3  haad 				uint64_t word = *wp;
     97  1.3  haad 
     98  1.3  haad 				for (j = 0; j < zn->zn_key_intlen; j++) {
     99  1.3  haad 					h = (h >> 8) ^
    100  1.3  haad 					    zfs_crc64_table[(h ^ word) & 0xFF];
    101  1.3  haad 					word >>= NBBY;
    102  1.3  haad 				}
    103  1.3  haad 			}
    104  1.3  haad 		} else {
    105  1.3  haad 			int i, len;
    106  1.3  haad 			const uint8_t *cp = zn->zn_key_norm;
    107  1.3  haad 
    108  1.3  haad 			/*
    109  1.3  haad 			 * We previously stored the terminating null on
    110  1.3  haad 			 * disk, but didn't hash it, so we need to
    111  1.3  haad 			 * continue to not hash it.  (The
    112  1.3  haad 			 * zn_key_*_numints includes the terminating
    113  1.3  haad 			 * null for non-binary keys.)
    114  1.3  haad 			 */
    115  1.3  haad 			len = zn->zn_key_norm_numints - 1;
    116  1.3  haad 
    117  1.3  haad 			ASSERT(zn->zn_key_intlen == 1);
    118  1.3  haad 			for (i = 0; i < len; cp++, i++) {
    119  1.3  haad 				h = (h >> 8) ^
    120  1.3  haad 				    zfs_crc64_table[(h ^ *cp) & 0xFF];
    121  1.3  haad 			}
    122  1.3  haad 		}
    123  1.1  haad 	}
    124  1.1  haad 	/*
    125  1.3  haad 	 * Don't use all 64 bits, since we need some in the cookie for
    126  1.3  haad 	 * the collision differentiator.  We MUST use the high bits,
    127  1.3  haad 	 * since those are the ones that we first pay attention to when
    128  1.1  haad 	 * chosing the bucket.
    129  1.1  haad 	 */
    130  1.3  haad 	h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
    131  1.1  haad 
    132  1.3  haad 	return (h);
    133  1.1  haad }
    134  1.1  haad 
    135  1.1  haad static int
    136  1.1  haad zap_normalize(zap_t *zap, const char *name, char *namenorm)
    137  1.1  haad {
    138  1.1  haad 	size_t inlen, outlen;
    139  1.1  haad 	int err;
    140  1.1  haad 
    141  1.3  haad 	ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));
    142  1.3  haad 
    143  1.1  haad 	inlen = strlen(name) + 1;
    144  1.1  haad 	outlen = ZAP_MAXNAMELEN;
    145  1.1  haad 
    146  1.1  haad 	err = 0;
    147  1.1  haad 	(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
    148  1.3  haad 	    zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL |
    149  1.3  haad 	    U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err);
    150  1.1  haad 
    151  1.1  haad 	return (err);
    152  1.1  haad }
    153  1.1  haad 
    154  1.1  haad boolean_t
    155  1.1  haad zap_match(zap_name_t *zn, const char *matchname)
    156  1.1  haad {
    157  1.3  haad 	ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
    158  1.3  haad 
    159  1.1  haad 	if (zn->zn_matchtype == MT_FIRST) {
    160  1.1  haad 		char norm[ZAP_MAXNAMELEN];
    161  1.1  haad 
    162  1.1  haad 		if (zap_normalize(zn->zn_zap, matchname, norm) != 0)
    163  1.1  haad 			return (B_FALSE);
    164  1.1  haad 
    165  1.3  haad 		return (strcmp(zn->zn_key_norm, norm) == 0);
    166  1.1  haad 	} else {
    167  1.1  haad 		/* MT_BEST or MT_EXACT */
    168  1.3  haad 		return (strcmp(zn->zn_key_orig, matchname) == 0);
    169  1.1  haad 	}
    170  1.1  haad }
    171  1.1  haad 
    172  1.1  haad void
    173  1.1  haad zap_name_free(zap_name_t *zn)
    174  1.1  haad {
    175  1.1  haad 	kmem_free(zn, sizeof (zap_name_t));
    176  1.1  haad }
    177  1.1  haad 
    178  1.1  haad zap_name_t *
    179  1.3  haad zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
    180  1.1  haad {
    181  1.1  haad 	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
    182  1.1  haad 
    183  1.1  haad 	zn->zn_zap = zap;
    184  1.3  haad 	zn->zn_key_intlen = sizeof (*key);
    185  1.3  haad 	zn->zn_key_orig = key;
    186  1.3  haad 	zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
    187  1.1  haad 	zn->zn_matchtype = mt;
    188  1.1  haad 	if (zap->zap_normflags) {
    189  1.3  haad 		if (zap_normalize(zap, key, zn->zn_normbuf) != 0) {
    190  1.1  haad 			zap_name_free(zn);
    191  1.1  haad 			return (NULL);
    192  1.1  haad 		}
    193  1.3  haad 		zn->zn_key_norm = zn->zn_normbuf;
    194  1.3  haad 		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
    195  1.1  haad 	} else {
    196  1.1  haad 		if (mt != MT_EXACT) {
    197  1.1  haad 			zap_name_free(zn);
    198  1.1  haad 			return (NULL);
    199  1.1  haad 		}
    200  1.3  haad 		zn->zn_key_norm = zn->zn_key_orig;
    201  1.3  haad 		zn->zn_key_norm_numints = zn->zn_key_orig_numints;
    202  1.1  haad 	}
    203  1.1  haad 
    204  1.3  haad 	zn->zn_hash = zap_hash(zn);
    205  1.3  haad 	return (zn);
    206  1.3  haad }
    207  1.3  haad 
    208  1.3  haad zap_name_t *
    209  1.3  haad zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
    210  1.3  haad {
    211  1.3  haad 	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
    212  1.3  haad 
    213  1.3  haad 	ASSERT(zap->zap_normflags == 0);
    214  1.3  haad 	zn->zn_zap = zap;
    215  1.3  haad 	zn->zn_key_intlen = sizeof (*key);
    216  1.3  haad 	zn->zn_key_orig = zn->zn_key_norm = key;
    217  1.3  haad 	zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
    218  1.3  haad 	zn->zn_matchtype = MT_EXACT;
    219  1.3  haad 
    220  1.3  haad 	zn->zn_hash = zap_hash(zn);
    221  1.1  haad 	return (zn);
    222  1.1  haad }
    223  1.1  haad 
    224  1.1  haad static void
    225  1.1  haad mzap_byteswap(mzap_phys_t *buf, size_t size)
    226  1.1  haad {
    227  1.1  haad 	int i, max;
    228  1.1  haad 	buf->mz_block_type = BSWAP_64(buf->mz_block_type);
    229  1.1  haad 	buf->mz_salt = BSWAP_64(buf->mz_salt);
    230  1.1  haad 	buf->mz_normflags = BSWAP_64(buf->mz_normflags);
    231  1.1  haad 	max = (size / MZAP_ENT_LEN) - 1;
    232  1.1  haad 	for (i = 0; i < max; i++) {
    233  1.1  haad 		buf->mz_chunk[i].mze_value =
    234  1.1  haad 		    BSWAP_64(buf->mz_chunk[i].mze_value);
    235  1.1  haad 		buf->mz_chunk[i].mze_cd =
    236  1.1  haad 		    BSWAP_32(buf->mz_chunk[i].mze_cd);
    237  1.1  haad 	}
    238  1.1  haad }
    239  1.1  haad 
    240  1.1  haad void
    241  1.1  haad zap_byteswap(void *buf, size_t size)
    242  1.1  haad {
    243  1.1  haad 	uint64_t block_type;
    244  1.1  haad 
    245  1.1  haad 	block_type = *(uint64_t *)buf;
    246  1.1  haad 
    247  1.1  haad 	if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
    248  1.1  haad 		/* ASSERT(magic == ZAP_LEAF_MAGIC); */
    249  1.1  haad 		mzap_byteswap(buf, size);
    250  1.1  haad 	} else {
    251  1.1  haad 		fzap_byteswap(buf, size);
    252  1.1  haad 	}
    253  1.1  haad }
    254  1.1  haad 
    255  1.1  haad static int
    256  1.1  haad mze_compare(const void *arg1, const void *arg2)
    257  1.1  haad {
    258  1.1  haad 	const mzap_ent_t *mze1 = arg1;
    259  1.1  haad 	const mzap_ent_t *mze2 = arg2;
    260  1.1  haad 
    261  1.1  haad 	if (mze1->mze_hash > mze2->mze_hash)
    262  1.1  haad 		return (+1);
    263  1.1  haad 	if (mze1->mze_hash < mze2->mze_hash)
    264  1.1  haad 		return (-1);
    265  1.4   chs 	if (mze1->mze_cd > mze2->mze_cd)
    266  1.1  haad 		return (+1);
    267  1.4   chs 	if (mze1->mze_cd < mze2->mze_cd)
    268  1.1  haad 		return (-1);
    269  1.1  haad 	return (0);
    270  1.1  haad }
    271  1.1  haad 
    272  1.4   chs static int
    273  1.4   chs mze_insert(zap_t *zap, int chunkid, uint64_t hash)
    274  1.1  haad {
    275  1.1  haad 	mzap_ent_t *mze;
    276  1.4   chs 	avl_index_t idx;
    277  1.1  haad 
    278  1.1  haad 	ASSERT(zap->zap_ismicro);
    279  1.1  haad 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
    280  1.1  haad 
    281  1.1  haad 	mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
    282  1.1  haad 	mze->mze_chunkid = chunkid;
    283  1.1  haad 	mze->mze_hash = hash;
    284  1.4   chs 	mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
    285  1.4   chs 	ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
    286  1.4   chs 	if (avl_find(&zap->zap_m.zap_avl, mze, &idx) != NULL) {
    287  1.4   chs 		kmem_free(mze, sizeof (mzap_ent_t));
    288  1.4   chs 		return (EEXIST);
    289  1.4   chs 	}
    290  1.4   chs 	avl_insert(&zap->zap_m.zap_avl, mze, idx);
    291  1.4   chs 	return (0);
    292  1.1  haad }
    293  1.1  haad 
    294  1.1  haad static mzap_ent_t *
    295  1.1  haad mze_find(zap_name_t *zn)
    296  1.1  haad {
    297  1.1  haad 	mzap_ent_t mze_tofind;
    298  1.1  haad 	mzap_ent_t *mze;
    299  1.1  haad 	avl_index_t idx;
    300  1.1  haad 	avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
    301  1.1  haad 
    302  1.1  haad 	ASSERT(zn->zn_zap->zap_ismicro);
    303  1.1  haad 	ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
    304  1.1  haad 
    305  1.1  haad 	mze_tofind.mze_hash = zn->zn_hash;
    306  1.4   chs 	mze_tofind.mze_cd = 0;
    307  1.1  haad 
    308  1.1  haad again:
    309  1.1  haad 	mze = avl_find(avl, &mze_tofind, &idx);
    310  1.1  haad 	if (mze == NULL)
    311  1.1  haad 		mze = avl_nearest(avl, idx, AVL_AFTER);
    312  1.1  haad 	for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
    313  1.4   chs 		ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
    314  1.4   chs 		if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
    315  1.1  haad 			return (mze);
    316  1.1  haad 	}
    317  1.1  haad 	if (zn->zn_matchtype == MT_BEST) {
    318  1.1  haad 		zn->zn_matchtype = MT_FIRST;
    319  1.1  haad 		goto again;
    320  1.1  haad 	}
    321  1.1  haad 	return (NULL);
    322  1.1  haad }
    323  1.1  haad 
    324  1.1  haad static uint32_t
    325  1.1  haad mze_find_unused_cd(zap_t *zap, uint64_t hash)
    326  1.1  haad {
    327  1.1  haad 	mzap_ent_t mze_tofind;
    328  1.1  haad 	mzap_ent_t *mze;
    329  1.1  haad 	avl_index_t idx;
    330  1.1  haad 	avl_tree_t *avl = &zap->zap_m.zap_avl;
    331  1.1  haad 	uint32_t cd;
    332  1.1  haad 
    333  1.1  haad 	ASSERT(zap->zap_ismicro);
    334  1.1  haad 	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
    335  1.1  haad 
    336  1.1  haad 	mze_tofind.mze_hash = hash;
    337  1.4   chs 	mze_tofind.mze_cd = 0;
    338  1.1  haad 
    339  1.1  haad 	cd = 0;
    340  1.1  haad 	for (mze = avl_find(avl, &mze_tofind, &idx);
    341  1.1  haad 	    mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
    342  1.4   chs 		if (mze->mze_cd != cd)
    343  1.1  haad 			break;
    344  1.1  haad 		cd++;
    345  1.1  haad 	}
    346  1.1  haad 
    347  1.1  haad 	return (cd);
    348  1.1  haad }
    349  1.1  haad 
    350  1.1  haad static void
    351  1.1  haad mze_remove(zap_t *zap, mzap_ent_t *mze)
    352  1.1  haad {
    353  1.1  haad 	ASSERT(zap->zap_ismicro);
    354  1.1  haad 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
    355  1.1  haad 
    356  1.1  haad 	avl_remove(&zap->zap_m.zap_avl, mze);
    357  1.1  haad 	kmem_free(mze, sizeof (mzap_ent_t));
    358  1.1  haad }
    359  1.1  haad 
    360  1.1  haad static void
    361  1.1  haad mze_destroy(zap_t *zap)
    362  1.1  haad {
    363  1.1  haad 	mzap_ent_t *mze;
    364  1.1  haad 	void *avlcookie = NULL;
    365  1.1  haad 
    366  1.1  haad 	while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
    367  1.1  haad 		kmem_free(mze, sizeof (mzap_ent_t));
    368  1.1  haad 	avl_destroy(&zap->zap_m.zap_avl);
    369  1.1  haad }
    370  1.1  haad 
    371  1.1  haad static zap_t *
    372  1.1  haad mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
    373  1.1  haad {
    374  1.1  haad 	zap_t *winner;
    375  1.1  haad 	zap_t *zap;
    376  1.1  haad 	int i;
    377  1.4   chs 	uint64_t *zap_hdr = (uint64_t *)db->db_data;
    378  1.4   chs 	uint64_t zap_block_type = zap_hdr[0];
    379  1.4   chs 	uint64_t zap_magic = zap_hdr[1];
    380  1.1  haad 
    381  1.1  haad 	ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
    382  1.1  haad 
    383  1.1  haad 	zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
    384  1.1  haad 	rw_init(&zap->zap_rwlock, 0, 0, 0);
    385  1.1  haad 	rw_enter(&zap->zap_rwlock, RW_WRITER);
    386  1.1  haad 	zap->zap_objset = os;
    387  1.1  haad 	zap->zap_object = obj;
    388  1.1  haad 	zap->zap_dbuf = db;
    389  1.1  haad 
    390  1.4   chs 	if (zap_block_type != ZBT_MICRO) {
    391  1.1  haad 		mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
    392  1.4   chs 		zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
    393  1.4   chs 		if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) {
    394  1.4   chs 			winner = NULL;	/* No actual winner here... */
    395  1.4   chs 			goto handle_winner;
    396  1.4   chs 		}
    397  1.1  haad 	} else {
    398  1.1  haad 		zap->zap_ismicro = TRUE;
    399  1.1  haad 	}
    400  1.1  haad 
    401  1.1  haad 	/*
    402  1.1  haad 	 * Make sure that zap_ismicro is set before we let others see
    403  1.1  haad 	 * it, because zap_lockdir() checks zap_ismicro without the lock
    404  1.1  haad 	 * held.
    405  1.1  haad 	 */
    406  1.4   chs 	dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
    407  1.4   chs 	winner = dmu_buf_set_user(db, &zap->zap_dbu);
    408  1.1  haad 
    409  1.4   chs 	if (winner != NULL)
    410  1.4   chs 		goto handle_winner;
    411  1.1  haad 
    412  1.1  haad 	if (zap->zap_ismicro) {
    413  1.4   chs 		zap->zap_salt = zap_m_phys(zap)->mz_salt;
    414  1.4   chs 		zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
    415  1.1  haad 		zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
    416  1.1  haad 		avl_create(&zap->zap_m.zap_avl, mze_compare,
    417  1.1  haad 		    sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
    418  1.1  haad 
    419  1.1  haad 		for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
    420  1.1  haad 			mzap_ent_phys_t *mze =
    421  1.4   chs 			    &zap_m_phys(zap)->mz_chunk[i];
    422  1.1  haad 			if (mze->mze_name[0]) {
    423  1.1  haad 				zap_name_t *zn;
    424  1.1  haad 
    425  1.1  haad 				zn = zap_name_alloc(zap, mze->mze_name,
    426  1.1  haad 				    MT_EXACT);
    427  1.4   chs 				if (mze_insert(zap, i, zn->zn_hash) == 0)
    428  1.4   chs 					zap->zap_m.zap_num_entries++;
    429  1.4   chs 				else {
    430  1.4   chs 					printf("ZFS WARNING: Duplicated ZAP "
    431  1.4   chs 					    "entry detected (%s).\n",
    432  1.4   chs 					    mze->mze_name);
    433  1.4   chs 				}
    434  1.1  haad 				zap_name_free(zn);
    435  1.1  haad 			}
    436  1.1  haad 		}
    437  1.1  haad 	} else {
    438  1.4   chs 		zap->zap_salt = zap_f_phys(zap)->zap_salt;
    439  1.4   chs 		zap->zap_normflags = zap_f_phys(zap)->zap_normflags;
    440  1.1  haad 
    441  1.1  haad 		ASSERT3U(sizeof (struct zap_leaf_header), ==,
    442  1.1  haad 		    2*ZAP_LEAF_CHUNKSIZE);
    443  1.1  haad 
    444  1.1  haad 		/*
    445  1.1  haad 		 * The embedded pointer table should not overlap the
    446  1.1  haad 		 * other members.
    447  1.1  haad 		 */
    448  1.1  haad 		ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
    449  1.4   chs 		    &zap_f_phys(zap)->zap_salt);
    450  1.1  haad 
    451  1.1  haad 		/*
    452  1.1  haad 		 * The embedded pointer table should end at the end of
    453  1.1  haad 		 * the block
    454  1.1  haad 		 */
    455  1.1  haad 		ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
    456  1.1  haad 		    1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
    457  1.4   chs 		    (uintptr_t)zap_f_phys(zap), ==,
    458  1.1  haad 		    zap->zap_dbuf->db_size);
    459  1.1  haad 	}
    460  1.1  haad 	rw_exit(&zap->zap_rwlock);
    461  1.1  haad 	return (zap);
    462  1.4   chs 
    463  1.4   chs handle_winner:
    464  1.4   chs 	rw_exit(&zap->zap_rwlock);
    465  1.4   chs 	rw_destroy(&zap->zap_rwlock);
    466  1.4   chs 	if (!zap->zap_ismicro)
    467  1.4   chs 		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
    468  1.4   chs 	kmem_free(zap, sizeof (zap_t));
    469  1.4   chs 	return (winner);
    470  1.1  haad }
    471  1.1  haad 
    472  1.4   chs static int
    473  1.4   chs zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx,
    474  1.1  haad     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
    475  1.1  haad {
    476  1.1  haad 	zap_t *zap;
    477  1.1  haad 	krw_t lt;
    478  1.4   chs 
    479  1.4   chs 	ASSERT0(db->db_offset);
    480  1.4   chs 	objset_t *os = dmu_buf_get_objset(db);
    481  1.4   chs 	uint64_t obj = db->db_object;
    482  1.1  haad 
    483  1.1  haad 	*zapp = NULL;
    484  1.1  haad 
    485  1.1  haad #ifdef ZFS_DEBUG
    486  1.1  haad 	{
    487  1.1  haad 		dmu_object_info_t doi;
    488  1.1  haad 		dmu_object_info_from_db(db, &doi);
    489  1.4   chs 		ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
    490  1.1  haad 	}
    491  1.1  haad #endif
    492  1.1  haad 
    493  1.1  haad 	zap = dmu_buf_get_user(db);
    494  1.4   chs 	if (zap == NULL) {
    495  1.1  haad 		zap = mzap_open(os, obj, db);
    496  1.4   chs 		if (zap == NULL) {
    497  1.4   chs 			/*
    498  1.4   chs 			 * mzap_open() didn't like what it saw on-disk.
    499  1.4   chs 			 * Check for corruption!
    500  1.4   chs 			 */
    501  1.4   chs 			return (SET_ERROR(EIO));
    502  1.4   chs 		}
    503  1.4   chs 	}
    504  1.1  haad 
    505  1.1  haad 	/*
    506  1.1  haad 	 * We're checking zap_ismicro without the lock held, in order to
    507  1.1  haad 	 * tell what type of lock we want.  Once we have some sort of
    508  1.1  haad 	 * lock, see if it really is the right type.  In practice this
    509  1.1  haad 	 * can only be different if it was upgraded from micro to fat,
    510  1.1  haad 	 * and micro wanted WRITER but fat only needs READER.
    511  1.1  haad 	 */
    512  1.1  haad 	lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
    513  1.1  haad 	rw_enter(&zap->zap_rwlock, lt);
    514  1.1  haad 	if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
    515  1.1  haad 		/* it was upgraded, now we only need reader */
    516  1.1  haad 		ASSERT(lt == RW_WRITER);
    517  1.1  haad 		ASSERT(RW_READER ==
    518  1.1  haad 		    (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
    519  1.1  haad 		rw_downgrade(&zap->zap_rwlock);
    520  1.1  haad 		lt = RW_READER;
    521  1.1  haad 	}
    522  1.1  haad 
    523  1.1  haad 	zap->zap_objset = os;
    524  1.1  haad 
    525  1.1  haad 	if (lt == RW_WRITER)
    526  1.1  haad 		dmu_buf_will_dirty(db, tx);
    527  1.1  haad 
    528  1.1  haad 	ASSERT3P(zap->zap_dbuf, ==, db);
    529  1.1  haad 
    530  1.1  haad 	ASSERT(!zap->zap_ismicro ||
    531  1.1  haad 	    zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
    532  1.1  haad 	if (zap->zap_ismicro && tx && adding &&
    533  1.1  haad 	    zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
    534  1.1  haad 		uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
    535  1.1  haad 		if (newsz > MZAP_MAX_BLKSZ) {
    536  1.1  haad 			dprintf("upgrading obj %llu: num_entries=%u\n",
    537  1.1  haad 			    obj, zap->zap_m.zap_num_entries);
    538  1.1  haad 			*zapp = zap;
    539  1.4   chs 			int err = mzap_upgrade(zapp, tag, tx, 0);
    540  1.4   chs 			if (err != 0)
    541  1.4   chs 				rw_exit(&zap->zap_rwlock);
    542  1.4   chs 			return (err);
    543  1.1  haad 		}
    544  1.4   chs 		VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
    545  1.1  haad 		zap->zap_m.zap_num_chunks =
    546  1.1  haad 		    db->db_size / MZAP_ENT_LEN - 1;
    547  1.1  haad 	}
    548  1.1  haad 
    549  1.1  haad 	*zapp = zap;
    550  1.1  haad 	return (0);
    551  1.1  haad }
    552  1.1  haad 
    553  1.4   chs static int
    554  1.4   chs zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
    555  1.4   chs     krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
    556  1.4   chs {
    557  1.4   chs 	dmu_buf_t *db;
    558  1.4   chs 	int err;
    559  1.4   chs 
    560  1.4   chs 	err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
    561  1.4   chs 	if (err != 0) {
    562  1.4   chs 		return (err);
    563  1.4   chs 	}
    564  1.4   chs 	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
    565  1.4   chs 	if (err != 0) {
    566  1.4   chs 		dmu_buf_rele(db, tag);
    567  1.4   chs 	}
    568  1.4   chs 	return (err);
    569  1.4   chs }
    570  1.4   chs 
    571  1.4   chs int
    572  1.4   chs zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
    573  1.4   chs     krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
    574  1.4   chs {
    575  1.4   chs 	dmu_buf_t *db;
    576  1.4   chs 	int err;
    577  1.4   chs 
    578  1.4   chs 	err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH);
    579  1.4   chs 	if (err != 0)
    580  1.4   chs 		return (err);
    581  1.4   chs 	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
    582  1.4   chs 	if (err != 0)
    583  1.4   chs 		dmu_buf_rele(db, tag);
    584  1.4   chs 	return (err);
    585  1.4   chs }
    586  1.4   chs 
    587  1.1  haad void
    588  1.4   chs zap_unlockdir(zap_t *zap, void *tag)
    589  1.1  haad {
    590  1.1  haad 	rw_exit(&zap->zap_rwlock);
    591  1.4   chs 	dmu_buf_rele(zap->zap_dbuf, tag);
    592  1.1  haad }
    593  1.1  haad 
    594  1.1  haad static int
    595  1.4   chs mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags)
    596  1.1  haad {
    597  1.1  haad 	mzap_phys_t *mzp;
    598  1.3  haad 	int i, sz, nchunks;
    599  1.3  haad 	int err = 0;
    600  1.1  haad 	zap_t *zap = *zapp;
    601  1.1  haad 
    602  1.1  haad 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
    603  1.1  haad 
    604  1.1  haad 	sz = zap->zap_dbuf->db_size;
    605  1.4   chs 	mzp = zio_buf_alloc(sz);
    606  1.1  haad 	bcopy(zap->zap_dbuf->db_data, mzp, sz);
    607  1.1  haad 	nchunks = zap->zap_m.zap_num_chunks;
    608  1.1  haad 
    609  1.3  haad 	if (!flags) {
    610  1.3  haad 		err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
    611  1.3  haad 		    1ULL << fzap_default_block_shift, 0, tx);
    612  1.3  haad 		if (err) {
    613  1.4   chs 			zio_buf_free(mzp, sz);
    614  1.3  haad 			return (err);
    615  1.3  haad 		}
    616  1.1  haad 	}
    617  1.1  haad 
    618  1.1  haad 	dprintf("upgrading obj=%llu with %u chunks\n",
    619  1.1  haad 	    zap->zap_object, nchunks);
    620  1.1  haad 	/* XXX destroy the avl later, so we can use the stored hash value */
    621  1.1  haad 	mze_destroy(zap);
    622  1.1  haad 
    623  1.3  haad 	fzap_upgrade(zap, tx, flags);
    624  1.1  haad 
    625  1.1  haad 	for (i = 0; i < nchunks; i++) {
    626  1.1  haad 		mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
    627  1.1  haad 		zap_name_t *zn;
    628  1.1  haad 		if (mze->mze_name[0] == 0)
    629  1.1  haad 			continue;
    630  1.1  haad 		dprintf("adding %s=%llu\n",
    631  1.1  haad 		    mze->mze_name, mze->mze_value);
    632  1.1  haad 		zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
    633  1.4   chs 		err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
    634  1.4   chs 		    tag, tx);
    635  1.1  haad 		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
    636  1.1  haad 		zap_name_free(zn);
    637  1.1  haad 		if (err)
    638  1.1  haad 			break;
    639  1.1  haad 	}
    640  1.4   chs 	zio_buf_free(mzp, sz);
    641  1.1  haad 	*zapp = zap;
    642  1.1  haad 	return (err);
    643  1.1  haad }
    644  1.1  haad 
    645  1.4   chs void
    646  1.3  haad mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
    647  1.3  haad     dmu_tx_t *tx)
    648  1.1  haad {
    649  1.1  haad 	dmu_buf_t *db;
    650  1.1  haad 	mzap_phys_t *zp;
    651  1.1  haad 
    652  1.4   chs 	VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
    653  1.1  haad 
    654  1.1  haad #ifdef ZFS_DEBUG
    655  1.1  haad 	{
    656  1.1  haad 		dmu_object_info_t doi;
    657  1.1  haad 		dmu_object_info_from_db(db, &doi);
    658  1.4   chs 		ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
    659  1.1  haad 	}
    660  1.1  haad #endif
    661  1.1  haad 
    662  1.1  haad 	dmu_buf_will_dirty(db, tx);
    663  1.1  haad 	zp = db->db_data;
    664  1.1  haad 	zp->mz_block_type = ZBT_MICRO;
    665  1.1  haad 	zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL;
    666  1.1  haad 	zp->mz_normflags = normflags;
    667  1.1  haad 	dmu_buf_rele(db, FTAG);
    668  1.3  haad 
    669  1.3  haad 	if (flags != 0) {
    670  1.3  haad 		zap_t *zap;
    671  1.3  haad 		/* Only fat zap supports flags; upgrade immediately. */
    672  1.3  haad 		VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER,
    673  1.4   chs 		    B_FALSE, B_FALSE, FTAG, &zap));
    674  1.4   chs 		VERIFY3U(0, ==, mzap_upgrade(&zap, FTAG, tx, flags));
    675  1.4   chs 		zap_unlockdir(zap, FTAG);
    676  1.3  haad 	}
    677  1.1  haad }
    678  1.1  haad 
    679  1.1  haad int
    680  1.1  haad zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
    681  1.1  haad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
    682  1.1  haad {
    683  1.1  haad 	return (zap_create_claim_norm(os, obj,
    684  1.1  haad 	    0, ot, bonustype, bonuslen, tx));
    685  1.1  haad }
    686  1.1  haad 
    687  1.1  haad int
    688  1.1  haad zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
    689  1.1  haad     dmu_object_type_t ot,
    690  1.1  haad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
    691  1.1  haad {
    692  1.1  haad 	int err;
    693  1.1  haad 
    694  1.1  haad 	err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
    695  1.1  haad 	if (err != 0)
    696  1.1  haad 		return (err);
    697  1.3  haad 	mzap_create_impl(os, obj, normflags, 0, tx);
    698  1.1  haad 	return (0);
    699  1.1  haad }
    700  1.1  haad 
    701  1.1  haad uint64_t
    702  1.1  haad zap_create(objset_t *os, dmu_object_type_t ot,
    703  1.1  haad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
    704  1.1  haad {
    705  1.1  haad 	return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
    706  1.1  haad }
    707  1.1  haad 
    708  1.1  haad uint64_t
    709  1.1  haad zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
    710  1.1  haad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
    711  1.1  haad {
    712  1.1  haad 	uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
    713  1.1  haad 
    714  1.3  haad 	mzap_create_impl(os, obj, normflags, 0, tx);
    715  1.3  haad 	return (obj);
    716  1.3  haad }
    717  1.3  haad 
    718  1.3  haad uint64_t
    719  1.3  haad zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
    720  1.3  haad     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
    721  1.3  haad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
    722  1.3  haad {
    723  1.3  haad 	uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
    724  1.3  haad 
    725  1.3  haad 	ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT &&
    726  1.4   chs 	    leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT &&
    727  1.3  haad 	    indirect_blockshift >= SPA_MINBLOCKSHIFT &&
    728  1.4   chs 	    indirect_blockshift <= SPA_OLD_MAXBLOCKSHIFT);
    729  1.3  haad 
    730  1.3  haad 	VERIFY(dmu_object_set_blocksize(os, obj,
    731  1.3  haad 	    1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
    732  1.3  haad 
    733  1.3  haad 	mzap_create_impl(os, obj, normflags, flags, tx);
    734  1.1  haad 	return (obj);
    735  1.1  haad }
    736  1.1  haad 
    737  1.1  haad int
    738  1.1  haad zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
    739  1.1  haad {
    740  1.1  haad 	/*
    741  1.1  haad 	 * dmu_object_free will free the object number and free the
    742  1.1  haad 	 * data.  Freeing the data will cause our pageout function to be
    743  1.1  haad 	 * called, which will destroy our data (zap_leaf_t's and zap_t).
    744  1.1  haad 	 */
    745  1.1  haad 
    746  1.1  haad 	return (dmu_object_free(os, zapobj, tx));
    747  1.1  haad }
    748  1.1  haad 
    749  1.1  haad void
    750  1.4   chs zap_evict_sync(void *dbu)
    751  1.1  haad {
    752  1.4   chs 	zap_t *zap = dbu;
    753  1.1  haad 
    754  1.1  haad 	rw_destroy(&zap->zap_rwlock);
    755  1.1  haad 
    756  1.1  haad 	if (zap->zap_ismicro)
    757  1.1  haad 		mze_destroy(zap);
    758  1.1  haad 	else
    759  1.1  haad 		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
    760  1.1  haad 
    761  1.1  haad 	kmem_free(zap, sizeof (zap_t));
    762  1.1  haad }
    763  1.1  haad 
    764  1.1  haad int
    765  1.1  haad zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
    766  1.1  haad {
    767  1.1  haad 	zap_t *zap;
    768  1.1  haad 	int err;
    769  1.1  haad 
    770  1.4   chs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
    771  1.1  haad 	if (err)
    772  1.1  haad 		return (err);
    773  1.1  haad 	if (!zap->zap_ismicro) {
    774  1.1  haad 		err = fzap_count(zap, count);
    775  1.1  haad 	} else {
    776  1.1  haad 		*count = zap->zap_m.zap_num_entries;
    777  1.1  haad 	}
    778  1.4   chs 	zap_unlockdir(zap, FTAG);
    779  1.1  haad 	return (err);
    780  1.1  haad }
    781  1.1  haad 
    782  1.1  haad /*
    783  1.1  haad  * zn may be NULL; if not specified, it will be computed if needed.
    784  1.1  haad  * See also the comment above zap_entry_normalization_conflict().
    785  1.1  haad  */
    786  1.1  haad static boolean_t
    787  1.1  haad mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
    788  1.1  haad {
    789  1.1  haad 	mzap_ent_t *other;
    790  1.1  haad 	int direction = AVL_BEFORE;
    791  1.1  haad 	boolean_t allocdzn = B_FALSE;
    792  1.1  haad 
    793  1.1  haad 	if (zap->zap_normflags == 0)
    794  1.1  haad 		return (B_FALSE);
    795  1.1  haad 
    796  1.1  haad again:
    797  1.1  haad 	for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
    798  1.1  haad 	    other && other->mze_hash == mze->mze_hash;
    799  1.1  haad 	    other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
    800  1.1  haad 
    801  1.1  haad 		if (zn == NULL) {
    802  1.4   chs 			zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
    803  1.1  haad 			    MT_FIRST);
    804  1.1  haad 			allocdzn = B_TRUE;
    805  1.1  haad 		}
    806  1.4   chs 		if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
    807  1.1  haad 			if (allocdzn)
    808  1.1  haad 				zap_name_free(zn);
    809  1.1  haad 			return (B_TRUE);
    810  1.1  haad 		}
    811  1.1  haad 	}
    812  1.1  haad 
    813  1.1  haad 	if (direction == AVL_BEFORE) {
    814  1.1  haad 		direction = AVL_AFTER;
    815  1.1  haad 		goto again;
    816  1.1  haad 	}
    817  1.1  haad 
    818  1.1  haad 	if (allocdzn)
    819  1.1  haad 		zap_name_free(zn);
    820  1.1  haad 	return (B_FALSE);
    821  1.1  haad }
    822  1.1  haad 
    823  1.1  haad /*
    824  1.1  haad  * Routines for manipulating attributes.
    825  1.1  haad  */
    826  1.1  haad 
    827  1.1  haad int
    828  1.1  haad zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
    829  1.1  haad     uint64_t integer_size, uint64_t num_integers, void *buf)
    830  1.1  haad {
    831  1.1  haad 	return (zap_lookup_norm(os, zapobj, name, integer_size,
    832  1.1  haad 	    num_integers, buf, MT_EXACT, NULL, 0, NULL));
    833  1.1  haad }
    834  1.1  haad 
    835  1.4   chs static int
    836  1.4   chs zap_lookup_impl(zap_t *zap, const char *name,
    837  1.1  haad     uint64_t integer_size, uint64_t num_integers, void *buf,
    838  1.1  haad     matchtype_t mt, char *realname, int rn_len,
    839  1.1  haad     boolean_t *ncp)
    840  1.1  haad {
    841  1.4   chs 	int err = 0;
    842  1.1  haad 	mzap_ent_t *mze;
    843  1.1  haad 	zap_name_t *zn;
    844  1.1  haad 
    845  1.1  haad 	zn = zap_name_alloc(zap, name, mt);
    846  1.4   chs 	if (zn == NULL)
    847  1.4   chs 		return (SET_ERROR(ENOTSUP));
    848  1.1  haad 
    849  1.1  haad 	if (!zap->zap_ismicro) {
    850  1.1  haad 		err = fzap_lookup(zn, integer_size, num_integers, buf,
    851  1.1  haad 		    realname, rn_len, ncp);
    852  1.1  haad 	} else {
    853  1.1  haad 		mze = mze_find(zn);
    854  1.1  haad 		if (mze == NULL) {
    855  1.4   chs 			err = SET_ERROR(ENOENT);
    856  1.1  haad 		} else {
    857  1.1  haad 			if (num_integers < 1) {
    858  1.4   chs 				err = SET_ERROR(EOVERFLOW);
    859  1.1  haad 			} else if (integer_size != 8) {
    860  1.4   chs 				err = SET_ERROR(EINVAL);
    861  1.1  haad 			} else {
    862  1.4   chs 				*(uint64_t *)buf =
    863  1.4   chs 				    MZE_PHYS(zap, mze)->mze_value;
    864  1.2  haad 				if (realname != NULL)
    865  1.2  haad 					(void) strlcpy(realname,
    866  1.4   chs 					    MZE_PHYS(zap, mze)->mze_name, rn_len);
    867  1.1  haad 				if (ncp) {
    868  1.1  haad 					*ncp = mzap_normalization_conflict(zap,
    869  1.1  haad 					    zn, mze);
    870  1.1  haad 				}
    871  1.1  haad 			}
    872  1.1  haad 		}
    873  1.1  haad 	}
    874  1.1  haad 	zap_name_free(zn);
    875  1.4   chs 	return (err);
    876  1.4   chs }
    877  1.4   chs 
    878  1.4   chs int
    879  1.4   chs zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
    880  1.4   chs     uint64_t integer_size, uint64_t num_integers, void *buf,
    881  1.4   chs     matchtype_t mt, char *realname, int rn_len,
    882  1.4   chs     boolean_t *ncp)
    883  1.4   chs {
    884  1.4   chs 	zap_t *zap;
    885  1.4   chs 	int err;
    886  1.4   chs 
    887  1.4   chs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
    888  1.4   chs 	if (err != 0)
    889  1.4   chs 		return (err);
    890  1.4   chs 	err = zap_lookup_impl(zap, name, integer_size,
    891  1.4   chs 	    num_integers, buf, mt, realname, rn_len, ncp);
    892  1.4   chs 	zap_unlockdir(zap, FTAG);
    893  1.4   chs 	return (err);
    894  1.4   chs }
    895  1.4   chs 
    896  1.4   chs int
    897  1.4   chs zap_lookup_by_dnode(dnode_t *dn, const char *name,
    898  1.4   chs     uint64_t integer_size, uint64_t num_integers, void *buf)
    899  1.4   chs {
    900  1.4   chs 	return (zap_lookup_norm_by_dnode(dn, name, integer_size,
    901  1.4   chs 	    num_integers, buf, MT_EXACT, NULL, 0, NULL));
    902  1.4   chs }
    903  1.4   chs 
    904  1.4   chs int
    905  1.4   chs zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
    906  1.4   chs     uint64_t integer_size, uint64_t num_integers, void *buf,
    907  1.4   chs     matchtype_t mt, char *realname, int rn_len,
    908  1.4   chs     boolean_t *ncp)
    909  1.4   chs {
    910  1.4   chs 	zap_t *zap;
    911  1.4   chs 	int err;
    912  1.4   chs 
    913  1.4   chs 	err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
    914  1.4   chs 	    FTAG, &zap);
    915  1.4   chs 	if (err != 0)
    916  1.4   chs 		return (err);
    917  1.4   chs 	err = zap_lookup_impl(zap, name, integer_size,
    918  1.4   chs 	    num_integers, buf, mt, realname, rn_len, ncp);
    919  1.4   chs 	zap_unlockdir(zap, FTAG);
    920  1.4   chs 	return (err);
    921  1.4   chs }
    922  1.4   chs 
    923  1.4   chs int
    924  1.4   chs zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    925  1.4   chs     int key_numints)
    926  1.4   chs {
    927  1.4   chs 	zap_t *zap;
    928  1.4   chs 	int err;
    929  1.4   chs 	zap_name_t *zn;
    930  1.4   chs 
    931  1.4   chs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
    932  1.4   chs 	if (err)
    933  1.4   chs 		return (err);
    934  1.4   chs 	zn = zap_name_alloc_uint64(zap, key, key_numints);
    935  1.4   chs 	if (zn == NULL) {
    936  1.4   chs 		zap_unlockdir(zap, FTAG);
    937  1.4   chs 		return (SET_ERROR(ENOTSUP));
    938  1.4   chs 	}
    939  1.4   chs 
    940  1.4   chs 	fzap_prefetch(zn);
    941  1.4   chs 	zap_name_free(zn);
    942  1.4   chs 	zap_unlockdir(zap, FTAG);
    943  1.1  haad 	return (err);
    944  1.1  haad }
    945  1.1  haad 
    946  1.1  haad int
    947  1.3  haad zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    948  1.3  haad     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
    949  1.3  haad {
    950  1.3  haad 	zap_t *zap;
    951  1.3  haad 	int err;
    952  1.3  haad 	zap_name_t *zn;
    953  1.3  haad 
    954  1.4   chs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
    955  1.3  haad 	if (err)
    956  1.3  haad 		return (err);
    957  1.3  haad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
    958  1.3  haad 	if (zn == NULL) {
    959  1.4   chs 		zap_unlockdir(zap, FTAG);
    960  1.4   chs 		return (SET_ERROR(ENOTSUP));
    961  1.3  haad 	}
    962  1.3  haad 
    963  1.3  haad 	err = fzap_lookup(zn, integer_size, num_integers, buf,
    964  1.3  haad 	    NULL, 0, NULL);
    965  1.3  haad 	zap_name_free(zn);
    966  1.4   chs 	zap_unlockdir(zap, FTAG);
    967  1.3  haad 	return (err);
    968  1.3  haad }
    969  1.3  haad 
    970  1.3  haad int
    971  1.3  haad zap_contains(objset_t *os, uint64_t zapobj, const char *name)
    972  1.3  haad {
    973  1.4   chs 	int err = zap_lookup_norm(os, zapobj, name, 0,
    974  1.4   chs 	    0, NULL, MT_EXACT, NULL, 0, NULL);
    975  1.3  haad 	if (err == EOVERFLOW || err == EINVAL)
    976  1.3  haad 		err = 0; /* found, but skipped reading the value */
    977  1.3  haad 	return (err);
    978  1.3  haad }
    979  1.3  haad 
    980  1.3  haad int
    981  1.1  haad zap_length(objset_t *os, uint64_t zapobj, const char *name,
    982  1.1  haad     uint64_t *integer_size, uint64_t *num_integers)
    983  1.1  haad {
    984  1.1  haad 	zap_t *zap;
    985  1.1  haad 	int err;
    986  1.1  haad 	mzap_ent_t *mze;
    987  1.1  haad 	zap_name_t *zn;
    988  1.1  haad 
    989  1.4   chs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
    990  1.1  haad 	if (err)
    991  1.1  haad 		return (err);
    992  1.1  haad 	zn = zap_name_alloc(zap, name, MT_EXACT);
    993  1.1  haad 	if (zn == NULL) {
    994  1.4   chs 		zap_unlockdir(zap, FTAG);
    995  1.4   chs 		return (SET_ERROR(ENOTSUP));
    996  1.1  haad 	}
    997  1.1  haad 	if (!zap->zap_ismicro) {
    998  1.1  haad 		err = fzap_length(zn, integer_size, num_integers);
    999  1.1  haad 	} else {
   1000  1.1  haad 		mze = mze_find(zn);
   1001  1.1  haad 		if (mze == NULL) {
   1002  1.4   chs 			err = SET_ERROR(ENOENT);
   1003  1.1  haad 		} else {
   1004  1.1  haad 			if (integer_size)
   1005  1.1  haad 				*integer_size = 8;
   1006  1.1  haad 			if (num_integers)
   1007  1.1  haad 				*num_integers = 1;
   1008  1.1  haad 		}
   1009  1.1  haad 	}
   1010  1.1  haad 	zap_name_free(zn);
   1011  1.4   chs 	zap_unlockdir(zap, FTAG);
   1012  1.1  haad 	return (err);
   1013  1.1  haad }
   1014  1.1  haad 
   1015  1.3  haad int
   1016  1.3  haad zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
   1017  1.3  haad     int key_numints, uint64_t *integer_size, uint64_t *num_integers)
   1018  1.3  haad {
   1019  1.3  haad 	zap_t *zap;
   1020  1.3  haad 	int err;
   1021  1.3  haad 	zap_name_t *zn;
   1022  1.3  haad 
   1023  1.4   chs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
   1024  1.3  haad 	if (err)
   1025  1.3  haad 		return (err);
   1026  1.3  haad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
   1027  1.3  haad 	if (zn == NULL) {
   1028  1.4   chs 		zap_unlockdir(zap, FTAG);
   1029  1.4   chs 		return (SET_ERROR(ENOTSUP));
   1030  1.3  haad 	}
   1031  1.3  haad 	err = fzap_length(zn, integer_size, num_integers);
   1032  1.3  haad 	zap_name_free(zn);
   1033  1.4   chs 	zap_unlockdir(zap, FTAG);
   1034  1.3  haad 	return (err);
   1035  1.3  haad }
   1036  1.3  haad 
   1037  1.1  haad static void
   1038  1.1  haad mzap_addent(zap_name_t *zn, uint64_t value)
   1039  1.1  haad {
   1040  1.1  haad 	int i;
   1041  1.1  haad 	zap_t *zap = zn->zn_zap;
   1042  1.1  haad 	int start = zap->zap_m.zap_alloc_next;
   1043  1.1  haad 	uint32_t cd;
   1044  1.1  haad 
   1045  1.1  haad 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
   1046  1.1  haad 
   1047  1.1  haad #ifdef ZFS_DEBUG
   1048  1.1  haad 	for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
   1049  1.4   chs 		mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
   1050  1.3  haad 		ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
   1051  1.1  haad 	}
   1052  1.1  haad #endif
   1053  1.1  haad 
   1054  1.1  haad 	cd = mze_find_unused_cd(zap, zn->zn_hash);
   1055  1.1  haad 	/* given the limited size of the microzap, this can't happen */
   1056  1.3  haad 	ASSERT(cd < zap_maxcd(zap));
   1057  1.1  haad 
   1058  1.1  haad again:
   1059  1.1  haad 	for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
   1060  1.4   chs 		mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
   1061  1.1  haad 		if (mze->mze_name[0] == 0) {
   1062  1.1  haad 			mze->mze_value = value;
   1063  1.1  haad 			mze->mze_cd = cd;
   1064  1.3  haad 			(void) strcpy(mze->mze_name, zn->zn_key_orig);
   1065  1.1  haad 			zap->zap_m.zap_num_entries++;
   1066  1.1  haad 			zap->zap_m.zap_alloc_next = i+1;
   1067  1.1  haad 			if (zap->zap_m.zap_alloc_next ==
   1068  1.1  haad 			    zap->zap_m.zap_num_chunks)
   1069  1.1  haad 				zap->zap_m.zap_alloc_next = 0;
   1070  1.4   chs 			VERIFY(0 == mze_insert(zap, i, zn->zn_hash));
   1071  1.1  haad 			return;
   1072  1.1  haad 		}
   1073  1.1  haad 	}
   1074  1.1  haad 	if (start != 0) {
   1075  1.1  haad 		start = 0;
   1076  1.1  haad 		goto again;
   1077  1.1  haad 	}
   1078  1.1  haad 	ASSERT(!"out of entries!");
   1079  1.1  haad }
   1080  1.1  haad 
   1081  1.1  haad int
   1082  1.3  haad zap_add(objset_t *os, uint64_t zapobj, const char *key,
   1083  1.1  haad     int integer_size, uint64_t num_integers,
   1084  1.1  haad     const void *val, dmu_tx_t *tx)
   1085  1.1  haad {
   1086  1.1  haad 	zap_t *zap;
   1087  1.1  haad 	int err;
   1088  1.1  haad 	mzap_ent_t *mze;
   1089  1.1  haad 	const uint64_t *intval = val;
   1090  1.1  haad 	zap_name_t *zn;
   1091  1.1  haad 
   1092  1.4   chs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
   1093  1.1  haad 	if (err)
   1094  1.1  haad 		return (err);
   1095  1.3  haad 	zn = zap_name_alloc(zap, key, MT_EXACT);
   1096  1.1  haad 	if (zn == NULL) {
   1097  1.4   chs 		zap_unlockdir(zap, FTAG);
   1098  1.4   chs 		return (SET_ERROR(ENOTSUP));
   1099  1.1  haad 	}
   1100  1.1  haad 	if (!zap->zap_ismicro) {
   1101  1.4   chs 		err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
   1102  1.1  haad 		zap = zn->zn_zap;	/* fzap_add() may change zap */
   1103  1.1  haad 	} else if (integer_size != 8 || num_integers != 1 ||
   1104  1.3  haad 	    strlen(key) >= MZAP_NAME_LEN) {
   1105  1.4   chs 		err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
   1106  1.4   chs 		if (err == 0) {
   1107  1.4   chs 			err = fzap_add(zn, integer_size, num_integers, val,
   1108  1.4   chs 			    FTAG, tx);
   1109  1.4   chs 		}
   1110  1.1  haad 		zap = zn->zn_zap;	/* fzap_add() may change zap */
   1111  1.1  haad 	} else {
   1112  1.1  haad 		mze = mze_find(zn);
   1113  1.1  haad 		if (mze != NULL) {
   1114  1.4   chs 			err = SET_ERROR(EEXIST);
   1115  1.1  haad 		} else {
   1116  1.1  haad 			mzap_addent(zn, *intval);
   1117  1.1  haad 		}
   1118  1.1  haad 	}
   1119  1.1  haad 	ASSERT(zap == zn->zn_zap);
   1120  1.1  haad 	zap_name_free(zn);
   1121  1.1  haad 	if (zap != NULL)	/* may be NULL if fzap_add() failed */
   1122  1.4   chs 		zap_unlockdir(zap, FTAG);
   1123  1.1  haad 	return (err);
   1124  1.1  haad }
   1125  1.1  haad 
   1126  1.1  haad int
   1127  1.3  haad zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
   1128  1.3  haad     int key_numints, int integer_size, uint64_t num_integers,
   1129  1.3  haad     const void *val, dmu_tx_t *tx)
   1130  1.3  haad {
   1131  1.3  haad 	zap_t *zap;
   1132  1.3  haad 	int err;
   1133  1.3  haad 	zap_name_t *zn;
   1134  1.3  haad 
   1135  1.4   chs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
   1136  1.3  haad 	if (err)
   1137  1.3  haad 		return (err);
   1138  1.3  haad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
   1139  1.3  haad 	if (zn == NULL) {
   1140  1.4   chs 		zap_unlockdir(zap, FTAG);
   1141  1.4   chs 		return (SET_ERROR(ENOTSUP));
   1142  1.3  haad 	}
   1143  1.4   chs 	err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
   1144  1.3  haad 	zap = zn->zn_zap;	/* fzap_add() may change zap */
   1145  1.3  haad 	zap_name_free(zn);
   1146  1.3  haad 	if (zap != NULL)	/* may be NULL if fzap_add() failed */
   1147  1.4   chs 		zap_unlockdir(zap, FTAG);
   1148  1.3  haad 	return (err);
   1149  1.3  haad }
   1150  1.3  haad 
   1151  1.3  haad int
   1152  1.1  haad zap_update(objset_t *os, uint64_t zapobj, const char *name,
   1153  1.1  haad     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
   1154  1.1  haad {
   1155  1.1  haad 	zap_t *zap;
   1156  1.1  haad 	mzap_ent_t *mze;
   1157  1.4   chs 	uint64_t oldval;
   1158  1.1  haad 	const uint64_t *intval = val;
   1159  1.1  haad 	zap_name_t *zn;
   1160  1.1  haad 	int err;
   1161  1.1  haad 
   1162  1.4   chs #ifdef ZFS_DEBUG
   1163  1.4   chs 	/*
   1164  1.4   chs 	 * If there is an old value, it shouldn't change across the
   1165  1.4   chs 	 * lockdir (eg, due to bprewrite's xlation).
   1166  1.4   chs 	 */
   1167  1.4   chs 	if (integer_size == 8 && num_integers == 1)
   1168  1.4   chs 		(void) zap_lookup(os, zapobj, name, 8, 1, &oldval);
   1169  1.4   chs #endif
   1170  1.4   chs 
   1171  1.4   chs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
   1172  1.1  haad 	if (err)
   1173  1.1  haad 		return (err);
   1174  1.1  haad 	zn = zap_name_alloc(zap, name, MT_EXACT);
   1175  1.1  haad 	if (zn == NULL) {
   1176  1.4   chs 		zap_unlockdir(zap, FTAG);
   1177  1.4   chs 		return (SET_ERROR(ENOTSUP));
   1178  1.1  haad 	}
   1179  1.1  haad 	if (!zap->zap_ismicro) {
   1180  1.4   chs 		err = fzap_update(zn, integer_size, num_integers, val,
   1181  1.4   chs 		    FTAG, tx);
   1182  1.1  haad 		zap = zn->zn_zap;	/* fzap_update() may change zap */
   1183  1.1  haad 	} else if (integer_size != 8 || num_integers != 1 ||
   1184  1.1  haad 	    strlen(name) >= MZAP_NAME_LEN) {
   1185  1.1  haad 		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
   1186  1.1  haad 		    zapobj, integer_size, num_integers, name);
   1187  1.4   chs 		err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
   1188  1.4   chs 		if (err == 0) {
   1189  1.1  haad 			err = fzap_update(zn, integer_size, num_integers,
   1190  1.4   chs 			    val, FTAG, tx);
   1191  1.4   chs 		}
   1192  1.1  haad 		zap = zn->zn_zap;	/* fzap_update() may change zap */
   1193  1.1  haad 	} else {
   1194  1.1  haad 		mze = mze_find(zn);
   1195  1.1  haad 		if (mze != NULL) {
   1196  1.4   chs 			ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval);
   1197  1.4   chs 			MZE_PHYS(zap, mze)->mze_value = *intval;
   1198  1.1  haad 		} else {
   1199  1.1  haad 			mzap_addent(zn, *intval);
   1200  1.1  haad 		}
   1201  1.1  haad 	}
   1202  1.1  haad 	ASSERT(zap == zn->zn_zap);
   1203  1.1  haad 	zap_name_free(zn);
   1204  1.1  haad 	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
   1205  1.4   chs 		zap_unlockdir(zap, FTAG);
   1206  1.1  haad 	return (err);
   1207  1.1  haad }
   1208  1.1  haad 
   1209  1.1  haad int
   1210  1.3  haad zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
   1211  1.3  haad     int key_numints,
   1212  1.3  haad     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
   1213  1.3  haad {
   1214  1.3  haad 	zap_t *zap;
   1215  1.3  haad 	zap_name_t *zn;
   1216  1.3  haad 	int err;
   1217  1.3  haad 
   1218  1.4   chs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
   1219  1.3  haad 	if (err)
   1220  1.3  haad 		return (err);
   1221  1.3  haad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
   1222  1.3  haad 	if (zn == NULL) {
   1223  1.4   chs 		zap_unlockdir(zap, FTAG);
   1224  1.4   chs 		return (SET_ERROR(ENOTSUP));
   1225  1.3  haad 	}
   1226  1.4   chs 	err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx);
   1227  1.3  haad 	zap = zn->zn_zap;	/* fzap_update() may change zap */
   1228  1.3  haad 	zap_name_free(zn);
   1229  1.3  haad 	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
   1230  1.4   chs 		zap_unlockdir(zap, FTAG);
   1231  1.3  haad 	return (err);
   1232  1.3  haad }
   1233  1.3  haad 
   1234  1.3  haad int
   1235  1.1  haad zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
   1236  1.1  haad {
   1237  1.1  haad 	return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx));
   1238  1.1  haad }
   1239  1.1  haad 
   1240  1.1  haad int
   1241  1.1  haad zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
   1242  1.1  haad     matchtype_t mt, dmu_tx_t *tx)
   1243  1.1  haad {
   1244  1.1  haad 	zap_t *zap;
   1245  1.1  haad 	int err;
   1246  1.1  haad 	mzap_ent_t *mze;
   1247  1.1  haad 	zap_name_t *zn;
   1248  1.1  haad 
   1249  1.4   chs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
   1250  1.1  haad 	if (err)
   1251  1.1  haad 		return (err);
   1252  1.1  haad 	zn = zap_name_alloc(zap, name, mt);
   1253  1.1  haad 	if (zn == NULL) {
   1254  1.4   chs 		zap_unlockdir(zap, FTAG);
   1255  1.4   chs 		return (SET_ERROR(ENOTSUP));
   1256  1.1  haad 	}
   1257  1.1  haad 	if (!zap->zap_ismicro) {
   1258  1.1  haad 		err = fzap_remove(zn, tx);
   1259  1.1  haad 	} else {
   1260  1.1  haad 		mze = mze_find(zn);
   1261  1.1  haad 		if (mze == NULL) {
   1262  1.4   chs 			err = SET_ERROR(ENOENT);
   1263  1.1  haad 		} else {
   1264  1.1  haad 			zap->zap_m.zap_num_entries--;
   1265  1.4   chs 			bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid],
   1266  1.1  haad 			    sizeof (mzap_ent_phys_t));
   1267  1.1  haad 			mze_remove(zap, mze);
   1268  1.1  haad 		}
   1269  1.1  haad 	}
   1270  1.1  haad 	zap_name_free(zn);
   1271  1.4   chs 	zap_unlockdir(zap, FTAG);
   1272  1.1  haad 	return (err);
   1273  1.1  haad }
   1274  1.1  haad 
   1275  1.3  haad int
   1276  1.3  haad zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
   1277  1.3  haad     int key_numints, dmu_tx_t *tx)
   1278  1.3  haad {
   1279  1.3  haad 	zap_t *zap;
   1280  1.3  haad 	int err;
   1281  1.3  haad 	zap_name_t *zn;
   1282  1.3  haad 
   1283  1.4   chs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
   1284  1.3  haad 	if (err)
   1285  1.3  haad 		return (err);
   1286  1.3  haad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
   1287  1.3  haad 	if (zn == NULL) {
   1288  1.4   chs 		zap_unlockdir(zap, FTAG);
   1289  1.4   chs 		return (SET_ERROR(ENOTSUP));
   1290  1.3  haad 	}
   1291  1.3  haad 	err = fzap_remove(zn, tx);
   1292  1.3  haad 	zap_name_free(zn);
   1293  1.4   chs 	zap_unlockdir(zap, FTAG);
   1294  1.3  haad 	return (err);
   1295  1.3  haad }
   1296  1.3  haad 
   1297  1.1  haad /*
   1298  1.1  haad  * Routines for iterating over the attributes.
   1299  1.1  haad  */
   1300  1.1  haad 
   1301  1.1  haad void
   1302  1.1  haad zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
   1303  1.1  haad     uint64_t serialized)
   1304  1.1  haad {
   1305  1.1  haad 	zc->zc_objset = os;
   1306  1.1  haad 	zc->zc_zap = NULL;
   1307  1.1  haad 	zc->zc_leaf = NULL;
   1308  1.1  haad 	zc->zc_zapobj = zapobj;
   1309  1.3  haad 	zc->zc_serialized = serialized;
   1310  1.3  haad 	zc->zc_hash = 0;
   1311  1.3  haad 	zc->zc_cd = 0;
   1312  1.1  haad }
   1313  1.1  haad 
   1314  1.1  haad void
   1315  1.1  haad zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
   1316  1.1  haad {
   1317  1.1  haad 	zap_cursor_init_serialized(zc, os, zapobj, 0);
   1318  1.1  haad }
   1319  1.1  haad 
   1320  1.1  haad void
   1321  1.1  haad zap_cursor_fini(zap_cursor_t *zc)
   1322  1.1  haad {
   1323  1.1  haad 	if (zc->zc_zap) {
   1324  1.1  haad 		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
   1325  1.4   chs 		zap_unlockdir(zc->zc_zap, NULL);
   1326  1.1  haad 		zc->zc_zap = NULL;
   1327  1.1  haad 	}
   1328  1.1  haad 	if (zc->zc_leaf) {
   1329  1.1  haad 		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
   1330  1.1  haad 		zap_put_leaf(zc->zc_leaf);
   1331  1.1  haad 		zc->zc_leaf = NULL;
   1332  1.1  haad 	}
   1333  1.1  haad 	zc->zc_objset = NULL;
   1334  1.1  haad }
   1335  1.1  haad 
   1336  1.1  haad uint64_t
   1337  1.1  haad zap_cursor_serialize(zap_cursor_t *zc)
   1338  1.1  haad {
   1339  1.1  haad 	if (zc->zc_hash == -1ULL)
   1340  1.1  haad 		return (-1ULL);
   1341  1.3  haad 	if (zc->zc_zap == NULL)
   1342  1.3  haad 		return (zc->zc_serialized);
   1343  1.3  haad 	ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
   1344  1.3  haad 	ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
   1345  1.3  haad 
   1346  1.3  haad 	/*
   1347  1.3  haad 	 * We want to keep the high 32 bits of the cursor zero if we can, so
   1348  1.3  haad 	 * that 32-bit programs can access this.  So usually use a small
   1349  1.3  haad 	 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
   1350  1.3  haad 	 * of the cursor.
   1351  1.3  haad 	 *
   1352  1.3  haad 	 * [ collision differentiator | zap_hashbits()-bit hash value ]
   1353  1.3  haad 	 */
   1354  1.3  haad 	return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
   1355  1.3  haad 	    ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
   1356  1.1  haad }
   1357  1.1  haad 
   1358  1.1  haad int
   1359  1.1  haad zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
   1360  1.1  haad {
   1361  1.1  haad 	int err;
   1362  1.1  haad 	avl_index_t idx;
   1363  1.1  haad 	mzap_ent_t mze_tofind;
   1364  1.1  haad 	mzap_ent_t *mze;
   1365  1.1  haad 
   1366  1.1  haad 	if (zc->zc_hash == -1ULL)
   1367  1.4   chs 		return (SET_ERROR(ENOENT));
   1368  1.1  haad 
   1369  1.1  haad 	if (zc->zc_zap == NULL) {
   1370  1.3  haad 		int hb;
   1371  1.1  haad 		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
   1372  1.4   chs 		    RW_READER, TRUE, FALSE, NULL, &zc->zc_zap);
   1373  1.1  haad 		if (err)
   1374  1.1  haad 			return (err);
   1375  1.3  haad 
   1376  1.3  haad 		/*
   1377  1.3  haad 		 * To support zap_cursor_init_serialized, advance, retrieve,
   1378  1.3  haad 		 * we must add to the existing zc_cd, which may already
   1379  1.3  haad 		 * be 1 due to the zap_cursor_advance.
   1380  1.3  haad 		 */
   1381  1.3  haad 		ASSERT(zc->zc_hash == 0);
   1382  1.3  haad 		hb = zap_hashbits(zc->zc_zap);
   1383  1.3  haad 		zc->zc_hash = zc->zc_serialized << (64 - hb);
   1384  1.3  haad 		zc->zc_cd += zc->zc_serialized >> hb;
   1385  1.3  haad 		if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
   1386  1.3  haad 			zc->zc_cd = 0;
   1387  1.1  haad 	} else {
   1388  1.1  haad 		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
   1389  1.1  haad 	}
   1390  1.1  haad 	if (!zc->zc_zap->zap_ismicro) {
   1391  1.1  haad 		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
   1392  1.1  haad 	} else {
   1393  1.1  haad 		mze_tofind.mze_hash = zc->zc_hash;
   1394  1.4   chs 		mze_tofind.mze_cd = zc->zc_cd;
   1395  1.1  haad 
   1396  1.1  haad 		mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
   1397  1.1  haad 		if (mze == NULL) {
   1398  1.1  haad 			mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
   1399  1.1  haad 			    idx, AVL_AFTER);
   1400  1.1  haad 		}
   1401  1.1  haad 		if (mze) {
   1402  1.4   chs 			mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
   1403  1.4   chs 			ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
   1404  1.1  haad 			za->za_normalization_conflict =
   1405  1.1  haad 			    mzap_normalization_conflict(zc->zc_zap, NULL, mze);
   1406  1.1  haad 			za->za_integer_length = 8;
   1407  1.1  haad 			za->za_num_integers = 1;
   1408  1.4   chs 			za->za_first_integer = mzep->mze_value;
   1409  1.4   chs 			(void) strcpy(za->za_name, mzep->mze_name);
   1410  1.1  haad 			zc->zc_hash = mze->mze_hash;
   1411  1.4   chs 			zc->zc_cd = mze->mze_cd;
   1412  1.1  haad 			err = 0;
   1413  1.1  haad 		} else {
   1414  1.1  haad 			zc->zc_hash = -1ULL;
   1415  1.4   chs 			err = SET_ERROR(ENOENT);
   1416  1.1  haad 		}
   1417  1.1  haad 	}
   1418  1.1  haad 	rw_exit(&zc->zc_zap->zap_rwlock);
   1419  1.1  haad 	return (err);
   1420  1.1  haad }
   1421  1.1  haad 
   1422  1.1  haad void
   1423  1.1  haad zap_cursor_advance(zap_cursor_t *zc)
   1424  1.1  haad {
   1425  1.1  haad 	if (zc->zc_hash == -1ULL)
   1426  1.1  haad 		return;
   1427  1.1  haad 	zc->zc_cd++;
   1428  1.3  haad }
   1429  1.3  haad 
   1430  1.3  haad int
   1431  1.3  haad zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt)
   1432  1.3  haad {
   1433  1.3  haad 	int err = 0;
   1434  1.3  haad 	mzap_ent_t *mze;
   1435  1.3  haad 	zap_name_t *zn;
   1436  1.3  haad 
   1437  1.3  haad 	if (zc->zc_zap == NULL) {
   1438  1.3  haad 		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
   1439  1.4   chs 		    RW_READER, TRUE, FALSE, FTAG, &zc->zc_zap);
   1440  1.3  haad 		if (err)
   1441  1.3  haad 			return (err);
   1442  1.3  haad 	} else {
   1443  1.3  haad 		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
   1444  1.3  haad 	}
   1445  1.3  haad 
   1446  1.3  haad 	zn = zap_name_alloc(zc->zc_zap, name, mt);
   1447  1.3  haad 	if (zn == NULL) {
   1448  1.3  haad 		rw_exit(&zc->zc_zap->zap_rwlock);
   1449  1.4   chs 		return (SET_ERROR(ENOTSUP));
   1450  1.3  haad 	}
   1451  1.3  haad 
   1452  1.3  haad 	if (!zc->zc_zap->zap_ismicro) {
   1453  1.3  haad 		err = fzap_cursor_move_to_key(zc, zn);
   1454  1.3  haad 	} else {
   1455  1.3  haad 		mze = mze_find(zn);
   1456  1.3  haad 		if (mze == NULL) {
   1457  1.4   chs 			err = SET_ERROR(ENOENT);
   1458  1.3  haad 			goto out;
   1459  1.3  haad 		}
   1460  1.3  haad 		zc->zc_hash = mze->mze_hash;
   1461  1.4   chs 		zc->zc_cd = mze->mze_cd;
   1462  1.1  haad 	}
   1463  1.3  haad 
   1464  1.3  haad out:
   1465  1.3  haad 	zap_name_free(zn);
   1466  1.3  haad 	rw_exit(&zc->zc_zap->zap_rwlock);
   1467  1.3  haad 	return (err);
   1468  1.1  haad }
   1469  1.1  haad 
   1470  1.1  haad int
   1471  1.1  haad zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
   1472  1.1  haad {
   1473  1.1  haad 	int err;
   1474  1.1  haad 	zap_t *zap;
   1475  1.1  haad 
   1476  1.4   chs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
   1477  1.1  haad 	if (err)
   1478  1.1  haad 		return (err);
   1479  1.1  haad 
   1480  1.1  haad 	bzero(zs, sizeof (zap_stats_t));
   1481  1.1  haad 
   1482  1.1  haad 	if (zap->zap_ismicro) {
   1483  1.1  haad 		zs->zs_blocksize = zap->zap_dbuf->db_size;
   1484  1.1  haad 		zs->zs_num_entries = zap->zap_m.zap_num_entries;
   1485  1.1  haad 		zs->zs_num_blocks = 1;
   1486  1.1  haad 	} else {
   1487  1.1  haad 		fzap_get_stats(zap, zs);
   1488  1.1  haad 	}
   1489  1.4   chs 	zap_unlockdir(zap, FTAG);
   1490  1.1  haad 	return (0);
   1491  1.1  haad }
   1492  1.3  haad 
   1493  1.3  haad int
   1494  1.4   chs zap_count_write_by_dnode(dnode_t *dn, const char *name, int add,
   1495  1.4   chs     refcount_t *towrite, refcount_t *tooverwrite)
   1496  1.3  haad {
   1497  1.3  haad 	zap_t *zap;
   1498  1.3  haad 	int err = 0;
   1499  1.3  haad 
   1500  1.3  haad 	/*
   1501  1.3  haad 	 * Since, we don't have a name, we cannot figure out which blocks will
   1502  1.3  haad 	 * be affected in this operation. So, account for the worst case :
   1503  1.3  haad 	 * - 3 blocks overwritten: target leaf, ptrtbl block, header block
   1504  1.3  haad 	 * - 4 new blocks written if adding:
   1505  1.4   chs 	 *    - 2 blocks for possibly split leaves,
   1506  1.4   chs 	 *    - 2 grown ptrtbl blocks
   1507  1.3  haad 	 *
   1508  1.4   chs 	 * This also accommodates the case where an add operation to a fairly
   1509  1.3  haad 	 * large microzap results in a promotion to fatzap.
   1510  1.3  haad 	 */
   1511  1.3  haad 	if (name == NULL) {
   1512  1.4   chs 		(void) refcount_add_many(towrite,
   1513  1.4   chs 		    (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG);
   1514  1.3  haad 		return (err);
   1515  1.3  haad 	}
   1516  1.3  haad 
   1517  1.3  haad 	/*
   1518  1.4   chs 	 * We lock the zap with adding == FALSE. Because, if we pass
   1519  1.3  haad 	 * the actual value of add, it could trigger a mzap_upgrade().
   1520  1.3  haad 	 * At present we are just evaluating the possibility of this operation
   1521  1.4   chs 	 * and hence we do not want to trigger an upgrade.
   1522  1.3  haad 	 */
   1523  1.4   chs 	err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
   1524  1.4   chs 	    FTAG, &zap);
   1525  1.4   chs 	if (err != 0)
   1526  1.3  haad 		return (err);
   1527  1.3  haad 
   1528  1.3  haad 	if (!zap->zap_ismicro) {
   1529  1.3  haad 		zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
   1530  1.3  haad 		if (zn) {
   1531  1.3  haad 			err = fzap_count_write(zn, add, towrite,
   1532  1.3  haad 			    tooverwrite);
   1533  1.3  haad 			zap_name_free(zn);
   1534  1.3  haad 		} else {
   1535  1.3  haad 			/*
   1536  1.3  haad 			 * We treat this case as similar to (name == NULL)
   1537  1.3  haad 			 */
   1538  1.4   chs 			(void) refcount_add_many(towrite,
   1539  1.4   chs 			    (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG);
   1540  1.3  haad 		}
   1541  1.3  haad 	} else {
   1542  1.3  haad 		/*
   1543  1.3  haad 		 * We are here if (name != NULL) and this is a micro-zap.
   1544  1.3  haad 		 * We account for the header block depending on whether it
   1545  1.3  haad 		 * is freeable.
   1546  1.3  haad 		 *
   1547  1.3  haad 		 * Incase of an add-operation it is hard to find out
   1548  1.3  haad 		 * if this add will promote this microzap to fatzap.
   1549  1.3  haad 		 * Hence, we consider the worst case and account for the
   1550  1.3  haad 		 * blocks assuming this microzap would be promoted to a
   1551  1.3  haad 		 * fatzap.
   1552  1.3  haad 		 *
   1553  1.3  haad 		 * 1 block overwritten  : header block
   1554  1.3  haad 		 * 4 new blocks written : 2 new split leaf, 2 grown
   1555  1.3  haad 		 *			ptrtbl blocks
   1556  1.3  haad 		 */
   1557  1.4   chs 		if (dmu_buf_freeable(zap->zap_dbuf)) {
   1558  1.4   chs 			(void) refcount_add_many(tooverwrite,
   1559  1.4   chs 			    MZAP_MAX_BLKSZ, FTAG);
   1560  1.4   chs 		} else {
   1561  1.4   chs 			(void) refcount_add_many(towrite,
   1562  1.4   chs 			    MZAP_MAX_BLKSZ, FTAG);
   1563  1.4   chs 		}
   1564  1.3  haad 
   1565  1.3  haad 		if (add) {
   1566  1.4   chs 			(void) refcount_add_many(towrite,
   1567  1.4   chs 			    4 * MZAP_MAX_BLKSZ, FTAG);
   1568  1.3  haad 		}
   1569  1.3  haad 	}
   1570  1.3  haad 
   1571  1.4   chs 	zap_unlockdir(zap, FTAG);
   1572  1.3  haad 	return (err);
   1573  1.3  haad }
   1574