1 1.1 chs /* 2 1.1 chs * CDDL HEADER START 3 1.1 chs * 4 1.1 chs * The contents of this file are subject to the terms of the 5 1.1 chs * Common Development and Distribution License (the "License"). 6 1.1 chs * You may not use this file except in compliance with the License. 7 1.1 chs * 8 1.1 chs * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 1.1 chs * or http://www.opensolaris.org/os/licensing. 10 1.1 chs * See the License for the specific language governing permissions 11 1.1 chs * and limitations under the License. 12 1.1 chs * 13 1.1 chs * When distributing Covered Code, include this CDDL HEADER in each 14 1.1 chs * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 1.1 chs * If applicable, add the following below this CDDL HEADER, with the 16 1.1 chs * fields enclosed by brackets "[]" replaced with your own identifying 17 1.1 chs * information: Portions Copyright [yyyy] [name of copyright owner] 18 1.1 chs * 19 1.1 chs * CDDL HEADER END 20 1.1 chs */ 21 1.1 chs 22 1.1 chs /* 23 1.1 chs * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 1.1 chs * Portions Copyright 2011 iXsystems, Inc 25 1.1 chs * Copyright (c) 2013, 2016 by Delphix. All rights reserved. 26 1.1 chs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 
27 1.1 chs * Copyright (c) 2014 Integros [integros.com] 28 1.1 chs */ 29 1.1 chs 30 1.1 chs #include <sys/zfs_context.h> 31 1.1 chs #include <sys/types.h> 32 1.1 chs #include <sys/param.h> 33 1.1 chs #include <sys/systm.h> 34 1.1 chs #include <sys/sysmacros.h> 35 1.1 chs #include <sys/dmu.h> 36 1.1 chs #include <sys/dmu_impl.h> 37 1.1 chs #include <sys/dmu_objset.h> 38 1.1 chs #include <sys/dbuf.h> 39 1.1 chs #include <sys/dnode.h> 40 1.1 chs #include <sys/zap.h> 41 1.1 chs #include <sys/sa.h> 42 1.1 chs #include <sys/sunddi.h> 43 1.1 chs #include <sys/sa_impl.h> 44 1.1 chs #include <sys/dnode.h> 45 1.1 chs #include <sys/errno.h> 46 1.1 chs #include <sys/zfs_context.h> 47 1.1 chs 48 1.1 chs /* 49 1.1 chs * ZFS System attributes: 50 1.1 chs * 51 1.1 chs * A generic mechanism to allow for arbitrary attributes 52 1.1 chs * to be stored in a dnode. The data will be stored in the bonus buffer of 53 1.1 chs * the dnode and if necessary a special "spill" block will be used to handle 54 1.1 chs * overflow situations. The spill block will be sized to fit the data 55 1.1 chs * from 512 - 128K. When a spill block is used the BP (blkptr_t) for the 56 1.1 chs * spill block is stored at the end of the current bonus buffer. Any 57 1.1 chs * attributes that would be in the way of the blkptr_t will be relocated 58 1.1 chs * into the spill block. 59 1.1 chs * 60 1.1 chs * Attribute registration: 61 1.1 chs * 62 1.1 chs * Stored persistently on a per dataset basis 63 1.1 chs * a mapping between attribute "string" names and their actual attribute 64 1.1 chs * numeric values, length, and byteswap function. The names are only used 65 1.1 chs * during registration. All attributes are known by their unique attribute 66 1.1 chs * id value. If an attribute can have a variable size then the value 67 1.1 chs * 0 will be used to indicate this. 
 *
 * Attribute Layout:
 *
 * Attribute layouts are a way to compactly store multiple attributes, but
 * without taking the overhead associated with managing each attribute
 * individually. Since you will typically have the same set of attributes
 * stored in the same order a single table will be used to represent that
 * layout. The ZPL for example will usually have only about 10 different
 * layouts (regular files, device files, symlinks,
 * regular files + scanstamp, files/dir with extended attributes, and then
 * you have the possibility of all of those minus ACL, because it would
 * be kicked out into the spill block)
 *
 * Layouts are simply an array of the attributes and their
 * ordering i.e. [0, 1, 4, 5, 2]
 *
 * Each distinct layout is given a unique layout number and that is what is
 * stored in the header at the beginning of the SA data buffer.
 *
 * A layout only covers a single dbuf (bonus or spill). If a set of
 * attributes is split up between the bonus buffer and a spill buffer then
 * two different layouts will be used. This allows us to byteswap the
 * spill without looking at the bonus buffer and keeps the on disk format of
 * the bonus and spill buffer the same.
 *
 * Adding a single attribute will cause the entire set of attributes to
 * be rewritten and could result in a new layout number being constructed
 * as part of the rewrite if no such layout exists for the new set of
 * attributes. The new attribute will be appended to the end of the already
 * existing attributes.
 *
 * Both the attribute registration and attribute layout information are
 * stored in normal ZAP attributes. There should be a small number of
 * known layouts and the set of attributes is assumed to typically be quite
 * small.
 *
 * The registered attributes and layout "table" information is maintained
 * in core and a special "sa_os_t" is attached to the objset_t.
 *
 * A special interface is provided to allow for quickly applying
 * a large set of attributes at once. sa_replace_all_by_template() is
 * used to set an array of attributes. This is used by the ZPL when
 * creating a brand new file. The template that is passed into the function
 * specifies the attribute, size for variable length attributes, location of
 * data and special "data locator" function if the data isn't in a contiguous
 * location.
 *
 * Byteswap implications:
 *
 * Since the SA attributes are not entirely self describing we can't do
 * the normal byteswap processing. The special ZAP layout attribute and
 * attribute registration attributes define the byteswap function and the
 * size of the attributes, unless it is variable sized.
 * The normal ZFS byteswapping infrastructure assumes you don't need
 * to read any objects in order to do the necessary byteswapping. Whereas
 * SA attributes can only be properly byteswapped if the dataset is opened
 * and the layout/attribute ZAP attributes are available. Because of this
 * the SA attributes will be byteswapped when they are first accessed by
 * the SA code that will read the SA data.
127 1.1 chs */ 128 1.1 chs 129 1.1 chs typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t, 130 1.1 chs uint16_t length, int length_idx, boolean_t, void *userp); 131 1.1 chs 132 1.1 chs static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype); 133 1.1 chs static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab); 134 1.1 chs static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, 135 1.1 chs void *data); 136 1.1 chs static void sa_idx_tab_rele(objset_t *os, void *arg); 137 1.1 chs static void sa_copy_data(sa_data_locator_t *func, void *start, void *target, 138 1.1 chs int buflen); 139 1.1 chs static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 140 1.1 chs sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 141 1.1 chs uint16_t buflen, dmu_tx_t *tx); 142 1.1 chs 143 1.1 chs arc_byteswap_func_t *sa_bswap_table[] = { 144 1.1 chs byteswap_uint64_array, 145 1.1 chs byteswap_uint32_array, 146 1.1 chs byteswap_uint16_array, 147 1.1 chs byteswap_uint8_array, 148 1.1 chs zfs_acl_byteswap, 149 1.1 chs }; 150 1.1 chs 151 1.1 chs #define SA_COPY_DATA(f, s, t, l) \ 152 1.1 chs { \ 153 1.1 chs if (f == NULL) { \ 154 1.1 chs if (l == 8) { \ 155 1.1 chs *(uint64_t *)t = *(uint64_t *)s; \ 156 1.1 chs } else if (l == 16) { \ 157 1.1 chs *(uint64_t *)t = *(uint64_t *)s; \ 158 1.1 chs *(uint64_t *)((uintptr_t)t + 8) = \ 159 1.1 chs *(uint64_t *)((uintptr_t)s + 8); \ 160 1.1 chs } else { \ 161 1.1 chs bcopy(s, t, l); \ 162 1.1 chs } \ 163 1.1 chs } else \ 164 1.1 chs sa_copy_data(f, s, t, l); \ 165 1.1 chs } 166 1.1 chs 167 1.1 chs /* 168 1.1 chs * This table is fixed and cannot be changed. Its purpose is to 169 1.1 chs * allow the SA code to work with both old/new ZPL file systems. 170 1.1 chs * It contains the list of legacy attributes. These attributes aren't 171 1.1 chs * stored in the "attribute" registry zap objects, since older ZPL file systems 172 1.1 chs * won't have the registry. 
Only objsets of type ZFS_TYPE_FILESYSTEM will 173 1.1 chs * use this static table. 174 1.1 chs */ 175 1.1 chs sa_attr_reg_t sa_legacy_attrs[] = { 176 1.1 chs {"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, 177 1.1 chs {"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1}, 178 1.1 chs {"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2}, 179 1.1 chs {"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3}, 180 1.1 chs {"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4}, 181 1.1 chs {"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5}, 182 1.1 chs {"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6}, 183 1.1 chs {"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7}, 184 1.1 chs {"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, 185 1.1 chs {"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, 186 1.1 chs {"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, 187 1.1 chs {"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11}, 188 1.1 chs {"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12}, 189 1.1 chs {"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13}, 190 1.1 chs {"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14}, 191 1.1 chs {"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15}, 192 1.1 chs }; 193 1.1 chs 194 1.1 chs /* 195 1.1 chs * This is only used for objects of type DMU_OT_ZNODE 196 1.1 chs */ 197 1.1 chs sa_attr_type_t sa_legacy_zpl_layout[] = { 198 1.1 chs 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 199 1.1 chs }; 200 1.1 chs 201 1.1 chs /* 202 1.1 chs * Special dummy layout used for buffers with no attributes. 
203 1.1 chs */ 204 1.1 chs sa_attr_type_t sa_dummy_zpl_layout[] = { 0 }; 205 1.1 chs 206 1.1 chs static int sa_legacy_attr_count = 16; 207 1.1 chs static kmem_cache_t *sa_cache = NULL; 208 1.1 chs 209 1.1 chs /*ARGSUSED*/ 210 1.1 chs static int 211 1.1 chs sa_cache_constructor(void *buf, void *unused, int kmflag) 212 1.1 chs { 213 1.1 chs sa_handle_t *hdl = buf; 214 1.1 chs 215 1.1 chs mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL); 216 1.1 chs return (0); 217 1.1 chs } 218 1.1 chs 219 1.1 chs /*ARGSUSED*/ 220 1.1 chs static void 221 1.1 chs sa_cache_destructor(void *buf, void *unused) 222 1.1 chs { 223 1.1 chs sa_handle_t *hdl = buf; 224 1.2 chs 225 1.1 chs mutex_destroy(&hdl->sa_lock); 226 1.1 chs } 227 1.1 chs 228 1.1 chs void 229 1.1 chs sa_cache_init(void) 230 1.1 chs { 231 1.1 chs sa_cache = kmem_cache_create("sa_cache", 232 1.1 chs sizeof (sa_handle_t), 0, sa_cache_constructor, 233 1.1 chs sa_cache_destructor, NULL, NULL, NULL, 0); 234 1.1 chs } 235 1.1 chs 236 1.1 chs void 237 1.1 chs sa_cache_fini(void) 238 1.1 chs { 239 1.1 chs if (sa_cache) 240 1.1 chs kmem_cache_destroy(sa_cache); 241 1.1 chs } 242 1.1 chs 243 1.1 chs static int 244 1.1 chs layout_num_compare(const void *arg1, const void *arg2) 245 1.1 chs { 246 1.1 chs const sa_lot_t *node1 = arg1; 247 1.1 chs const sa_lot_t *node2 = arg2; 248 1.1 chs 249 1.1 chs if (node1->lot_num > node2->lot_num) 250 1.1 chs return (1); 251 1.1 chs else if (node1->lot_num < node2->lot_num) 252 1.1 chs return (-1); 253 1.1 chs return (0); 254 1.1 chs } 255 1.1 chs 256 1.1 chs static int 257 1.1 chs layout_hash_compare(const void *arg1, const void *arg2) 258 1.1 chs { 259 1.1 chs const sa_lot_t *node1 = arg1; 260 1.1 chs const sa_lot_t *node2 = arg2; 261 1.1 chs 262 1.1 chs if (node1->lot_hash > node2->lot_hash) 263 1.1 chs return (1); 264 1.1 chs if (node1->lot_hash < node2->lot_hash) 265 1.1 chs return (-1); 266 1.1 chs if (node1->lot_instance > node2->lot_instance) 267 1.1 chs return (1); 268 1.1 chs if 
(node1->lot_instance < node2->lot_instance) 269 1.1 chs return (-1); 270 1.1 chs return (0); 271 1.1 chs } 272 1.1 chs 273 1.1 chs boolean_t 274 1.1 chs sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count) 275 1.1 chs { 276 1.1 chs int i; 277 1.1 chs 278 1.1 chs if (count != tbf->lot_attr_count) 279 1.1 chs return (1); 280 1.1 chs 281 1.1 chs for (i = 0; i != count; i++) { 282 1.1 chs if (attrs[i] != tbf->lot_attrs[i]) 283 1.1 chs return (1); 284 1.1 chs } 285 1.1 chs return (0); 286 1.1 chs } 287 1.1 chs 288 1.1 chs #define SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ attr) & 0xFF]) 289 1.1 chs 290 1.1 chs static uint64_t 291 1.1 chs sa_layout_info_hash(sa_attr_type_t *attrs, int attr_count) 292 1.1 chs { 293 1.1 chs int i; 294 1.1 chs uint64_t crc = -1ULL; 295 1.1 chs 296 1.1 chs for (i = 0; i != attr_count; i++) 297 1.1 chs crc ^= SA_ATTR_HASH(attrs[i]); 298 1.1 chs 299 1.1 chs return (crc); 300 1.1 chs } 301 1.1 chs 302 1.1 chs static int 303 1.1 chs sa_get_spill(sa_handle_t *hdl) 304 1.1 chs { 305 1.1 chs int rc; 306 1.1 chs if (hdl->sa_spill == NULL) { 307 1.1 chs if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL, 308 1.1 chs &hdl->sa_spill)) == 0) 309 1.1 chs VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 310 1.1 chs } else { 311 1.1 chs rc = 0; 312 1.1 chs } 313 1.1 chs 314 1.1 chs return (rc); 315 1.1 chs } 316 1.1 chs 317 1.1 chs /* 318 1.1 chs * Main attribute lookup/update function 319 1.1 chs * returns 0 for success or non zero for failures 320 1.1 chs * 321 1.1 chs * Operates on bulk array, first failure will abort further processing 322 1.1 chs */ 323 1.1 chs int 324 1.1 chs sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 325 1.1 chs sa_data_op_t data_op, dmu_tx_t *tx) 326 1.1 chs { 327 1.1 chs sa_os_t *sa = hdl->sa_os->os_sa; 328 1.1 chs int i; 329 1.1 chs int error = 0; 330 1.1 chs sa_buf_type_t buftypes; 331 1.1 chs 332 1.1 chs buftypes = 0; 333 1.1 chs 334 1.1 chs ASSERT(count > 0); 335 1.1 chs for (i = 0; i != count; 
i++) { 336 1.1 chs ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs); 337 1.1 chs 338 1.1 chs bulk[i].sa_addr = NULL; 339 1.1 chs /* First check the bonus buffer */ 340 1.1 chs 341 1.1 chs if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT( 342 1.1 chs hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) { 343 1.1 chs SA_ATTR_INFO(sa, hdl->sa_bonus_tab, 344 1.1 chs SA_GET_HDR(hdl, SA_BONUS), 345 1.1 chs bulk[i].sa_attr, bulk[i], SA_BONUS, hdl); 346 1.1 chs if (tx && !(buftypes & SA_BONUS)) { 347 1.1 chs dmu_buf_will_dirty(hdl->sa_bonus, tx); 348 1.1 chs buftypes |= SA_BONUS; 349 1.1 chs } 350 1.1 chs } 351 1.1 chs if (bulk[i].sa_addr == NULL && 352 1.1 chs ((error = sa_get_spill(hdl)) == 0)) { 353 1.1 chs if (TOC_ATTR_PRESENT( 354 1.1 chs hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) { 355 1.1 chs SA_ATTR_INFO(sa, hdl->sa_spill_tab, 356 1.1 chs SA_GET_HDR(hdl, SA_SPILL), 357 1.1 chs bulk[i].sa_attr, bulk[i], SA_SPILL, hdl); 358 1.1 chs if (tx && !(buftypes & SA_SPILL) && 359 1.1 chs bulk[i].sa_size == bulk[i].sa_length) { 360 1.1 chs dmu_buf_will_dirty(hdl->sa_spill, tx); 361 1.1 chs buftypes |= SA_SPILL; 362 1.1 chs } 363 1.1 chs } 364 1.1 chs } 365 1.1 chs if (error && error != ENOENT) { 366 1.1 chs return ((error == ECKSUM) ? 
EIO : error); 367 1.1 chs } 368 1.1 chs 369 1.1 chs switch (data_op) { 370 1.1 chs case SA_LOOKUP: 371 1.1 chs if (bulk[i].sa_addr == NULL) 372 1.1 chs return (SET_ERROR(ENOENT)); 373 1.1 chs if (bulk[i].sa_data) { 374 1.1 chs SA_COPY_DATA(bulk[i].sa_data_func, 375 1.1 chs bulk[i].sa_addr, bulk[i].sa_data, 376 1.1 chs bulk[i].sa_size); 377 1.1 chs } 378 1.1 chs continue; 379 1.1 chs 380 1.1 chs case SA_UPDATE: 381 1.1 chs /* existing rewrite of attr */ 382 1.1 chs if (bulk[i].sa_addr && 383 1.1 chs bulk[i].sa_size == bulk[i].sa_length) { 384 1.1 chs SA_COPY_DATA(bulk[i].sa_data_func, 385 1.1 chs bulk[i].sa_data, bulk[i].sa_addr, 386 1.1 chs bulk[i].sa_length); 387 1.1 chs continue; 388 1.1 chs } else if (bulk[i].sa_addr) { /* attr size change */ 389 1.1 chs error = sa_modify_attrs(hdl, bulk[i].sa_attr, 390 1.1 chs SA_REPLACE, bulk[i].sa_data_func, 391 1.1 chs bulk[i].sa_data, bulk[i].sa_length, tx); 392 1.1 chs } else { /* adding new attribute */ 393 1.1 chs error = sa_modify_attrs(hdl, bulk[i].sa_attr, 394 1.1 chs SA_ADD, bulk[i].sa_data_func, 395 1.1 chs bulk[i].sa_data, bulk[i].sa_length, tx); 396 1.1 chs } 397 1.1 chs if (error) 398 1.1 chs return (error); 399 1.1 chs break; 400 1.1 chs } 401 1.1 chs } 402 1.1 chs return (error); 403 1.1 chs } 404 1.1 chs 405 1.1 chs static sa_lot_t * 406 1.1 chs sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count, 407 1.1 chs uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx) 408 1.1 chs { 409 1.1 chs sa_os_t *sa = os->os_sa; 410 1.1 chs sa_lot_t *tb, *findtb; 411 1.1 chs int i; 412 1.1 chs avl_index_t loc; 413 1.1 chs 414 1.1 chs ASSERT(MUTEX_HELD(&sa->sa_lock)); 415 1.1 chs tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP); 416 1.1 chs tb->lot_attr_count = attr_count; 417 1.2 chs #ifdef __NetBSD__ 418 1.2 chs if (attr_count != 0) 419 1.2 chs #endif 420 1.1 chs tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 421 1.1 chs KM_SLEEP); 422 1.1 chs bcopy(attrs, tb->lot_attrs, sizeof 
(sa_attr_type_t) * attr_count); 423 1.1 chs tb->lot_num = lot_num; 424 1.1 chs tb->lot_hash = hash; 425 1.1 chs tb->lot_instance = 0; 426 1.1 chs 427 1.1 chs if (zapadd) { 428 1.1 chs char attr_name[8]; 429 1.1 chs 430 1.1 chs if (sa->sa_layout_attr_obj == 0) { 431 1.1 chs sa->sa_layout_attr_obj = zap_create_link(os, 432 1.1 chs DMU_OT_SA_ATTR_LAYOUTS, 433 1.1 chs sa->sa_master_obj, SA_LAYOUTS, tx); 434 1.1 chs } 435 1.1 chs 436 1.1 chs (void) snprintf(attr_name, sizeof (attr_name), 437 1.1 chs "%d", (int)lot_num); 438 1.1 chs VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj, 439 1.1 chs attr_name, 2, attr_count, attrs, tx)); 440 1.1 chs } 441 1.1 chs 442 1.1 chs list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t), 443 1.1 chs offsetof(sa_idx_tab_t, sa_next)); 444 1.1 chs 445 1.1 chs for (i = 0; i != attr_count; i++) { 446 1.1 chs if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0) 447 1.1 chs tb->lot_var_sizes++; 448 1.1 chs } 449 1.1 chs 450 1.1 chs avl_add(&sa->sa_layout_num_tree, tb); 451 1.1 chs 452 1.1 chs /* verify we don't have a hash collision */ 453 1.1 chs if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) { 454 1.1 chs for (; findtb && findtb->lot_hash == hash; 455 1.1 chs findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) { 456 1.1 chs if (findtb->lot_instance != tb->lot_instance) 457 1.1 chs break; 458 1.1 chs tb->lot_instance++; 459 1.1 chs } 460 1.1 chs } 461 1.1 chs avl_add(&sa->sa_layout_hash_tree, tb); 462 1.1 chs return (tb); 463 1.1 chs } 464 1.1 chs 465 1.1 chs static void 466 1.1 chs sa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs, 467 1.1 chs int count, dmu_tx_t *tx, sa_lot_t **lot) 468 1.1 chs { 469 1.1 chs sa_lot_t *tb, tbsearch; 470 1.1 chs avl_index_t loc; 471 1.1 chs sa_os_t *sa = os->os_sa; 472 1.1 chs boolean_t found = B_FALSE; 473 1.1 chs 474 1.1 chs mutex_enter(&sa->sa_lock); 475 1.1 chs tbsearch.lot_hash = hash; 476 1.1 chs tbsearch.lot_instance = 0; 477 1.1 chs tb = 
avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc); 478 1.1 chs if (tb) { 479 1.1 chs for (; tb && tb->lot_hash == hash; 480 1.1 chs tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) { 481 1.1 chs if (sa_layout_equal(tb, attrs, count) == 0) { 482 1.1 chs found = B_TRUE; 483 1.1 chs break; 484 1.1 chs } 485 1.1 chs } 486 1.1 chs } 487 1.1 chs if (!found) { 488 1.1 chs tb = sa_add_layout_entry(os, attrs, count, 489 1.1 chs avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx); 490 1.1 chs } 491 1.1 chs mutex_exit(&sa->sa_lock); 492 1.1 chs *lot = tb; 493 1.1 chs } 494 1.1 chs 495 1.1 chs static int 496 1.1 chs sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) 497 1.1 chs { 498 1.1 chs int error; 499 1.1 chs uint32_t blocksize; 500 1.1 chs 501 1.1 chs if (size == 0) { 502 1.1 chs blocksize = SPA_MINBLOCKSIZE; 503 1.1 chs } else if (size > SPA_OLD_MAXBLOCKSIZE) { 504 1.1 chs ASSERT(0); 505 1.1 chs return (SET_ERROR(EFBIG)); 506 1.1 chs } else { 507 1.1 chs blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t); 508 1.1 chs } 509 1.1 chs 510 1.1 chs error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx); 511 1.1 chs ASSERT(error == 0); 512 1.1 chs return (error); 513 1.1 chs } 514 1.1 chs 515 1.1 chs static void 516 1.1 chs sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen) 517 1.1 chs { 518 1.1 chs if (func == NULL) { 519 1.1 chs bcopy(datastart, target, buflen); 520 1.1 chs } else { 521 1.1 chs boolean_t start; 522 1.1 chs int bytes; 523 1.1 chs void *dataptr; 524 1.1 chs void *saptr = target; 525 1.1 chs uint32_t length; 526 1.1 chs 527 1.1 chs start = B_TRUE; 528 1.1 chs bytes = 0; 529 1.1 chs while (bytes < buflen) { 530 1.1 chs func(&dataptr, &length, buflen, start, datastart); 531 1.1 chs bcopy(dataptr, saptr, length); 532 1.1 chs saptr = (void *)((caddr_t)saptr + length); 533 1.1 chs bytes += length; 534 1.1 chs start = B_FALSE; 535 1.1 chs } 536 1.1 chs } 537 1.1 chs } 538 1.1 chs 539 1.1 chs /* 540 1.1 chs * 
Determine several different sizes 541 1.1 chs * first the sa header size 542 1.1 chs * the number of bytes to be stored 543 1.1 chs * if spill would occur the index in the attribute array is returned 544 1.1 chs * 545 1.1 chs * the boolean will_spill will be set when spilling is necessary. It 546 1.1 chs * is only set when the buftype is SA_BONUS 547 1.1 chs */ 548 1.1 chs static int 549 1.1 chs sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count, 550 1.1 chs dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total, 551 1.1 chs boolean_t *will_spill) 552 1.1 chs { 553 1.1 chs int var_size = 0; 554 1.1 chs int i; 555 1.1 chs int full_space; 556 1.1 chs int hdrsize; 557 1.1 chs int extra_hdrsize; 558 1.1 chs 559 1.1 chs if (buftype == SA_BONUS && sa->sa_force_spill) { 560 1.1 chs *total = 0; 561 1.1 chs *index = 0; 562 1.1 chs *will_spill = B_TRUE; 563 1.1 chs return (0); 564 1.1 chs } 565 1.1 chs 566 1.1 chs *index = -1; 567 1.1 chs *total = 0; 568 1.1 chs *will_spill = B_FALSE; 569 1.1 chs 570 1.1 chs extra_hdrsize = 0; 571 1.1 chs hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 : 572 1.1 chs sizeof (sa_hdr_phys_t); 573 1.1 chs 574 1.1 chs full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size; 575 1.1 chs ASSERT(IS_P2ALIGNED(full_space, 8)); 576 1.1 chs 577 1.1 chs for (i = 0; i != attr_count; i++) { 578 1.1 chs boolean_t is_var_sz; 579 1.1 chs 580 1.1 chs *total = P2ROUNDUP(*total, 8); 581 1.1 chs *total += attr_desc[i].sa_length; 582 1.1 chs if (*will_spill) 583 1.1 chs continue; 584 1.1 chs 585 1.1 chs is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0); 586 1.1 chs if (is_var_sz) { 587 1.1 chs var_size++; 588 1.1 chs } 589 1.1 chs 590 1.1 chs if (is_var_sz && var_size > 1) { 591 1.1 chs /* 592 1.1 chs * Don't worry that the spill block might overflow. 593 1.1 chs * It will be resized if needed in sa_build_layouts(). 
594 1.1 chs */ 595 1.1 chs if (buftype == SA_SPILL || 596 1.1 chs P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) + 597 1.1 chs *total < full_space) { 598 1.1 chs /* 599 1.1 chs * Account for header space used by array of 600 1.1 chs * optional sizes of variable-length attributes. 601 1.1 chs * Record the extra header size in case this 602 1.1 chs * increase needs to be reversed due to 603 1.1 chs * spill-over. 604 1.1 chs */ 605 1.1 chs hdrsize += sizeof (uint16_t); 606 1.1 chs if (*index != -1) 607 1.1 chs extra_hdrsize += sizeof (uint16_t); 608 1.1 chs } else { 609 1.1 chs ASSERT(buftype == SA_BONUS); 610 1.1 chs if (*index == -1) 611 1.1 chs *index = i; 612 1.1 chs *will_spill = B_TRUE; 613 1.1 chs continue; 614 1.1 chs } 615 1.1 chs } 616 1.1 chs 617 1.1 chs /* 618 1.1 chs * find index of where spill *could* occur. 619 1.1 chs * Then continue to count of remainder attribute 620 1.1 chs * space. The sum is used later for sizing bonus 621 1.1 chs * and spill buffer. 622 1.1 chs */ 623 1.1 chs if (buftype == SA_BONUS && *index == -1 && 624 1.1 chs (*total + P2ROUNDUP(hdrsize, 8)) > 625 1.1 chs (full_space - sizeof (blkptr_t))) { 626 1.1 chs *index = i; 627 1.1 chs } 628 1.1 chs 629 1.1 chs if ((*total + P2ROUNDUP(hdrsize, 8)) > full_space && 630 1.1 chs buftype == SA_BONUS) 631 1.1 chs *will_spill = B_TRUE; 632 1.1 chs } 633 1.1 chs 634 1.1 chs if (*will_spill) 635 1.1 chs hdrsize -= extra_hdrsize; 636 1.1 chs 637 1.1 chs hdrsize = P2ROUNDUP(hdrsize, 8); 638 1.1 chs return (hdrsize); 639 1.1 chs } 640 1.1 chs 641 1.1 chs #define BUF_SPACE_NEEDED(total, header) (total + header) 642 1.1 chs 643 1.1 chs /* 644 1.1 chs * Find layout that corresponds to ordering of attributes 645 1.1 chs * If not found a new layout number is created and added to 646 1.1 chs * persistent layout tables. 
647 1.1 chs */ 648 1.1 chs static int 649 1.1 chs sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, 650 1.1 chs dmu_tx_t *tx) 651 1.1 chs { 652 1.1 chs sa_os_t *sa = hdl->sa_os->os_sa; 653 1.1 chs uint64_t hash; 654 1.1 chs sa_buf_type_t buftype; 655 1.1 chs sa_hdr_phys_t *sahdr; 656 1.1 chs void *data_start; 657 1.1 chs int buf_space; 658 1.1 chs sa_attr_type_t *attrs, *attrs_start; 659 1.1 chs int i, lot_count; 660 1.1 chs int hdrsize; 661 1.1 chs int spillhdrsize = 0; 662 1.1 chs int used; 663 1.1 chs dmu_object_type_t bonustype; 664 1.1 chs sa_lot_t *lot; 665 1.1 chs int len_idx; 666 1.1 chs int spill_used; 667 1.1 chs boolean_t spilling; 668 1.1 chs 669 1.1 chs dmu_buf_will_dirty(hdl->sa_bonus, tx); 670 1.1 chs bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus); 671 1.1 chs 672 1.1 chs /* first determine bonus header size and sum of all attributes */ 673 1.1 chs hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus, 674 1.1 chs SA_BONUS, &i, &used, &spilling); 675 1.1 chs 676 1.1 chs if (used > SPA_OLD_MAXBLOCKSIZE) 677 1.1 chs return (SET_ERROR(EFBIG)); 678 1.1 chs 679 1.1 chs VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ? 
680 1.1 chs MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) : 681 1.1 chs used + hdrsize, tx)); 682 1.1 chs 683 1.1 chs ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) || 684 1.1 chs bonustype == DMU_OT_SA); 685 1.1 chs 686 1.1 chs /* setup and size spill buffer when needed */ 687 1.1 chs if (spilling) { 688 1.1 chs boolean_t dummy; 689 1.1 chs 690 1.1 chs if (hdl->sa_spill == NULL) { 691 1.1 chs VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL, 692 1.1 chs &hdl->sa_spill) == 0); 693 1.1 chs } 694 1.1 chs dmu_buf_will_dirty(hdl->sa_spill, tx); 695 1.1 chs 696 1.1 chs spillhdrsize = sa_find_sizes(sa, &attr_desc[i], 697 1.1 chs attr_count - i, hdl->sa_spill, SA_SPILL, &i, 698 1.1 chs &spill_used, &dummy); 699 1.1 chs 700 1.1 chs if (spill_used > SPA_OLD_MAXBLOCKSIZE) 701 1.1 chs return (SET_ERROR(EFBIG)); 702 1.1 chs 703 1.1 chs buf_space = hdl->sa_spill->db_size - spillhdrsize; 704 1.1 chs if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) > 705 1.1 chs hdl->sa_spill->db_size) 706 1.1 chs VERIFY(0 == sa_resize_spill(hdl, 707 1.1 chs BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx)); 708 1.1 chs } 709 1.1 chs 710 1.1 chs /* setup starting pointers to lay down data */ 711 1.1 chs data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize); 712 1.1 chs sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data; 713 1.1 chs buftype = SA_BONUS; 714 1.1 chs 715 1.1 chs if (spilling) 716 1.1 chs buf_space = (sa->sa_force_spill) ? 
717 1.1 chs 0 : SA_BLKPTR_SPACE - hdrsize; 718 1.1 chs else 719 1.1 chs buf_space = hdl->sa_bonus->db_size - hdrsize; 720 1.1 chs 721 1.1 chs attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 722 1.1 chs KM_SLEEP); 723 1.1 chs lot_count = 0; 724 1.1 chs 725 1.1 chs for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) { 726 1.1 chs uint16_t length; 727 1.1 chs 728 1.1 chs ASSERT(IS_P2ALIGNED(data_start, 8)); 729 1.1 chs ASSERT(IS_P2ALIGNED(buf_space, 8)); 730 1.1 chs attrs[i] = attr_desc[i].sa_attr; 731 1.1 chs length = SA_REGISTERED_LEN(sa, attrs[i]); 732 1.1 chs if (length == 0) 733 1.1 chs length = attr_desc[i].sa_length; 734 1.1 chs else 735 1.1 chs VERIFY(length == attr_desc[i].sa_length); 736 1.1 chs 737 1.1 chs if (buf_space < length) { /* switch to spill buffer */ 738 1.1 chs VERIFY(spilling); 739 1.1 chs VERIFY(bonustype == DMU_OT_SA); 740 1.1 chs if (buftype == SA_BONUS && !sa->sa_force_spill) { 741 1.1 chs sa_find_layout(hdl->sa_os, hash, attrs_start, 742 1.1 chs lot_count, tx, &lot); 743 1.1 chs SA_SET_HDR(sahdr, lot->lot_num, hdrsize); 744 1.1 chs } 745 1.1 chs 746 1.1 chs buftype = SA_SPILL; 747 1.1 chs hash = -1ULL; 748 1.1 chs len_idx = 0; 749 1.1 chs 750 1.1 chs sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data; 751 1.1 chs sahdr->sa_magic = SA_MAGIC; 752 1.1 chs data_start = (void *)((uintptr_t)sahdr + 753 1.1 chs spillhdrsize); 754 1.1 chs attrs_start = &attrs[i]; 755 1.1 chs buf_space = hdl->sa_spill->db_size - spillhdrsize; 756 1.1 chs lot_count = 0; 757 1.1 chs } 758 1.1 chs hash ^= SA_ATTR_HASH(attrs[i]); 759 1.1 chs attr_desc[i].sa_addr = data_start; 760 1.1 chs attr_desc[i].sa_size = length; 761 1.1 chs SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data, 762 1.1 chs data_start, length); 763 1.1 chs if (sa->sa_attr_table[attrs[i]].sa_length == 0) { 764 1.1 chs sahdr->sa_lengths[len_idx++] = length; 765 1.1 chs } 766 1.1 chs VERIFY((uintptr_t)data_start % 8 == 0); 767 1.1 chs data_start = (void 
*)P2ROUNDUP(((uintptr_t)data_start + 768 1.1 chs length), 8); 769 1.1 chs buf_space -= P2ROUNDUP(length, 8); 770 1.1 chs lot_count++; 771 1.1 chs } 772 1.1 chs 773 1.1 chs sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot); 774 1.1 chs 775 1.1 chs /* 776 1.1 chs * Verify that old znodes always have layout number 0. 777 1.1 chs * Must be DMU_OT_SA for arbitrary layouts 778 1.1 chs */ 779 1.1 chs VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) || 780 1.1 chs (bonustype == DMU_OT_SA && lot->lot_num > 1)); 781 1.1 chs 782 1.1 chs if (bonustype == DMU_OT_SA) { 783 1.1 chs SA_SET_HDR(sahdr, lot->lot_num, 784 1.1 chs buftype == SA_BONUS ? hdrsize : spillhdrsize); 785 1.1 chs } 786 1.1 chs 787 1.1 chs kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count); 788 1.1 chs if (hdl->sa_bonus_tab) { 789 1.1 chs sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); 790 1.1 chs hdl->sa_bonus_tab = NULL; 791 1.1 chs } 792 1.1 chs if (!sa->sa_force_spill) 793 1.1 chs VERIFY(0 == sa_build_index(hdl, SA_BONUS)); 794 1.1 chs if (hdl->sa_spill) { 795 1.1 chs sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); 796 1.1 chs if (!spilling) { 797 1.1 chs /* 798 1.1 chs * remove spill block that is no longer needed. 
799 1.1 chs */ 800 1.1 chs dmu_buf_rele(hdl->sa_spill, NULL); 801 1.1 chs hdl->sa_spill = NULL; 802 1.1 chs hdl->sa_spill_tab = NULL; 803 1.1 chs VERIFY(0 == dmu_rm_spill(hdl->sa_os, 804 1.1 chs sa_handle_object(hdl), tx)); 805 1.1 chs } else { 806 1.1 chs VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 807 1.1 chs } 808 1.1 chs } 809 1.1 chs 810 1.1 chs return (0); 811 1.1 chs } 812 1.1 chs 813 1.1 chs static void 814 1.1 chs sa_free_attr_table(sa_os_t *sa) 815 1.1 chs { 816 1.1 chs int i; 817 1.1 chs 818 1.1 chs if (sa->sa_attr_table == NULL) 819 1.1 chs return; 820 1.1 chs 821 1.1 chs for (i = 0; i != sa->sa_num_attrs; i++) { 822 1.1 chs if (sa->sa_attr_table[i].sa_name) 823 1.1 chs kmem_free(sa->sa_attr_table[i].sa_name, 824 1.1 chs strlen(sa->sa_attr_table[i].sa_name) + 1); 825 1.1 chs } 826 1.1 chs 827 1.1 chs kmem_free(sa->sa_attr_table, 828 1.1 chs sizeof (sa_attr_table_t) * sa->sa_num_attrs); 829 1.1 chs 830 1.1 chs sa->sa_attr_table = NULL; 831 1.1 chs } 832 1.1 chs 833 1.1 chs static int 834 1.1 chs sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) 835 1.1 chs { 836 1.1 chs sa_os_t *sa = os->os_sa; 837 1.1 chs uint64_t sa_attr_count = 0; 838 1.1 chs uint64_t sa_reg_count = 0; 839 1.1 chs int error = 0; 840 1.1 chs uint64_t attr_value; 841 1.1 chs sa_attr_table_t *tb; 842 1.1 chs zap_cursor_t zc; 843 1.1 chs zap_attribute_t za; 844 1.1 chs int registered_count = 0; 845 1.1 chs int i; 846 1.1 chs dmu_objset_type_t ostype = dmu_objset_type(os); 847 1.1 chs 848 1.1 chs sa->sa_user_table = 849 1.1 chs kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP); 850 1.1 chs sa->sa_user_table_sz = count * sizeof (sa_attr_type_t); 851 1.1 chs 852 1.1 chs if (sa->sa_reg_attr_obj != 0) { 853 1.1 chs error = zap_count(os, sa->sa_reg_attr_obj, 854 1.1 chs &sa_attr_count); 855 1.1 chs 856 1.1 chs /* 857 1.1 chs * Make sure we retrieved a count and that it isn't zero 858 1.1 chs */ 859 1.1 chs if (error || (error == 0 && sa_attr_count == 0)) { 860 1.1 chs 
if (error == 0) 861 1.1 chs error = SET_ERROR(EINVAL); 862 1.1 chs goto bail; 863 1.1 chs } 864 1.1 chs sa_reg_count = sa_attr_count; 865 1.1 chs } 866 1.1 chs 867 1.1 chs if (ostype == DMU_OST_ZFS && sa_attr_count == 0) 868 1.1 chs sa_attr_count += sa_legacy_attr_count; 869 1.1 chs 870 1.1 chs /* Allocate attribute numbers for attributes that aren't registered */ 871 1.1 chs for (i = 0; i != count; i++) { 872 1.1 chs boolean_t found = B_FALSE; 873 1.1 chs int j; 874 1.1 chs 875 1.1 chs if (ostype == DMU_OST_ZFS) { 876 1.1 chs for (j = 0; j != sa_legacy_attr_count; j++) { 877 1.1 chs if (strcmp(reg_attrs[i].sa_name, 878 1.1 chs sa_legacy_attrs[j].sa_name) == 0) { 879 1.1 chs sa->sa_user_table[i] = 880 1.1 chs sa_legacy_attrs[j].sa_attr; 881 1.1 chs found = B_TRUE; 882 1.1 chs } 883 1.1 chs } 884 1.1 chs } 885 1.1 chs if (found) 886 1.1 chs continue; 887 1.1 chs 888 1.1 chs if (sa->sa_reg_attr_obj) 889 1.1 chs error = zap_lookup(os, sa->sa_reg_attr_obj, 890 1.1 chs reg_attrs[i].sa_name, 8, 1, &attr_value); 891 1.1 chs else 892 1.1 chs error = SET_ERROR(ENOENT); 893 1.1 chs switch (error) { 894 1.1 chs case ENOENT: 895 1.1 chs sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count; 896 1.1 chs sa_attr_count++; 897 1.1 chs break; 898 1.1 chs case 0: 899 1.1 chs sa->sa_user_table[i] = ATTR_NUM(attr_value); 900 1.1 chs break; 901 1.1 chs default: 902 1.1 chs goto bail; 903 1.1 chs } 904 1.1 chs } 905 1.1 chs 906 1.1 chs sa->sa_num_attrs = sa_attr_count; 907 1.1 chs tb = sa->sa_attr_table = 908 1.1 chs kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP); 909 1.1 chs 910 1.1 chs /* 911 1.1 chs * Attribute table is constructed from requested attribute list, 912 1.1 chs * previously foreign registered attributes, and also the legacy 913 1.1 chs * ZPL set of attributes. 
914 1.1 chs */ 915 1.1 chs 916 1.1 chs if (sa->sa_reg_attr_obj) { 917 1.1 chs for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj); 918 1.1 chs (error = zap_cursor_retrieve(&zc, &za)) == 0; 919 1.1 chs zap_cursor_advance(&zc)) { 920 1.1 chs uint64_t value; 921 1.1 chs value = za.za_first_integer; 922 1.1 chs 923 1.1 chs registered_count++; 924 1.1 chs tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value); 925 1.1 chs tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value); 926 1.1 chs tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value); 927 1.1 chs tb[ATTR_NUM(value)].sa_registered = B_TRUE; 928 1.1 chs 929 1.1 chs if (tb[ATTR_NUM(value)].sa_name) { 930 1.1 chs continue; 931 1.1 chs } 932 1.1 chs tb[ATTR_NUM(value)].sa_name = 933 1.1 chs kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP); 934 1.1 chs (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name, 935 1.1 chs strlen(za.za_name) +1); 936 1.1 chs } 937 1.1 chs zap_cursor_fini(&zc); 938 1.1 chs /* 939 1.1 chs * Make sure we processed the correct number of registered 940 1.1 chs * attributes 941 1.1 chs */ 942 1.1 chs if (registered_count != sa_reg_count) { 943 1.1 chs ASSERT(error != 0); 944 1.1 chs goto bail; 945 1.1 chs } 946 1.1 chs 947 1.1 chs } 948 1.1 chs 949 1.1 chs if (ostype == DMU_OST_ZFS) { 950 1.1 chs for (i = 0; i != sa_legacy_attr_count; i++) { 951 1.1 chs if (tb[i].sa_name) 952 1.1 chs continue; 953 1.1 chs tb[i].sa_attr = sa_legacy_attrs[i].sa_attr; 954 1.1 chs tb[i].sa_length = sa_legacy_attrs[i].sa_length; 955 1.1 chs tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap; 956 1.1 chs tb[i].sa_registered = B_FALSE; 957 1.1 chs tb[i].sa_name = 958 1.1 chs kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1, 959 1.1 chs KM_SLEEP); 960 1.1 chs (void) strlcpy(tb[i].sa_name, 961 1.1 chs sa_legacy_attrs[i].sa_name, 962 1.1 chs strlen(sa_legacy_attrs[i].sa_name) + 1); 963 1.1 chs } 964 1.1 chs } 965 1.1 chs 966 1.1 chs for (i = 0; i != count; i++) { 967 1.1 chs sa_attr_type_t attr_id; 968 1.1 chs 969 1.1 chs attr_id = 
sa->sa_user_table[i]; 970 1.1 chs if (tb[attr_id].sa_name) 971 1.1 chs continue; 972 1.1 chs 973 1.1 chs tb[attr_id].sa_length = reg_attrs[i].sa_length; 974 1.1 chs tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap; 975 1.1 chs tb[attr_id].sa_attr = attr_id; 976 1.1 chs tb[attr_id].sa_name = 977 1.1 chs kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP); 978 1.1 chs (void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name, 979 1.1 chs strlen(reg_attrs[i].sa_name) + 1); 980 1.1 chs } 981 1.1 chs 982 1.1 chs sa->sa_need_attr_registration = 983 1.1 chs (sa_attr_count != registered_count); 984 1.1 chs 985 1.1 chs return (0); 986 1.1 chs bail: 987 1.1 chs kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t)); 988 1.1 chs sa->sa_user_table = NULL; 989 1.1 chs sa_free_attr_table(sa); 990 1.1 chs return ((error != 0) ? error : EINVAL); 991 1.1 chs } 992 1.1 chs 993 1.1 chs int 994 1.1 chs sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, 995 1.1 chs sa_attr_type_t **user_table) 996 1.1 chs { 997 1.1 chs zap_cursor_t zc; 998 1.1 chs zap_attribute_t za; 999 1.1 chs sa_os_t *sa; 1000 1.1 chs dmu_objset_type_t ostype = dmu_objset_type(os); 1001 1.1 chs sa_attr_type_t *tb; 1002 1.1 chs int error; 1003 1.1 chs 1004 1.1 chs mutex_enter(&os->os_user_ptr_lock); 1005 1.1 chs if (os->os_sa) { 1006 1.1 chs mutex_enter(&os->os_sa->sa_lock); 1007 1.1 chs mutex_exit(&os->os_user_ptr_lock); 1008 1.1 chs tb = os->os_sa->sa_user_table; 1009 1.1 chs mutex_exit(&os->os_sa->sa_lock); 1010 1.1 chs *user_table = tb; 1011 1.1 chs return (0); 1012 1.1 chs } 1013 1.1 chs 1014 1.1 chs sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP); 1015 1.1 chs mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); 1016 1.1 chs sa->sa_master_obj = sa_obj; 1017 1.1 chs 1018 1.1 chs os->os_sa = sa; 1019 1.1 chs mutex_enter(&sa->sa_lock); 1020 1.1 chs mutex_exit(&os->os_user_ptr_lock); 1021 1.1 chs avl_create(&sa->sa_layout_num_tree, layout_num_compare, 1022 1.1 chs sizeof 
(sa_lot_t), offsetof(sa_lot_t, lot_num_node)); 1023 1.1 chs avl_create(&sa->sa_layout_hash_tree, layout_hash_compare, 1024 1.1 chs sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node)); 1025 1.1 chs 1026 1.1 chs if (sa_obj) { 1027 1.1 chs error = zap_lookup(os, sa_obj, SA_LAYOUTS, 1028 1.1 chs 8, 1, &sa->sa_layout_attr_obj); 1029 1.1 chs if (error != 0 && error != ENOENT) 1030 1.1 chs goto fail; 1031 1.1 chs error = zap_lookup(os, sa_obj, SA_REGISTRY, 1032 1.1 chs 8, 1, &sa->sa_reg_attr_obj); 1033 1.1 chs if (error != 0 && error != ENOENT) 1034 1.1 chs goto fail; 1035 1.1 chs } 1036 1.1 chs 1037 1.1 chs if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0) 1038 1.1 chs goto fail; 1039 1.1 chs 1040 1.1 chs if (sa->sa_layout_attr_obj != 0) { 1041 1.1 chs uint64_t layout_count; 1042 1.1 chs 1043 1.1 chs error = zap_count(os, sa->sa_layout_attr_obj, 1044 1.1 chs &layout_count); 1045 1.1 chs 1046 1.1 chs /* 1047 1.1 chs * Layout number count should be > 0 1048 1.1 chs */ 1049 1.1 chs if (error || (error == 0 && layout_count == 0)) { 1050 1.1 chs if (error == 0) 1051 1.1 chs error = SET_ERROR(EINVAL); 1052 1.1 chs goto fail; 1053 1.1 chs } 1054 1.1 chs 1055 1.1 chs for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj); 1056 1.1 chs (error = zap_cursor_retrieve(&zc, &za)) == 0; 1057 1.1 chs zap_cursor_advance(&zc)) { 1058 1.1 chs sa_attr_type_t *lot_attrs; 1059 1.1 chs uint64_t lot_num; 1060 1.1 chs 1061 1.1 chs lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) * 1062 1.1 chs za.za_num_integers, KM_SLEEP); 1063 1.1 chs 1064 1.1 chs if ((error = (zap_lookup(os, sa->sa_layout_attr_obj, 1065 1.1 chs za.za_name, 2, za.za_num_integers, 1066 1.1 chs lot_attrs))) != 0) { 1067 1.1 chs kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1068 1.1 chs za.za_num_integers); 1069 1.1 chs break; 1070 1.1 chs } 1071 1.1 chs VERIFY(ddi_strtoull(za.za_name, NULL, 10, 1072 1.1 chs (unsigned long long *)&lot_num) == 0); 1073 1.1 chs 1074 1.1 chs (void) sa_add_layout_entry(os, lot_attrs, 
1075 1.1 chs za.za_num_integers, lot_num, 1076 1.1 chs sa_layout_info_hash(lot_attrs, 1077 1.1 chs za.za_num_integers), B_FALSE, NULL); 1078 1.1 chs kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1079 1.1 chs za.za_num_integers); 1080 1.1 chs } 1081 1.1 chs zap_cursor_fini(&zc); 1082 1.1 chs 1083 1.1 chs /* 1084 1.1 chs * Make sure layout count matches number of entries added 1085 1.1 chs * to AVL tree 1086 1.1 chs */ 1087 1.1 chs if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) { 1088 1.1 chs ASSERT(error != 0); 1089 1.1 chs goto fail; 1090 1.1 chs } 1091 1.1 chs } 1092 1.1 chs 1093 1.1 chs /* Add special layout number for old ZNODES */ 1094 1.1 chs if (ostype == DMU_OST_ZFS) { 1095 1.1 chs (void) sa_add_layout_entry(os, sa_legacy_zpl_layout, 1096 1.1 chs sa_legacy_attr_count, 0, 1097 1.1 chs sa_layout_info_hash(sa_legacy_zpl_layout, 1098 1.1 chs sa_legacy_attr_count), B_FALSE, NULL); 1099 1.1 chs 1100 1.1 chs (void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1, 1101 1.1 chs 0, B_FALSE, NULL); 1102 1.1 chs } 1103 1.1 chs *user_table = os->os_sa->sa_user_table; 1104 1.1 chs mutex_exit(&sa->sa_lock); 1105 1.1 chs return (0); 1106 1.1 chs fail: 1107 1.1 chs os->os_sa = NULL; 1108 1.1 chs sa_free_attr_table(sa); 1109 1.1 chs if (sa->sa_user_table) 1110 1.1 chs kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1111 1.1 chs mutex_exit(&sa->sa_lock); 1112 1.1 chs avl_destroy(&sa->sa_layout_hash_tree); 1113 1.1 chs avl_destroy(&sa->sa_layout_num_tree); 1114 1.1 chs mutex_destroy(&sa->sa_lock); 1115 1.1 chs kmem_free(sa, sizeof (sa_os_t)); 1116 1.1 chs return ((error == ECKSUM) ? 
EIO : error); 1117 1.1 chs } 1118 1.1 chs 1119 1.1 chs void 1120 1.1 chs sa_tear_down(objset_t *os) 1121 1.1 chs { 1122 1.1 chs sa_os_t *sa = os->os_sa; 1123 1.1 chs sa_lot_t *layout; 1124 1.1 chs void *cookie; 1125 1.1 chs 1126 1.1 chs kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1127 1.1 chs 1128 1.1 chs /* Free up attr table */ 1129 1.1 chs 1130 1.1 chs sa_free_attr_table(sa); 1131 1.1 chs 1132 1.1 chs cookie = NULL; 1133 1.1 chs while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) { 1134 1.1 chs sa_idx_tab_t *tab; 1135 1.1 chs while (tab = list_head(&layout->lot_idx_tab)) { 1136 1.1 chs ASSERT(refcount_count(&tab->sa_refcount)); 1137 1.1 chs sa_idx_tab_rele(os, tab); 1138 1.1 chs } 1139 1.1 chs } 1140 1.1 chs 1141 1.1 chs cookie = NULL; 1142 1.1 chs while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) { 1143 1.2 chs #ifdef __NetBSD__ 1144 1.2 chs if (layout->lot_attr_count != 0) 1145 1.2 chs #endif 1146 1.1 chs kmem_free(layout->lot_attrs, 1147 1.1 chs sizeof (sa_attr_type_t) * layout->lot_attr_count); 1148 1.1 chs kmem_free(layout, sizeof (sa_lot_t)); 1149 1.1 chs } 1150 1.1 chs 1151 1.1 chs avl_destroy(&sa->sa_layout_hash_tree); 1152 1.1 chs avl_destroy(&sa->sa_layout_num_tree); 1153 1.1 chs mutex_destroy(&sa->sa_lock); 1154 1.1 chs 1155 1.1 chs kmem_free(sa, sizeof (sa_os_t)); 1156 1.1 chs os->os_sa = NULL; 1157 1.1 chs } 1158 1.1 chs 1159 1.1 chs void 1160 1.1 chs sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr, 1161 1.1 chs uint16_t length, int length_idx, boolean_t var_length, void *userp) 1162 1.1 chs { 1163 1.1 chs sa_idx_tab_t *idx_tab = userp; 1164 1.1 chs 1165 1.1 chs if (var_length) { 1166 1.1 chs ASSERT(idx_tab->sa_variable_lengths); 1167 1.1 chs idx_tab->sa_variable_lengths[length_idx] = length; 1168 1.1 chs } 1169 1.1 chs TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx, 1170 1.1 chs (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr)); 1171 1.1 chs } 1172 1.1 chs 1173 1.1 chs static 
void 1174 1.1 chs sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type, 1175 1.1 chs sa_iterfunc_t func, sa_lot_t *tab, void *userp) 1176 1.1 chs { 1177 1.1 chs void *data_start; 1178 1.1 chs sa_lot_t *tb = tab; 1179 1.1 chs sa_lot_t search; 1180 1.1 chs avl_index_t loc; 1181 1.1 chs sa_os_t *sa = os->os_sa; 1182 1.1 chs int i; 1183 1.1 chs uint16_t *length_start = NULL; 1184 1.1 chs uint8_t length_idx = 0; 1185 1.1 chs 1186 1.1 chs if (tab == NULL) { 1187 1.1 chs search.lot_num = SA_LAYOUT_NUM(hdr, type); 1188 1.1 chs tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); 1189 1.1 chs ASSERT(tb); 1190 1.1 chs } 1191 1.1 chs 1192 1.1 chs if (IS_SA_BONUSTYPE(type)) { 1193 1.1 chs data_start = (void *)P2ROUNDUP(((uintptr_t)hdr + 1194 1.1 chs offsetof(sa_hdr_phys_t, sa_lengths) + 1195 1.1 chs (sizeof (uint16_t) * tb->lot_var_sizes)), 8); 1196 1.1 chs length_start = hdr->sa_lengths; 1197 1.1 chs } else { 1198 1.1 chs data_start = hdr; 1199 1.1 chs } 1200 1.1 chs 1201 1.1 chs for (i = 0; i != tb->lot_attr_count; i++) { 1202 1.1 chs int attr_length, reg_length; 1203 1.1 chs uint8_t idx_len; 1204 1.1 chs 1205 1.1 chs reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length; 1206 1.1 chs if (reg_length) { 1207 1.1 chs attr_length = reg_length; 1208 1.1 chs idx_len = 0; 1209 1.1 chs } else { 1210 1.1 chs attr_length = length_start[length_idx]; 1211 1.1 chs idx_len = length_idx++; 1212 1.1 chs } 1213 1.1 chs 1214 1.1 chs func(hdr, data_start, tb->lot_attrs[i], attr_length, 1215 1.1 chs idx_len, reg_length == 0 ? 
B_TRUE : B_FALSE, userp); 1216 1.1 chs 1217 1.1 chs data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + 1218 1.1 chs attr_length), 8); 1219 1.1 chs } 1220 1.1 chs } 1221 1.1 chs 1222 1.1 chs /*ARGSUSED*/ 1223 1.1 chs void 1224 1.1 chs sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr, 1225 1.1 chs uint16_t length, int length_idx, boolean_t variable_length, void *userp) 1226 1.1 chs { 1227 1.1 chs sa_handle_t *hdl = userp; 1228 1.1 chs sa_os_t *sa = hdl->sa_os->os_sa; 1229 1.1 chs 1230 1.1 chs sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length); 1231 1.1 chs } 1232 1.1 chs 1233 1.1 chs void 1234 1.1 chs sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype) 1235 1.1 chs { 1236 1.1 chs sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype); 1237 1.1 chs dmu_buf_impl_t *db; 1238 1.1 chs sa_os_t *sa = hdl->sa_os->os_sa; 1239 1.1 chs int num_lengths = 1; 1240 1.1 chs int i; 1241 1.1 chs 1242 1.1 chs ASSERT(MUTEX_HELD(&sa->sa_lock)); 1243 1.1 chs if (sa_hdr_phys->sa_magic == SA_MAGIC) 1244 1.1 chs return; 1245 1.1 chs 1246 1.1 chs db = SA_GET_DB(hdl, buftype); 1247 1.1 chs 1248 1.1 chs if (buftype == SA_SPILL) { 1249 1.1 chs arc_release(db->db_buf, NULL); 1250 1.1 chs arc_buf_thaw(db->db_buf); 1251 1.1 chs } 1252 1.1 chs 1253 1.1 chs sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic); 1254 1.1 chs sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info); 1255 1.1 chs 1256 1.1 chs /* 1257 1.1 chs * Determine number of variable lenghts in header 1258 1.1 chs * The standard 8 byte header has one for free and a 1259 1.1 chs * 16 byte header would have 4 + 1; 1260 1.1 chs */ 1261 1.1 chs if (SA_HDR_SIZE(sa_hdr_phys) > 8) 1262 1.1 chs num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1; 1263 1.1 chs for (i = 0; i != num_lengths; i++) 1264 1.1 chs sa_hdr_phys->sa_lengths[i] = 1265 1.1 chs BSWAP_16(sa_hdr_phys->sa_lengths[i]); 1266 1.1 chs 1267 1.1 chs sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA, 1268 1.1 chs sa_byteswap_cb, 
NULL, hdl); 1269 1.1 chs 1270 1.1 chs if (buftype == SA_SPILL) 1271 1.1 chs arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf); 1272 1.1 chs } 1273 1.1 chs 1274 1.1 chs static int 1275 1.1 chs sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype) 1276 1.1 chs { 1277 1.1 chs sa_hdr_phys_t *sa_hdr_phys; 1278 1.1 chs dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype); 1279 1.1 chs dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db); 1280 1.1 chs sa_os_t *sa = hdl->sa_os->os_sa; 1281 1.1 chs sa_idx_tab_t *idx_tab; 1282 1.1 chs 1283 1.1 chs sa_hdr_phys = SA_GET_HDR(hdl, buftype); 1284 1.1 chs 1285 1.1 chs mutex_enter(&sa->sa_lock); 1286 1.1 chs 1287 1.1 chs /* Do we need to byteswap? */ 1288 1.1 chs 1289 1.1 chs /* only check if not old znode */ 1290 1.1 chs if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC && 1291 1.1 chs sa_hdr_phys->sa_magic != 0) { 1292 1.1 chs VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC); 1293 1.1 chs sa_byteswap(hdl, buftype); 1294 1.1 chs } 1295 1.1 chs 1296 1.1 chs idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys); 1297 1.1 chs 1298 1.1 chs if (buftype == SA_BONUS) 1299 1.1 chs hdl->sa_bonus_tab = idx_tab; 1300 1.1 chs else 1301 1.1 chs hdl->sa_spill_tab = idx_tab; 1302 1.1 chs 1303 1.1 chs mutex_exit(&sa->sa_lock); 1304 1.1 chs return (0); 1305 1.1 chs } 1306 1.1 chs 1307 1.1 chs /*ARGSUSED*/ 1308 1.1 chs static void 1309 1.1 chs sa_evict_sync(void *dbu) 1310 1.1 chs { 1311 1.1 chs panic("evicting sa dbuf\n"); 1312 1.1 chs } 1313 1.1 chs 1314 1.1 chs static void 1315 1.1 chs sa_idx_tab_rele(objset_t *os, void *arg) 1316 1.1 chs { 1317 1.1 chs sa_os_t *sa = os->os_sa; 1318 1.1 chs sa_idx_tab_t *idx_tab = arg; 1319 1.1 chs 1320 1.1 chs if (idx_tab == NULL) 1321 1.1 chs return; 1322 1.1 chs 1323 1.1 chs mutex_enter(&sa->sa_lock); 1324 1.1 chs if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) { 1325 1.1 chs list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab); 1326 1.1 chs if 
(idx_tab->sa_variable_lengths) 1327 1.1 chs kmem_free(idx_tab->sa_variable_lengths, 1328 1.1 chs sizeof (uint16_t) * 1329 1.1 chs idx_tab->sa_layout->lot_var_sizes); 1330 1.1 chs refcount_destroy(&idx_tab->sa_refcount); 1331 1.1 chs kmem_free(idx_tab->sa_idx_tab, 1332 1.1 chs sizeof (uint32_t) * sa->sa_num_attrs); 1333 1.1 chs kmem_free(idx_tab, sizeof (sa_idx_tab_t)); 1334 1.1 chs } 1335 1.1 chs mutex_exit(&sa->sa_lock); 1336 1.1 chs } 1337 1.1 chs 1338 1.1 chs static void 1339 1.1 chs sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab) 1340 1.1 chs { 1341 1.1 chs sa_os_t *sa = os->os_sa; 1342 1.1 chs 1343 1.1 chs ASSERT(MUTEX_HELD(&sa->sa_lock)); 1344 1.1 chs (void) refcount_add(&idx_tab->sa_refcount, NULL); 1345 1.1 chs } 1346 1.1 chs 1347 1.1 chs void 1348 1.1 chs sa_handle_destroy(sa_handle_t *hdl) 1349 1.1 chs { 1350 1.1 chs dmu_buf_t *db = hdl->sa_bonus; 1351 1.1 chs 1352 1.1 chs mutex_enter(&hdl->sa_lock); 1353 1.1 chs (void) dmu_buf_remove_user(db, &hdl->sa_dbu); 1354 1.1 chs 1355 1.1 chs if (hdl->sa_bonus_tab) 1356 1.1 chs sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); 1357 1.1 chs 1358 1.1 chs if (hdl->sa_spill_tab) 1359 1.1 chs sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); 1360 1.1 chs 1361 1.1 chs dmu_buf_rele(hdl->sa_bonus, NULL); 1362 1.1 chs 1363 1.1 chs if (hdl->sa_spill) 1364 1.1 chs dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); 1365 1.1 chs mutex_exit(&hdl->sa_lock); 1366 1.1 chs 1367 1.1 chs kmem_cache_free(sa_cache, hdl); 1368 1.1 chs } 1369 1.1 chs 1370 1.1 chs int 1371 1.1 chs sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp, 1372 1.1 chs sa_handle_type_t hdl_type, sa_handle_t **handlepp) 1373 1.1 chs { 1374 1.1 chs int error = 0; 1375 1.1 chs dmu_object_info_t doi; 1376 1.1 chs sa_handle_t *handle = NULL; 1377 1.1 chs 1378 1.1 chs #ifdef ZFS_DEBUG 1379 1.1 chs dmu_object_info_from_db(db, &doi); 1380 1.1 chs ASSERT(doi.doi_bonus_type == DMU_OT_SA || 1381 1.1 chs doi.doi_bonus_type == DMU_OT_ZNODE); 1382 1.1 chs #endif 1383 
1.1 chs /* find handle, if it exists */ 1384 1.1 chs /* if one doesn't exist then create a new one, and initialize it */ 1385 1.1 chs 1386 1.1 chs if (hdl_type == SA_HDL_SHARED) 1387 1.1 chs handle = dmu_buf_get_user(db); 1388 1.1 chs 1389 1.1 chs if (handle == NULL) { 1390 1.1 chs sa_handle_t *winner = NULL; 1391 1.1 chs 1392 1.1 chs handle = kmem_cache_alloc(sa_cache, KM_SLEEP); 1393 1.1 chs handle->sa_dbu.dbu_evict_func_sync = NULL; 1394 1.1 chs handle->sa_dbu.dbu_evict_func_async = NULL; 1395 1.1 chs handle->sa_userp = userp; 1396 1.1 chs handle->sa_bonus = db; 1397 1.1 chs handle->sa_os = os; 1398 1.1 chs handle->sa_spill = NULL; 1399 1.1 chs handle->sa_bonus_tab = NULL; 1400 1.1 chs handle->sa_spill_tab = NULL; 1401 1.1 chs 1402 1.1 chs error = sa_build_index(handle, SA_BONUS); 1403 1.1 chs 1404 1.1 chs if (hdl_type == SA_HDL_SHARED) { 1405 1.1 chs dmu_buf_init_user(&handle->sa_dbu, sa_evict_sync, NULL, 1406 1.1 chs NULL); 1407 1.1 chs winner = dmu_buf_set_user_ie(db, &handle->sa_dbu); 1408 1.1 chs } 1409 1.1 chs 1410 1.1 chs if (winner != NULL) { 1411 1.1 chs kmem_cache_free(sa_cache, handle); 1412 1.1 chs handle = winner; 1413 1.1 chs } 1414 1.1 chs } 1415 1.1 chs *handlepp = handle; 1416 1.1 chs 1417 1.1 chs return (error); 1418 1.1 chs } 1419 1.1 chs 1420 1.1 chs int 1421 1.1 chs sa_handle_get(objset_t *objset, uint64_t objid, void *userp, 1422 1.1 chs sa_handle_type_t hdl_type, sa_handle_t **handlepp) 1423 1.1 chs { 1424 1.1 chs dmu_buf_t *db; 1425 1.1 chs int error; 1426 1.1 chs 1427 1.1 chs if (error = dmu_bonus_hold(objset, objid, NULL, &db)) 1428 1.1 chs return (error); 1429 1.1 chs 1430 1.1 chs return (sa_handle_get_from_db(objset, db, userp, hdl_type, 1431 1.1 chs handlepp)); 1432 1.1 chs } 1433 1.1 chs 1434 1.1 chs int 1435 1.1 chs sa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db) 1436 1.1 chs { 1437 1.1 chs return (dmu_bonus_hold(objset, obj_num, tag, db)); 1438 1.1 chs } 1439 1.1 chs 1440 1.1 chs void 1441 1.1 chs 
sa_buf_rele(dmu_buf_t *db, void *tag) 1442 1.1 chs { 1443 1.1 chs dmu_buf_rele(db, tag); 1444 1.1 chs } 1445 1.1 chs 1446 1.1 chs int 1447 1.1 chs sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count) 1448 1.1 chs { 1449 1.1 chs ASSERT(hdl); 1450 1.1 chs ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1451 1.1 chs return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL)); 1452 1.1 chs } 1453 1.1 chs 1454 1.1 chs int 1455 1.1 chs sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) 1456 1.1 chs { 1457 1.1 chs int error; 1458 1.1 chs sa_bulk_attr_t bulk; 1459 1.1 chs 1460 1.1 chs bulk.sa_attr = attr; 1461 1.1 chs bulk.sa_data = buf; 1462 1.1 chs bulk.sa_length = buflen; 1463 1.1 chs bulk.sa_data_func = NULL; 1464 1.1 chs 1465 1.1 chs ASSERT(hdl); 1466 1.1 chs mutex_enter(&hdl->sa_lock); 1467 1.1 chs error = sa_lookup_impl(hdl, &bulk, 1); 1468 1.1 chs mutex_exit(&hdl->sa_lock); 1469 1.1 chs return (error); 1470 1.1 chs } 1471 1.1 chs 1472 1.1 chs #ifdef _KERNEL 1473 1.1 chs int 1474 1.1 chs sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) 1475 1.1 chs { 1476 1.1 chs int error; 1477 1.1 chs sa_bulk_attr_t bulk; 1478 1.1 chs 1479 1.1 chs bulk.sa_data = NULL; 1480 1.1 chs bulk.sa_attr = attr; 1481 1.1 chs bulk.sa_data_func = NULL; 1482 1.1 chs 1483 1.1 chs ASSERT(hdl); 1484 1.1 chs 1485 1.1 chs mutex_enter(&hdl->sa_lock); 1486 1.1 chs if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) { 1487 1.1 chs error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size, 1488 1.1 chs uio->uio_resid), UIO_READ, uio); 1489 1.1 chs } 1490 1.1 chs mutex_exit(&hdl->sa_lock); 1491 1.1 chs return (error); 1492 1.1 chs 1493 1.1 chs } 1494 1.1 chs #endif 1495 1.1 chs 1496 1.1 chs void * 1497 1.1 chs sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data) 1498 1.1 chs { 1499 1.1 chs sa_idx_tab_t *idx_tab; 1500 1.1 chs sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data; 1501 1.1 chs sa_os_t *sa = os->os_sa; 1502 1.1 chs sa_lot_t *tb, search; 
1503 1.1 chs avl_index_t loc; 1504 1.1 chs 1505 1.1 chs /* 1506 1.1 chs * Deterimine layout number. If SA node and header == 0 then 1507 1.1 chs * force the index table to the dummy "1" empty layout. 1508 1.1 chs * 1509 1.1 chs * The layout number would only be zero for a newly created file 1510 1.1 chs * that has not added any attributes yet, or with crypto enabled which 1511 1.1 chs * doesn't write any attributes to the bonus buffer. 1512 1.1 chs */ 1513 1.1 chs 1514 1.1 chs search.lot_num = SA_LAYOUT_NUM(hdr, bonustype); 1515 1.1 chs 1516 1.1 chs tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); 1517 1.1 chs 1518 1.1 chs /* Verify header size is consistent with layout information */ 1519 1.1 chs ASSERT(tb); 1520 1.1 chs ASSERT(IS_SA_BONUSTYPE(bonustype) && 1521 1.1 chs SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) || 1522 1.1 chs (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0)); 1523 1.1 chs 1524 1.1 chs /* 1525 1.1 chs * See if any of the already existing TOC entries can be reused? 
1526 1.1 chs */ 1527 1.1 chs 1528 1.1 chs for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab; 1529 1.1 chs idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) { 1530 1.1 chs boolean_t valid_idx = B_TRUE; 1531 1.1 chs int i; 1532 1.1 chs 1533 1.1 chs if (tb->lot_var_sizes != 0 && 1534 1.1 chs idx_tab->sa_variable_lengths != NULL) { 1535 1.1 chs for (i = 0; i != tb->lot_var_sizes; i++) { 1536 1.1 chs if (hdr->sa_lengths[i] != 1537 1.1 chs idx_tab->sa_variable_lengths[i]) { 1538 1.1 chs valid_idx = B_FALSE; 1539 1.1 chs break; 1540 1.1 chs } 1541 1.1 chs } 1542 1.1 chs } 1543 1.1 chs if (valid_idx) { 1544 1.1 chs sa_idx_tab_hold(os, idx_tab); 1545 1.1 chs return (idx_tab); 1546 1.1 chs } 1547 1.1 chs } 1548 1.1 chs 1549 1.1 chs /* No such luck, create a new entry */ 1550 1.1 chs idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP); 1551 1.1 chs idx_tab->sa_idx_tab = 1552 1.1 chs kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP); 1553 1.1 chs idx_tab->sa_layout = tb; 1554 1.1 chs refcount_create(&idx_tab->sa_refcount); 1555 1.1 chs if (tb->lot_var_sizes) 1556 1.1 chs idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) * 1557 1.1 chs tb->lot_var_sizes, KM_SLEEP); 1558 1.1 chs 1559 1.1 chs sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab, 1560 1.1 chs tb, idx_tab); 1561 1.1 chs sa_idx_tab_hold(os, idx_tab); /* one hold for consumer */ 1562 1.1 chs sa_idx_tab_hold(os, idx_tab); /* one for layout */ 1563 1.1 chs list_insert_tail(&tb->lot_idx_tab, idx_tab); 1564 1.1 chs return (idx_tab); 1565 1.1 chs } 1566 1.1 chs 1567 1.1 chs void 1568 1.1 chs sa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len, 1569 1.1 chs boolean_t start, void *userdata) 1570 1.1 chs { 1571 1.1 chs ASSERT(start); 1572 1.1 chs 1573 1.1 chs *dataptr = userdata; 1574 1.1 chs *len = total_len; 1575 1.1 chs } 1576 1.1 chs 1577 1.1 chs static void 1578 1.1 chs sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx) 1579 1.1 chs { 1580 1.1 chs uint64_t attr_value = 0; 1581 1.1 
chs sa_os_t *sa = hdl->sa_os->os_sa; 1582 1.1 chs sa_attr_table_t *tb = sa->sa_attr_table; 1583 1.1 chs int i; 1584 1.1 chs 1585 1.1 chs mutex_enter(&sa->sa_lock); 1586 1.1 chs 1587 1.1 chs if (!sa->sa_need_attr_registration || sa->sa_master_obj == 0) { 1588 1.1 chs mutex_exit(&sa->sa_lock); 1589 1.1 chs return; 1590 1.1 chs } 1591 1.1 chs 1592 1.1 chs if (sa->sa_reg_attr_obj == 0) { 1593 1.1 chs sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os, 1594 1.1 chs DMU_OT_SA_ATTR_REGISTRATION, 1595 1.1 chs sa->sa_master_obj, SA_REGISTRY, tx); 1596 1.1 chs } 1597 1.1 chs for (i = 0; i != sa->sa_num_attrs; i++) { 1598 1.1 chs if (sa->sa_attr_table[i].sa_registered) 1599 1.1 chs continue; 1600 1.1 chs ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length, 1601 1.1 chs tb[i].sa_byteswap); 1602 1.1 chs VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj, 1603 1.1 chs tb[i].sa_name, 8, 1, &attr_value, tx)); 1604 1.1 chs tb[i].sa_registered = B_TRUE; 1605 1.1 chs } 1606 1.1 chs sa->sa_need_attr_registration = B_FALSE; 1607 1.1 chs mutex_exit(&sa->sa_lock); 1608 1.1 chs } 1609 1.1 chs 1610 1.1 chs /* 1611 1.1 chs * Replace all attributes with attributes specified in template. 1612 1.1 chs * If dnode had a spill buffer then those attributes will 1613 1.1 chs * also be replaced, possibly with just an empty spill block. 1614 1.1 chs * 1615 1.1 chs * This interface is intended to only be used for bulk adding of 1616 1.1 chs * attributes for a new file. It will also be used by the ZPL 1617 1.1 chs * when converting an old-formatted znode to native SA support. 
1618 1.1 chs */ 1619 1.1 chs int 1620 1.1 chs sa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, 1621 1.1 chs int attr_count, dmu_tx_t *tx) 1622 1.1 chs { 1623 1.1 chs sa_os_t *sa = hdl->sa_os->os_sa; 1624 1.1 chs 1625 1.1 chs if (sa->sa_need_attr_registration) 1626 1.1 chs sa_attr_register_sync(hdl, tx); 1627 1.1 chs return (sa_build_layouts(hdl, attr_desc, attr_count, tx)); 1628 1.1 chs } 1629 1.1 chs 1630 1.1 chs int 1631 1.1 chs sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, 1632 1.1 chs int attr_count, dmu_tx_t *tx) 1633 1.1 chs { 1634 1.1 chs int error; 1635 1.1 chs 1636 1.1 chs mutex_enter(&hdl->sa_lock); 1637 1.1 chs error = sa_replace_all_by_template_locked(hdl, attr_desc, 1638 1.1 chs attr_count, tx); 1639 1.1 chs mutex_exit(&hdl->sa_lock); 1640 1.1 chs return (error); 1641 1.1 chs } 1642 1.1 chs 1643 1.1 chs /* 1644 1.1 chs * Add/remove a single attribute or replace a variable-sized attribute value 1645 1.1 chs * with a value of a different size, and then rewrite the entire set 1646 1.1 chs * of attributes. 1647 1.1 chs * Same-length attribute value replacement (including fixed-length attributes) 1648 1.1 chs * is handled more efficiently by the upper layers. 
1649 1.1 chs */ 1650 1.1 chs static int 1651 1.1 chs sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 1652 1.1 chs sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 1653 1.1 chs uint16_t buflen, dmu_tx_t *tx) 1654 1.1 chs { 1655 1.1 chs sa_os_t *sa = hdl->sa_os->os_sa; 1656 1.1 chs dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; 1657 1.1 chs dnode_t *dn; 1658 1.1 chs sa_bulk_attr_t *attr_desc; 1659 1.1 chs void *old_data[2]; 1660 1.1 chs int bonus_attr_count = 0; 1661 1.1 chs int bonus_data_size = 0; 1662 1.1 chs int spill_data_size = 0; 1663 1.1 chs int spill_attr_count = 0; 1664 1.1 chs int error; 1665 1.1 chs uint16_t length, reg_length; 1666 1.1 chs int i, j, k, length_idx; 1667 1.1 chs sa_hdr_phys_t *hdr; 1668 1.1 chs sa_idx_tab_t *idx_tab; 1669 1.1 chs int attr_count; 1670 1.1 chs int count; 1671 1.1 chs 1672 1.1 chs ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1673 1.1 chs 1674 1.1 chs /* First make of copy of the old data */ 1675 1.1 chs 1676 1.1 chs DB_DNODE_ENTER(db); 1677 1.1 chs dn = DB_DNODE(db); 1678 1.1 chs if (dn->dn_bonuslen != 0) { 1679 1.1 chs bonus_data_size = hdl->sa_bonus->db_size; 1680 1.1 chs old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP); 1681 1.1 chs bcopy(hdl->sa_bonus->db_data, old_data[0], 1682 1.1 chs hdl->sa_bonus->db_size); 1683 1.1 chs bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count; 1684 1.1 chs } else { 1685 1.1 chs old_data[0] = NULL; 1686 1.1 chs } 1687 1.1 chs DB_DNODE_EXIT(db); 1688 1.1 chs 1689 1.1 chs /* Bring spill buffer online if it isn't currently */ 1690 1.1 chs 1691 1.1 chs if ((error = sa_get_spill(hdl)) == 0) { 1692 1.1 chs spill_data_size = hdl->sa_spill->db_size; 1693 1.1 chs old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP); 1694 1.1 chs bcopy(hdl->sa_spill->db_data, old_data[1], 1695 1.1 chs hdl->sa_spill->db_size); 1696 1.1 chs spill_attr_count = 1697 1.1 chs hdl->sa_spill_tab->sa_layout->lot_attr_count; 1698 1.1 chs } else if (error && error != ENOENT) { 1699 1.1 chs 
if (old_data[0]) 1700 1.1 chs kmem_free(old_data[0], bonus_data_size); 1701 1.1 chs return (error); 1702 1.1 chs } else { 1703 1.1 chs old_data[1] = NULL; 1704 1.1 chs } 1705 1.1 chs 1706 1.1 chs /* build descriptor of all attributes */ 1707 1.1 chs 1708 1.1 chs attr_count = bonus_attr_count + spill_attr_count; 1709 1.1 chs if (action == SA_ADD) 1710 1.1 chs attr_count++; 1711 1.1 chs else if (action == SA_REMOVE) 1712 1.1 chs attr_count--; 1713 1.1 chs 1714 1.1 chs attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP); 1715 1.1 chs 1716 1.1 chs /* 1717 1.1 chs * loop through bonus and spill buffer if it exists, and 1718 1.1 chs * build up new attr_descriptor to reset the attributes 1719 1.1 chs */ 1720 1.1 chs k = j = 0; 1721 1.1 chs count = bonus_attr_count; 1722 1.1 chs hdr = SA_GET_HDR(hdl, SA_BONUS); 1723 1.1 chs idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS); 1724 1.1 chs for (; k != 2; k++) { 1725 1.1 chs /* 1726 1.1 chs * Iterate over each attribute in layout. Fetch the 1727 1.1 chs * size of variable-length attributes needing rewrite 1728 1.1 chs * from sa_lengths[]. 1729 1.1 chs */ 1730 1.1 chs for (i = 0, length_idx = 0; i != count; i++) { 1731 1.1 chs sa_attr_type_t attr; 1732 1.1 chs 1733 1.1 chs attr = idx_tab->sa_layout->lot_attrs[i]; 1734 1.1 chs reg_length = SA_REGISTERED_LEN(sa, attr); 1735 1.1 chs if (reg_length == 0) { 1736 1.1 chs length = hdr->sa_lengths[length_idx]; 1737 1.1 chs length_idx++; 1738 1.1 chs } else { 1739 1.1 chs length = reg_length; 1740 1.1 chs } 1741 1.1 chs if (attr == newattr) { 1742 1.1 chs /* 1743 1.1 chs * There is nothing to do for SA_REMOVE, 1744 1.1 chs * so it is just skipped. 1745 1.1 chs */ 1746 1.1 chs if (action == SA_REMOVE) 1747 1.1 chs continue; 1748 1.1 chs 1749 1.1 chs /* 1750 1.1 chs * Duplicate attributes are not allowed, so the 1751 1.1 chs * action can not be SA_ADD here. 
1752 1.1 chs */ 1753 1.1 chs ASSERT3S(action, ==, SA_REPLACE); 1754 1.1 chs 1755 1.1 chs /* 1756 1.1 chs * Only a variable-sized attribute can be 1757 1.1 chs * replaced here, and its size must be changing. 1758 1.1 chs */ 1759 1.1 chs ASSERT3U(reg_length, ==, 0); 1760 1.1 chs ASSERT3U(length, !=, buflen); 1761 1.1 chs SA_ADD_BULK_ATTR(attr_desc, j, attr, 1762 1.1 chs locator, datastart, buflen); 1763 1.1 chs } else { 1764 1.1 chs SA_ADD_BULK_ATTR(attr_desc, j, attr, 1765 1.1 chs NULL, (void *) 1766 1.1 chs (TOC_OFF(idx_tab->sa_idx_tab[attr]) + 1767 1.1 chs (uintptr_t)old_data[k]), length); 1768 1.1 chs } 1769 1.1 chs } 1770 1.1 chs if (k == 0 && hdl->sa_spill) { 1771 1.1 chs hdr = SA_GET_HDR(hdl, SA_SPILL); 1772 1.1 chs idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL); 1773 1.1 chs count = spill_attr_count; 1774 1.1 chs } else { 1775 1.1 chs break; 1776 1.1 chs } 1777 1.1 chs } 1778 1.1 chs if (action == SA_ADD) { 1779 1.1 chs reg_length = SA_REGISTERED_LEN(sa, newattr); 1780 1.1 chs IMPLY(reg_length != 0, reg_length == buflen); 1781 1.1 chs SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator, 1782 1.1 chs datastart, buflen); 1783 1.1 chs } 1784 1.1 chs ASSERT3U(j, ==, attr_count); 1785 1.1 chs 1786 1.1 chs error = sa_build_layouts(hdl, attr_desc, attr_count, tx); 1787 1.1 chs 1788 1.1 chs if (old_data[0]) 1789 1.1 chs kmem_free(old_data[0], bonus_data_size); 1790 1.1 chs if (old_data[1]) 1791 1.1 chs kmem_free(old_data[1], spill_data_size); 1792 1.1 chs kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count); 1793 1.1 chs 1794 1.1 chs return (error); 1795 1.1 chs } 1796 1.1 chs 1797 1.1 chs static int 1798 1.1 chs sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 1799 1.1 chs dmu_tx_t *tx) 1800 1.1 chs { 1801 1.1 chs int error; 1802 1.1 chs sa_os_t *sa = hdl->sa_os->os_sa; 1803 1.1 chs dmu_object_type_t bonustype; 1804 1.1 chs 1805 1.1 chs bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS)); 1806 1.1 chs 1807 1.1 chs ASSERT(hdl); 1808 1.1 chs 
ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1809 1.1 chs 1810 1.1 chs /* sync out registration table if necessary */ 1811 1.1 chs if (sa->sa_need_attr_registration) 1812 1.1 chs sa_attr_register_sync(hdl, tx); 1813 1.1 chs 1814 1.1 chs error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx); 1815 1.1 chs if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb) 1816 1.1 chs sa->sa_update_cb(hdl, tx); 1817 1.1 chs 1818 1.1 chs return (error); 1819 1.1 chs } 1820 1.1 chs 1821 1.1 chs /* 1822 1.1 chs * update or add new attribute 1823 1.1 chs */ 1824 1.1 chs int 1825 1.1 chs sa_update(sa_handle_t *hdl, sa_attr_type_t type, 1826 1.1 chs void *buf, uint32_t buflen, dmu_tx_t *tx) 1827 1.1 chs { 1828 1.1 chs int error; 1829 1.1 chs sa_bulk_attr_t bulk; 1830 1.1 chs 1831 1.1 chs bulk.sa_attr = type; 1832 1.1 chs bulk.sa_data_func = NULL; 1833 1.1 chs bulk.sa_length = buflen; 1834 1.1 chs bulk.sa_data = buf; 1835 1.1 chs 1836 1.1 chs mutex_enter(&hdl->sa_lock); 1837 1.1 chs error = sa_bulk_update_impl(hdl, &bulk, 1, tx); 1838 1.1 chs mutex_exit(&hdl->sa_lock); 1839 1.1 chs return (error); 1840 1.1 chs } 1841 1.1 chs 1842 1.1 chs int 1843 1.1 chs sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr, 1844 1.1 chs uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx) 1845 1.1 chs { 1846 1.1 chs int error; 1847 1.1 chs sa_bulk_attr_t bulk; 1848 1.1 chs 1849 1.1 chs bulk.sa_attr = attr; 1850 1.1 chs bulk.sa_data = userdata; 1851 1.1 chs bulk.sa_data_func = locator; 1852 1.1 chs bulk.sa_length = buflen; 1853 1.1 chs 1854 1.1 chs mutex_enter(&hdl->sa_lock); 1855 1.1 chs error = sa_bulk_update_impl(hdl, &bulk, 1, tx); 1856 1.1 chs mutex_exit(&hdl->sa_lock); 1857 1.1 chs return (error); 1858 1.1 chs } 1859 1.1 chs 1860 1.1 chs /* 1861 1.1 chs * Return size of an attribute 1862 1.1 chs */ 1863 1.1 chs 1864 1.1 chs int 1865 1.1 chs sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size) 1866 1.1 chs { 1867 1.1 chs sa_bulk_attr_t bulk; 1868 1.1 chs int error; 1869 
1.1 chs 1870 1.1 chs bulk.sa_data = NULL; 1871 1.1 chs bulk.sa_attr = attr; 1872 1.1 chs bulk.sa_data_func = NULL; 1873 1.1 chs 1874 1.1 chs ASSERT(hdl); 1875 1.1 chs mutex_enter(&hdl->sa_lock); 1876 1.1 chs if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) { 1877 1.1 chs mutex_exit(&hdl->sa_lock); 1878 1.1 chs return (error); 1879 1.1 chs } 1880 1.1 chs *size = bulk.sa_size; 1881 1.1 chs 1882 1.1 chs mutex_exit(&hdl->sa_lock); 1883 1.1 chs return (0); 1884 1.1 chs } 1885 1.1 chs 1886 1.1 chs int 1887 1.1 chs sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) 1888 1.1 chs { 1889 1.1 chs ASSERT(hdl); 1890 1.1 chs ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1891 1.1 chs return (sa_lookup_impl(hdl, attrs, count)); 1892 1.1 chs } 1893 1.1 chs 1894 1.1 chs int 1895 1.1 chs sa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) 1896 1.1 chs { 1897 1.1 chs int error; 1898 1.1 chs 1899 1.1 chs ASSERT(hdl); 1900 1.1 chs mutex_enter(&hdl->sa_lock); 1901 1.1 chs error = sa_bulk_lookup_locked(hdl, attrs, count); 1902 1.1 chs mutex_exit(&hdl->sa_lock); 1903 1.1 chs return (error); 1904 1.1 chs } 1905 1.1 chs 1906 1.1 chs int 1907 1.1 chs sa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx) 1908 1.1 chs { 1909 1.1 chs int error; 1910 1.1 chs 1911 1.1 chs ASSERT(hdl); 1912 1.1 chs mutex_enter(&hdl->sa_lock); 1913 1.1 chs error = sa_bulk_update_impl(hdl, attrs, count, tx); 1914 1.1 chs mutex_exit(&hdl->sa_lock); 1915 1.1 chs return (error); 1916 1.1 chs } 1917 1.1 chs 1918 1.1 chs int 1919 1.1 chs sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx) 1920 1.1 chs { 1921 1.1 chs int error; 1922 1.1 chs 1923 1.1 chs mutex_enter(&hdl->sa_lock); 1924 1.1 chs error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL, 1925 1.1 chs NULL, 0, tx); 1926 1.1 chs mutex_exit(&hdl->sa_lock); 1927 1.1 chs return (error); 1928 1.1 chs } 1929 1.1 chs 1930 1.1 chs void 1931 1.1 chs sa_object_info(sa_handle_t *hdl, 
dmu_object_info_t *doi) 1932 1.1 chs { 1933 1.1 chs dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi); 1934 1.1 chs } 1935 1.1 chs 1936 1.1 chs void 1937 1.1 chs sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks) 1938 1.1 chs { 1939 1.1 chs dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus, 1940 1.1 chs blksize, nblocks); 1941 1.1 chs } 1942 1.1 chs 1943 1.1 chs void 1944 1.1 chs sa_set_userp(sa_handle_t *hdl, void *ptr) 1945 1.1 chs { 1946 1.1 chs hdl->sa_userp = ptr; 1947 1.1 chs } 1948 1.1 chs 1949 1.1 chs dmu_buf_t * 1950 1.1 chs sa_get_db(sa_handle_t *hdl) 1951 1.1 chs { 1952 1.1 chs return ((dmu_buf_t *)hdl->sa_bonus); 1953 1.1 chs } 1954 1.1 chs 1955 1.1 chs void * 1956 1.1 chs sa_get_userdata(sa_handle_t *hdl) 1957 1.1 chs { 1958 1.1 chs return (hdl->sa_userp); 1959 1.1 chs } 1960 1.1 chs 1961 1.1 chs void 1962 1.1 chs sa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func) 1963 1.1 chs { 1964 1.1 chs ASSERT(MUTEX_HELD(&os->os_sa->sa_lock)); 1965 1.1 chs os->os_sa->sa_update_cb = func; 1966 1.1 chs } 1967 1.1 chs 1968 1.1 chs void 1969 1.1 chs sa_register_update_callback(objset_t *os, sa_update_cb_t *func) 1970 1.1 chs { 1971 1.1 chs 1972 1.1 chs mutex_enter(&os->os_sa->sa_lock); 1973 1.1 chs sa_register_update_callback_locked(os, func); 1974 1.1 chs mutex_exit(&os->os_sa->sa_lock); 1975 1.1 chs } 1976 1.1 chs 1977 1.1 chs uint64_t 1978 1.1 chs sa_handle_object(sa_handle_t *hdl) 1979 1.1 chs { 1980 1.1 chs return (hdl->sa_bonus->db_object); 1981 1.1 chs } 1982 1.1 chs 1983 1.1 chs boolean_t 1984 1.1 chs sa_enabled(objset_t *os) 1985 1.1 chs { 1986 1.1 chs return (os->os_sa == NULL); 1987 1.1 chs } 1988 1.1 chs 1989 1.1 chs int 1990 1.1 chs sa_set_sa_object(objset_t *os, uint64_t sa_object) 1991 1.1 chs { 1992 1.1 chs sa_os_t *sa = os->os_sa; 1993 1.1 chs 1994 1.1 chs if (sa->sa_master_obj) 1995 1.1 chs return (1); 1996 1.1 chs 1997 1.1 chs sa->sa_master_obj = sa_object; 1998 1.1 chs 1999 1.1 chs return (0); 
2000 1.1 chs } 2001 1.1 chs 2002 1.1 chs int 2003 1.1 chs sa_hdrsize(void *arg) 2004 1.1 chs { 2005 1.1 chs sa_hdr_phys_t *hdr = arg; 2006 1.1 chs 2007 1.1 chs return (SA_HDR_SIZE(hdr)); 2008 1.1 chs } 2009 1.1 chs 2010 1.1 chs void 2011 1.1 chs sa_handle_lock(sa_handle_t *hdl) 2012 1.1 chs { 2013 1.1 chs ASSERT(hdl); 2014 1.1 chs mutex_enter(&hdl->sa_lock); 2015 1.1 chs } 2016 1.1 chs 2017 1.1 chs void 2018 1.1 chs sa_handle_unlock(sa_handle_t *hdl) 2019 1.1 chs { 2020 1.1 chs ASSERT(hdl); 2021 1.1 chs mutex_exit(&hdl->sa_lock); 2022 1.1 chs } 2023