1 /* $NetBSD: xbdback_xenbus.c,v 1.107 2024/06/20 15:17:27 bouyer Exp $ */ 2 3 /* 4 * Copyright (c) 2006,2024 Manuel Bouyer. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 */ 27 28 #include <sys/cdefs.h> 29 __KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.107 2024/06/20 15:17:27 bouyer Exp $"); 30 31 #include <sys/buf.h> 32 #include <sys/condvar.h> 33 #include <sys/conf.h> 34 #include <sys/disk.h> 35 #include <sys/device.h> 36 #include <sys/fcntl.h> 37 #include <sys/kauth.h> 38 #include <sys/kernel.h> 39 #include <sys/kmem.h> 40 #include <sys/kthread.h> 41 #include <sys/mutex.h> 42 #include <sys/param.h> 43 #include <sys/queue.h> 44 #include <sys/systm.h> 45 #include <sys/time.h> 46 #include <sys/types.h> 47 #include <sys/vnode.h> 48 49 #include <xen/intr.h> 50 #include <xen/hypervisor.h> 51 #include <xen/xen.h> 52 #include <xen/xen_shm.h> 53 #include <xen/evtchn.h> 54 #include <xen/xenbus.h> 55 #include <xen/xenring.h> 56 #include <xen/include/public/io/protocols.h> 57 58 /* #define XENDEBUG_VBD */ 59 #ifdef XENDEBUG_VBD 60 #define XENPRINTF(x) printf x 61 #else 62 #define XENPRINTF(x) 63 #endif 64 65 #define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 66 67 /* 68 * Backend block device driver for Xen 69 */ 70 71 /* Values are expressed in 512-byte sectors */ 72 #define VBD_BSIZE 512 73 #define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1) 74 75 #define VBD_VA_SIZE MAXPHYS 76 #define VBD_MAX_INDIRECT_SEGMENTS (VBD_VA_SIZE >> PAGE_SHIFT) 77 78 CTASSERT(XENSHM_MAX_PAGES_PER_REQUEST >= VBD_MAX_INDIRECT_SEGMENTS); 79 80 struct xbdback_instance; 81 82 /* 83 * status of a xbdback instance: 84 * WAITING: xbdback instance is connected, waiting for requests 85 * RUN: xbdi thread must be woken up, I/Os have to be processed 86 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled 87 * DISCONNECTED: no I/Os, no ring, the thread should terminate. 88 */ 89 typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t; 90 91 /* 92 * Each xbdback instance is managed by a single thread that handles all 93 * the I/O processing. As there are a variety of conditions that can block, 94 * everything will be done in a sort of continuation-passing style. 
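 *
 * (Illustration only, not a definition: the driving loop in
 * xbdback_trampoline() below is essentially
 *	while (obj != NULL && xbdi->xbdi_cont != NULL)
 *		obj = (*xbdi->xbdi_cont)(xbdi, obj);
 * where each continuation sets xbdi->xbdi_cont to the next step before
 * returning the object to pass to it.)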
 *
 * When the execution has to block to delay processing, for example to
 * allow the system to recover from memory shortage (signalled via the
 * shared memory callback), the return value of a continuation can be set
 * to NULL. In that case, the thread will go back to sleep and wait for
 * the proper condition before it starts processing requests again from
 * where it left off. Continuation state is "stored" in the xbdback
 * instance (xbdi_cont), and should only be manipulated by the instance
 * thread. If a continuation has to be restarted from a specific point,
 * the callback and argument can be stored in xbdi_cont_restart and
 * xbdi_cont_restart_obj.
 *
 * As xbdback(4) has to handle different sorts of asynchronous events (Xen
 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
 * mutex is used to protect specific elements of the xbdback instance from
 * concurrent access: thread status and ring access (when pushing responses).
 *
 * Here is how the call graph is supposed to look for a single I/O:
 *
 * xbdback_co_main()
 *        |               --> xbdback_co_cache_flush()
 *        |               |    |
 *        |               |    -> xbdback_co_do_io() or NULL
 * xbdback_co_main_loop()-|
 *        |               |-> xbdback_co_main_done2() or NULL
 *        |               |
 *        |               --> xbdback_co_main_incr() -> xbdback_co_main_loop()
 *        |
 *     xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
 *        |
 *     xbdback_co_io_gotio() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
 *        |
 *     xbdback_co_do_io()
 *        |
 *     xbdback_co_main_incr() -> xbdback_co_main_loop()
 */
typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);

enum xbdi_proto {
	XBDIP_NATIVE,
	XBDIP_32,
	XBDIP_64
};

struct xbdback_va {
	SLIST_ENTRY(xbdback_va) xv_next;
	vaddr_t xv_vaddr;
};

/*
 * For each I/O operation associated with one of the requests read from
 * the ring, an xbdback_io is taken from the instance's free list. It
 * tracks that request until the reply has been sent back to the frontend.
 */
struct xbdback_io {
	SLIST_ENTRY(xbdback_io) xio_next;
	/* The instance pointer is duplicated for convenience. */
	struct xbdback_instance *xio_xbdi; /* our xbd instance */
	/* _request state: track requests fetched from ring */
	blkif_request_t xio_xen_req;
	/* array of segments[VBD_MAX_INDIRECT_SEGMENTS] allocated separately */
	struct blkif_request_segment *xio_seg;
	bus_dmamap_t xio_seg_dmamap;
	/* internal states */
	union {
		struct {
			struct buf xio_buf; /* our I/O */
			/* the virtual address to map the request at */
			vaddr_t xio_vaddr;
			struct xbdback_va *xio_xv;
			vaddr_t xio_start_offset; /* I/O start offset */
			/* grants to map */
			grant_ref_t xio_gref[VBD_MAX_INDIRECT_SEGMENTS];
			/* grants to release */
			grant_handle_t xio_gh[VBD_MAX_INDIRECT_SEGMENTS];
			bool xio_need_bounce; /* request is not contiguous */
		} xio_rw;
	} u;
};
#define xio_buf		u.xio_rw.xio_buf
#define xio_vaddr	u.xio_rw.xio_vaddr
#define xio_start_offset	u.xio_rw.xio_start_offset
#define xio_xv		u.xio_rw.xio_xv
#define xio_gref	u.xio_rw.xio_gref
#define xio_gh		u.xio_rw.xio_gh
#define xio_need_bounce	u.xio_rw.xio_need_bounce

/* we keep the xbdback instances in a linked list */
struct xbdback_instance {
	SLIST_ENTRY(xbdback_instance) next;
	struct xenbus_device *xbdi_xbusd; /* our xenstore entry */
	struct xenbus_watch xbdi_watch; /* to watch our store */
	domid_t xbdi_domid;	/* attached to this domain */
	uint32_t xbdi_handle;	/* domain-specific handle */
	char xbdi_name[16];	/* name of this instance */
	/* mutex that protects concurrent access to the xbdback instance */
	kmutex_t xbdi_lock;
	kcondvar_t xbdi_cv;	/* wait channel for thread work */
	xbdback_state_t xbdi_status; /* thread's status */
	/* context and KVA for mapping transfers */
	struct xbdback_io xbdi_io[BLKIF_RING_SIZE];
	SLIST_HEAD(, xbdback_io) xbdi_io_free;
	struct xbdback_va xbdi_va[BLKIF_RING_SIZE];
	SLIST_HEAD(, xbdback_va) xbdi_va_free;
	/* segments structure allocated in page-aligned chunks */
	struct blkif_request_segment *xbdi_segs;
	/* bounce buffer in case a transfer is not contiguous */
	vaddr_t xbdi_bouncebuf;
	int xbdi_bouncebuf_use;	/* is bounce buffer in use? */
	/* backing device parameters */
	dev_t xbdi_dev;
	const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */
	struct vnode *xbdi_vp;
	uint64_t xbdi_size;
	bool xbdi_ro;	/* is the device read-only? */
	/* parameters for the communication */
	unsigned int xbdi_evtchn;
	struct intrhand *xbdi_ih;
	/* private parameters for communication */
	blkif_back_ring_proto_t xbdi_ring;
	enum xbdi_proto xbdi_proto;
	grant_handle_t xbdi_ring_handle; /* to unmap the ring */
	vaddr_t xbdi_ring_va; /* to unmap the ring */
	/* disconnection must be postponed until all I/O is done */
	int xbdi_refcnt;
	/*
	 * State for I/O processing/coalescing follows; this has to
	 * live here instead of on the stack because of the
	 * continuation-ness (see above).
	 */
	RING_IDX xbdi_req_prod; /* limit on request indices */
	xbdback_cont_t xbdi_cont;
	/* if not NULL, will restart here after thread wakes up */
	xbdback_cont_t xbdi_cont_restart;
	void *xbdi_cont_restart_obj;
	/* other state */
	uint xbdi_pendingreqs; /* number of I/Os in flight */
	struct timeval xbdi_lasterr_time;    /* error time tracking */
};
/* Manipulation of the above reference count.
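 * (Sketch of the counting scheme as read from the code below: the instance
 * starts with one reference taken in xbdback_xenbus_create(); each in-flight
 * I/O and pending cache flush takes another via xbdi_get(), dropped again in
 * xbdback_iodone_locked() or in the flush path of xbdback_co_do_io(). The
 * worker thread drops the initial reference when disconnecting, and
 * xbdback_finish_disconnect() runs once the count reaches zero.)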
*/ 237 #define xbdi_get(xbdip) \ 238 do { \ 239 KASSERT(mutex_owned(&xbdip->xbdi_lock)); \ 240 (xbdip)->xbdi_refcnt++; \ 241 } while (0) 242 243 #define xbdi_put(xbdip) \ 244 do { \ 245 KASSERT(mutex_owned(&xbdip->xbdi_lock)); \ 246 if (--((xbdip)->xbdi_refcnt) == 0) \ 247 xbdback_finish_disconnect(xbdip); \ 248 } while (0) 249 250 static SLIST_HEAD(, xbdback_instance) xbdback_instances; 251 static kmutex_t xbdback_lock; 252 253 /* Interval between reports of I/O errors from frontend */ 254 static const struct timeval xbdback_err_intvl = { 1, 0 }; 255 256 void xbdbackattach(int); 257 static int xbdback_xenbus_create(struct xenbus_device *); 258 static int xbdback_xenbus_destroy(void *); 259 static void xbdback_frontend_changed(void *, XenbusState); 260 static void xbdback_backend_changed(struct xenbus_watch *, 261 const char **, unsigned int); 262 static int xbdback_evthandler(void *); 263 264 static int xbdback_connect(struct xbdback_instance *); 265 static void xbdback_disconnect(struct xbdback_instance *); 266 static void xbdback_finish_disconnect(struct xbdback_instance *); 267 268 static bool xbdif_lookup(domid_t, uint32_t); 269 270 static void *xbdback_co_main(struct xbdback_instance *, void *); 271 static void *xbdback_co_main_loop(struct xbdback_instance *, void *); 272 static void *xbdback_co_main_incr(struct xbdback_instance *, void *); 273 static void *xbdback_co_main_done2(struct xbdback_instance *, void *); 274 275 static void *xbdback_co_cache_flush(struct xbdback_instance *, void *); 276 277 static void *xbdback_co_io(struct xbdback_instance *, void *); 278 static void *xbdback_co_io_gotio(struct xbdback_instance *, void *); 279 280 static void *xbdback_co_do_io(struct xbdback_instance *, void *); 281 282 static void xbdback_io_error(struct xbdback_io *, int); 283 static void xbdback_iodone(struct buf *); 284 static void xbdback_iodone_locked(struct xbdback_instance *, 285 struct xbdback_io *, struct buf *); 286 static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int); 287 288 static int xbdback_map_shm(struct xbdback_io *); 289 static void xbdback_unmap_shm(struct xbdback_io *); 290 291 static struct xbdback_io *xbdback_io_get(struct xbdback_instance *); 292 static void xbdback_io_put(struct xbdback_instance *, struct xbdback_io *); 293 static void xbdback_thread(void *); 294 static void xbdback_wakeup_thread(struct xbdback_instance *); 295 static void xbdback_trampoline(struct xbdback_instance *, void *); 296 297 static struct xenbus_backend_driver xbd_backend_driver = { 298 .xbakd_create = xbdback_xenbus_create, 299 .xbakd_type = "vbd" 300 }; 301 302 void 303 xbdbackattach(int n) 304 { 305 XENPRINTF(("xbdbackattach\n")); 306 307 /* 308 * initialize the backend driver, register the control message handler 309 * and send driver up message. 310 */ 311 SLIST_INIT(&xbdback_instances); 312 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE); 313 314 xenbus_backend_register(&xbd_backend_driver); 315 } 316 317 static int 318 xbdback_xenbus_create(struct xenbus_device *xbusd) 319 { 320 struct xbdback_instance *xbdi; 321 long domid, handle; 322 int error, i; 323 int segalloc = 0; 324 char *ep; 325 326 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path, 327 "frontend-id", &domid, 10)) != 0) { 328 aprint_error("xbdback: can't read %s/frontend-id: %d\n", 329 xbusd->xbusd_path, error); 330 return error; 331 } 332 333 /* 334 * get handle: this is the last component of the path; which is 335 * a decimal number. 
$path/dev contains the device name, which is not
	 * appropriate.
	 */
	for (i = strlen(xbusd->xbusd_path); i > 0; i--) {
		if (xbusd->xbusd_path[i] == '/')
			break;
	}
	if (i == 0) {
		aprint_error("xbdback: can't parse %s\n",
		    xbusd->xbusd_path);
		return EFTYPE;
	}
	handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10);
	if (*ep != '\0') {
		aprint_error("xbdback: can't parse %s\n",
		    xbusd->xbusd_path);
		return EFTYPE;
	}

	xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP);

	xbdi->xbdi_domid = domid;
	xbdi->xbdi_handle = handle;
	snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d",
	    xbdi->xbdi_domid, xbdi->xbdi_handle);

	mutex_enter(&xbdback_lock);
	if (xbdif_lookup(domid, handle)) {
		mutex_exit(&xbdback_lock);
		kmem_free(xbdi, sizeof(*xbdi));
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next);
	mutex_exit(&xbdback_lock);

	/* initialize status and reference counter */
	xbdi->xbdi_status = DISCONNECTED;

	mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO);
	cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name);

	mutex_enter(&xbdi->xbdi_lock);
	xbdi_get(xbdi);
	mutex_exit(&xbdi->xbdi_lock);

	xbusd->xbusd_u.b.b_cookie = xbdi;
	xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy;
	xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
	xbdi->xbdi_xbusd = xbusd;

	SLIST_INIT(&xbdi->xbdi_va_free);
	for (i = 0; i < BLKIF_RING_SIZE; i++) {
		xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map,
		    VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA);
		SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i],
		    xv_next);
	}

	/*
	 * allocate page-aligned memory for segments, so that for each
	 * xbdback_io its segments are in a single page.
	 * sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS
	 * is 128 so this helps us avoid a page boundary within a
	 * block of VBD_MAX_INDIRECT_SEGMENTS segments.
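	 * (For example, with the usual 4kB pages and 64kB MAXPHYS,
	 * VBD_MAX_INDIRECT_SEGMENTS is 16 and each struct
	 * blkif_request_segment is 8 bytes, giving the 128 bytes checked
	 * by the CTASSERT below, so 32 such blocks fit exactly in a page.)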
399 */ 400 CTASSERT(sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS == 128); 401 xbdi->xbdi_segs = (void *)uvm_km_alloc(kernel_map, round_page( 402 sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS * BLKIF_RING_SIZE), 403 PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_WAITVA); 404 405 SLIST_INIT(&xbdi->xbdi_io_free); 406 for (i = 0; i < BLKIF_RING_SIZE; i++) { 407 struct xbdback_io *xbd_io = &xbdi->xbdi_io[i]; 408 xbd_io->xio_seg = 409 &xbdi->xbdi_segs[i * VBD_MAX_INDIRECT_SEGMENTS]; 410 error = bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat, 411 PAGE_SIZE, 1, PAGE_SIZE, PAGE_SIZE, 412 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 413 &xbd_io->xio_seg_dmamap); 414 if (error != 0) { 415 printf("%s: can't create dma map for indirect segments %d\n", 416 xbdi->xbdi_name, i); 417 goto fail; 418 } 419 error = bus_dmamap_load(xbdi->xbdi_xbusd->xbusd_dmat, 420 xbd_io->xio_seg_dmamap, xbd_io->xio_seg, 421 sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS, 422 NULL, BUS_DMA_WAITOK); 423 if (error != 0) { 424 printf("%s: can't load dma map for indirect segments %d @%p (%d, %zu)\n", 425 xbdi->xbdi_name, i, xbd_io->xio_seg, error, sizeof(xbd_io->xio_seg)); 426 bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat, 427 xbd_io->xio_seg_dmamap); 428 goto fail; 429 } 430 KASSERT(xbd_io->xio_seg_dmamap->dm_nsegs == 1); 431 segalloc = i; 432 SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next); 433 } 434 435 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device", 436 &xbdi->xbdi_watch, xbdback_backend_changed); 437 if (error) { 438 printf("failed to watch on %s/physical-device: %d\n", 439 xbusd->xbusd_path, error); 440 goto fail; 441 } 442 xbdi->xbdi_watch.xbw_dev = xbusd; 443 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait); 444 if (error) { 445 printf("failed to switch state on %s: %d\n", 446 xbusd->xbusd_path, error); 447 goto fail2; 448 } 449 450 xbdi->xbdi_bouncebuf = uvm_km_alloc(kernel_map, MAXPHYS, PAGE_SIZE, 451 UVM_KMF_WIRED | UVM_KMF_WAITVA); 452 return 0; 453 fail2: 454 unregister_xenbus_watch(&xbdi->xbdi_watch); 455 fail: 456 for (i = 0; i < segalloc; i++) { 457 struct xbdback_io *xbd_io = &xbdi->xbdi_io[i]; 458 bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat, 459 xbd_io->xio_seg_dmamap); 460 bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat, 461 xbd_io->xio_seg_dmamap); 462 } 463 mutex_enter(&xbdback_lock); 464 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next); 465 mutex_exit(&xbdback_lock); 466 kmem_free(xbdi, sizeof(*xbdi)); 467 return error; 468 } 469 470 static int 471 xbdback_xenbus_destroy(void *arg) 472 { 473 struct xbdback_instance *xbdi = arg; 474 475 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status)); 476 477 xbdback_disconnect(xbdi); 478 479 /* unregister watch */ 480 if (xbdi->xbdi_watch.node) 481 xenbus_unwatch_path(&xbdi->xbdi_watch); 482 /* unmap ring */ 483 if (xbdi->xbdi_ring_handle) { 484 xen_shm_unmap(xbdi->xbdi_ring_va, 1, &xbdi->xbdi_ring_handle); 485 } 486 487 if (xbdi->xbdi_ring_va != 0) { 488 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, 489 PAGE_SIZE, UVM_KMF_VAONLY); 490 } 491 492 /* close device */ 493 if (xbdi->xbdi_size) { 494 const char *name; 495 struct dkwedge_info wi; 496 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0) 497 name = wi.dkw_devname; 498 else 499 name = "*unknown*"; 500 printf("xbd backend: detach device %s for domain %d\n", 501 name, xbdi->xbdi_domid); 502 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 503 } 504 mutex_enter(&xbdback_lock); 505 SLIST_REMOVE(&xbdback_instances, xbdi, 
	    xbdback_instance, next);
	mutex_exit(&xbdback_lock);

	for (int i = 0; i < BLKIF_RING_SIZE; i++) {
		struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
		bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat,
		    xbd_io->xio_seg_dmamap);
		bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
		    xbd_io->xio_seg_dmamap);
		if (xbdi->xbdi_va[i].xv_vaddr != 0) {
			uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr,
			    VBD_VA_SIZE, UVM_KMF_VAONLY);
			xbdi->xbdi_va[i].xv_vaddr = 0;
		}
	}

	mutex_destroy(&xbdi->xbdi_lock);
	cv_destroy(&xbdi->xbdi_cv);
	kmem_free(xbdi, sizeof(*xbdi));
	return 0;
}

static int
xbdback_connect(struct xbdback_instance *xbdi)
{
	int err;
	evtchn_op_t evop;
	grant_ref_t gring_ref;
	u_long ring_ref, revtchn;
	char xsproto[32];
	const char *proto;
	struct xenbus_device *xbusd = xbdi->xbdi_xbusd;

	XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path));
	/* read communication information */
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "ring-ref", &ring_ref, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref",
		    xbusd->xbusd_otherend);
		return -1;
	}
	XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref));
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "event-channel", &revtchn, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
		    xbusd->xbusd_otherend);
		return -1;
	}
	XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn));
	err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol",
	    xsproto, sizeof(xsproto));
	if (err) {
		xbdi->xbdi_proto = XBDIP_NATIVE;
		proto = "unspecified";
		XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path));
	} else {
		XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto));
		if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) {
			xbdi->xbdi_proto = XBDIP_NATIVE;
			proto = XEN_IO_PROTO_ABI_NATIVE;
		} else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) {
			xbdi->xbdi_proto = XBDIP_32;
			proto = XEN_IO_PROTO_ABI_X86_32;
		} else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) {
			xbdi->xbdi_proto = XBDIP_64;
			proto = XEN_IO_PROTO_ABI_X86_64;
		} else {
			aprint_error("xbd domain %d: unknown proto %s\n",
			    xbdi->xbdi_domid, xsproto);
			return -1;
		}
	}

	/* allocate VA space and map rings */
	xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);
	if (xbdi->xbdi_ring_va == 0) {
		xenbus_dev_fatal(xbusd, ENOMEM,
		    "can't get VA for ring", xbusd->xbusd_otherend);
		return -1;
	}
	XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va));

	gring_ref = ring_ref;
	if (xen_shm_map(1, xbdi->xbdi_domid, &gring_ref, xbdi->xbdi_ring_va,
	    &xbdi->xbdi_ring_handle, 0) != 0) {
		aprint_error("xbdback %s: can't map grant ref\n",
		    xbusd->xbusd_path);
		xenbus_dev_fatal(xbusd, EINVAL,
		    "can't map ring", xbusd->xbusd_otherend);
		goto err1;
	}
	XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, xbdi->xbdi_ring_handle));

	switch(xbdi->xbdi_proto) {
	case XBDIP_NATIVE:
	{
		blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va;
		BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE);
		break;
	}
	case XBDIP_32:
	{
		blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va;
BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE); 613 break; 614 } 615 case XBDIP_64: 616 { 617 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va; 618 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE); 619 break; 620 } 621 } 622 623 evop.cmd = EVTCHNOP_bind_interdomain; 624 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid; 625 evop.u.bind_interdomain.remote_port = revtchn; 626 err = HYPERVISOR_event_channel_op(&evop); 627 if (err) { 628 aprint_error("blkback %s: " 629 "can't get event channel: %d\n", 630 xbusd->xbusd_otherend, err); 631 xenbus_dev_fatal(xbusd, err, 632 "can't bind event channel", xbusd->xbusd_otherend); 633 goto err2; 634 } 635 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port; 636 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn)); 637 638 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic, 639 xbdi->xbdi_evtchn, IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi, 640 true, xbdi->xbdi_name); 641 KASSERT(xbdi->xbdi_ih != NULL); 642 aprint_verbose("xbd backend domain %d handle %#x (%d) " 643 "using event channel %d, protocol %s\n", xbdi->xbdi_domid, 644 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto); 645 646 /* enable the xbdback event handler machinery */ 647 xbdi->xbdi_status = WAITING; 648 hypervisor_unmask_event(xbdi->xbdi_evtchn); 649 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 650 651 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, 652 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0) 653 return 0; 654 655 err2: 656 /* unmap ring */ 657 xen_shm_unmap(xbdi->xbdi_ring_va, 1, &xbdi->xbdi_ring_handle); 658 err1: 659 /* free ring VA space */ 660 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY); 661 return -1; 662 } 663 664 /* 665 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context. 
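 * (As implemented below: mask the event channel, mark the instance
 * DISCONNECTING and signal xbdi_cv, then sleep on the same condvar until
 * the worker thread has drained its I/O and switched the status to
 * DISCONNECTED, before tearing down the interrupt handle.)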
666 */ 667 static void 668 xbdback_disconnect(struct xbdback_instance *xbdi) 669 { 670 671 mutex_enter(&xbdi->xbdi_lock); 672 if (xbdi->xbdi_status == DISCONNECTED) { 673 mutex_exit(&xbdi->xbdi_lock); 674 return; 675 } 676 hypervisor_mask_event(xbdi->xbdi_evtchn); 677 678 /* signal thread that we want to disconnect, then wait for it */ 679 xbdi->xbdi_status = DISCONNECTING; 680 cv_signal(&xbdi->xbdi_cv); 681 682 while (xbdi->xbdi_status != DISCONNECTED) 683 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 684 685 mutex_exit(&xbdi->xbdi_lock); 686 xen_intr_disestablish(xbdi->xbdi_ih); 687 688 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing); 689 } 690 691 static void 692 xbdback_frontend_changed(void *arg, XenbusState new_state) 693 { 694 struct xbdback_instance *xbdi = arg; 695 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 696 697 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state)); 698 switch(new_state) { 699 case XenbusStateInitialising: 700 break; 701 case XenbusStateInitialised: 702 case XenbusStateConnected: 703 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) 704 break; 705 xbdback_connect(xbdi); 706 break; 707 case XenbusStateClosing: 708 xbdback_disconnect(xbdi); 709 break; 710 case XenbusStateClosed: 711 /* otherend_changed() should handle it for us */ 712 panic("xbdback_frontend_changed: closed\n"); 713 case XenbusStateUnknown: 714 case XenbusStateInitWait: 715 default: 716 aprint_error("xbdback %s: invalid frontend state %d\n", 717 xbusd->xbusd_path, new_state); 718 } 719 return; 720 } 721 722 static void 723 xbdback_backend_changed(struct xenbus_watch *watch, 724 const char **vec, unsigned int len) 725 { 726 struct xenbus_device *xbusd = watch->xbw_dev; 727 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie; 728 int err; 729 long dev; 730 char mode[32]; 731 struct xenbus_transaction *xbt; 732 const char *devname; 733 int major; 734 735 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device", 736 &dev, 10); 737 /* 738 * An error can occur as the watch can fire up just after being 739 * registered. So we have to ignore error :( 740 */ 741 if (err) 742 return; 743 /* 744 * we can also fire up after having opened the device, don't try 745 * to do it twice. 
746 */ 747 if (xbdi->xbdi_vp != NULL) { 748 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) { 749 if (xbdi->xbdi_dev != dev) { 750 printf("xbdback %s: changing physical device " 751 "from %#"PRIx64" to %#lx not supported\n", 752 xbusd->xbusd_path, xbdi->xbdi_dev, dev); 753 } 754 } 755 return; 756 } 757 xbdi->xbdi_dev = dev; 758 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode)); 759 if (err) { 760 printf("xbdback: failed to read %s/mode: %d\n", 761 xbusd->xbusd_path, err); 762 return; 763 } 764 if (mode[0] == 'w') 765 xbdi->xbdi_ro = false; 766 else 767 xbdi->xbdi_ro = true; 768 major = major(xbdi->xbdi_dev); 769 devname = devsw_blk2name(major); 770 if (devname == NULL) { 771 printf("xbdback %s: unknown device 0x%"PRIx64"\n", 772 xbusd->xbusd_path, xbdi->xbdi_dev); 773 return; 774 } 775 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev); 776 if (xbdi->xbdi_bdevsw == NULL) { 777 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n", 778 xbusd->xbusd_path, xbdi->xbdi_dev); 779 return; 780 } 781 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp); 782 if (err) { 783 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n", 784 xbusd->xbusd_path, xbdi->xbdi_dev, err); 785 return; 786 } 787 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY); 788 if (err) { 789 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n", 790 xbusd->xbusd_path, xbdi->xbdi_dev, err); 791 vrele(xbdi->xbdi_vp); 792 return; 793 } 794 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED); 795 if (err) { 796 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n", 797 xbusd->xbusd_path, xbdi->xbdi_dev, err); 798 vput(xbdi->xbdi_vp); 799 return; 800 } 801 VOP_UNLOCK(xbdi->xbdi_vp); 802 803 /* dk device; get wedge data */ 804 struct dkwedge_info wi; 805 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) { 806 xbdi->xbdi_size = wi.dkw_size; 807 printf("xbd backend: attach device %s (size %" PRIu64 ") " 808 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size, 809 xbdi->xbdi_domid); 810 } else { 811 /* If both Ioctls failed set device size to 0 and return */ 812 printf("xbdback %s: can't DIOCGWEDGEINFO device " 813 "0x%"PRIx64": %d\n", xbusd->xbusd_path, 814 xbdi->xbdi_dev, err); 815 xbdi->xbdi_size = xbdi->xbdi_dev = 0; 816 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 817 xbdi->xbdi_vp = NULL; 818 return; 819 } 820 again: 821 xbt = xenbus_transaction_start(); 822 if (xbt == NULL) { 823 printf("xbdback %s: can't start transaction\n", 824 xbusd->xbusd_path); 825 return; 826 } 827 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 , 828 xbdi->xbdi_size); 829 if (err) { 830 printf("xbdback: failed to write %s/sectors: %d\n", 831 xbusd->xbusd_path, err); 832 goto abort; 833 } 834 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u", 835 xbdi->xbdi_ro ? 
VDISK_READONLY : 0); 836 if (err) { 837 printf("xbdback: failed to write %s/info: %d\n", 838 xbusd->xbusd_path, err); 839 goto abort; 840 } 841 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu", 842 (u_long)DEV_BSIZE); 843 if (err) { 844 printf("xbdback: failed to write %s/sector-size: %d\n", 845 xbusd->xbusd_path, err); 846 goto abort; 847 } 848 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache", 849 "%u", 1); 850 if (err) { 851 printf("xbdback: failed to write %s/feature-flush-cache: %d\n", 852 xbusd->xbusd_path, err); 853 goto abort; 854 } 855 err = xenbus_printf(xbt, xbusd->xbusd_path, 856 "feature-max-indirect-segments", "%u", VBD_MAX_INDIRECT_SEGMENTS); 857 if (err) { 858 printf("xbdback: failed to write %s/feature-indirect: %d\n", 859 xbusd->xbusd_path, err); 860 goto abort; 861 } 862 err = xenbus_transaction_end(xbt, 0); 863 if (err == EAGAIN) 864 goto again; 865 if (err) { 866 printf("xbdback %s: can't end transaction: %d\n", 867 xbusd->xbusd_path, err); 868 } 869 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected); 870 if (err) { 871 printf("xbdback %s: can't switch state: %d\n", 872 xbusd->xbusd_path, err); 873 } 874 return; 875 abort: 876 xenbus_transaction_end(xbt, 1); 877 } 878 879 /* 880 * Used by a xbdi thread to signal that it is now disconnected. 881 */ 882 static void 883 xbdback_finish_disconnect(struct xbdback_instance *xbdi) 884 { 885 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 886 KASSERT(xbdi->xbdi_status == DISCONNECTING); 887 888 xbdi->xbdi_status = DISCONNECTED; 889 890 cv_broadcast(&xbdi->xbdi_cv); 891 } 892 893 static bool 894 xbdif_lookup(domid_t dom , uint32_t handle) 895 { 896 struct xbdback_instance *xbdi; 897 bool found = false; 898 899 KASSERT(mutex_owned(&xbdback_lock)); 900 901 SLIST_FOREACH(xbdi, &xbdback_instances, next) { 902 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) { 903 found = true; 904 break; 905 } 906 } 907 908 return found; 909 } 910 911 static int 912 xbdback_evthandler(void *arg) 913 { 914 struct xbdback_instance *xbdi = arg; 915 916 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n", 917 xbdi->xbdi_domid, xbdi->xbdi_cont)); 918 919 mutex_enter(&xbdi->xbdi_lock); 920 xbdback_wakeup_thread(xbdi); 921 mutex_exit(&xbdi->xbdi_lock); 922 923 return 1; 924 } 925 926 /* 927 * Main thread routine for one xbdback instance. Woken up by 928 * xbdback_evthandler when a domain has I/O work scheduled in a I/O ring. 929 */ 930 static void 931 xbdback_thread(void *arg) 932 { 933 struct xbdback_instance *xbdi = arg; 934 void *obj; 935 936 mutex_enter(&xbdi->xbdi_lock); 937 for (;;) { 938 switch (xbdi->xbdi_status) { 939 case WAITING: 940 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 941 break; 942 case RUN: 943 xbdi->xbdi_status = WAITING; /* reset state */ 944 obj = xbdi; 945 if (xbdi->xbdi_cont_restart != NULL) { 946 KASSERT(xbdi->xbdi_cont == NULL); 947 xbdi->xbdi_cont = xbdi->xbdi_cont_restart; 948 obj = xbdi->xbdi_cont_restart_obj; 949 xbdi->xbdi_cont_restart = NULL; 950 xbdi->xbdi_cont_restart_obj = NULL; 951 } 952 if (xbdi->xbdi_cont == NULL) { 953 xbdi->xbdi_cont = xbdback_co_main; 954 } 955 956 xbdback_trampoline(xbdi, obj); 957 break; 958 case DISCONNECTING: 959 if (xbdi->xbdi_pendingreqs > 0) { 960 /* there are pending I/Os. Wait for them. 
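				 * xbdback_iodone_locked() signals xbdi_cv as
				 * each one completes, so this state is
				 * re-checked until xbdi_pendingreqs reaches 0.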
*/ 961 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 962 continue; 963 } 964 965 /* All I/Os should have been processed by now, 966 * xbdi_refcnt should drop to 0 */ 967 xbdi_put(xbdi); 968 KASSERT(xbdi->xbdi_refcnt == 0); 969 goto out; 970 /* NOTREACHED */ 971 default: 972 panic("%s: invalid state %d", 973 xbdi->xbdi_name, xbdi->xbdi_status); 974 } 975 } 976 out: 977 mutex_exit(&xbdi->xbdi_lock); 978 979 kthread_exit(0); 980 } 981 982 static void * 983 xbdback_co_main(struct xbdback_instance *xbdi, void *obj) 984 { 985 (void)obj; 986 987 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod; 988 xen_rmb(); /* ensure we see all requests up to req_prod */ 989 /* 990 * note that we'll eventually get a full ring of request. 991 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod) 992 */ 993 xbdi->xbdi_cont = xbdback_co_main_loop; 994 return xbdi; 995 } 996 997 /* 998 * Fetch a blkif request from the ring, and pass control to the appropriate 999 * continuation. 1000 * If someone asked for disconnection, do not fetch any more request from 1001 * the ring. 1002 */ 1003 static void * 1004 xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj __unused) 1005 { 1006 blkif_request_t *req, *reqn; 1007 blkif_x86_32_request_t *req32; 1008 blkif_x86_64_request_t *req64; 1009 blkif_request_indirect_t *rinn; 1010 blkif_x86_32_request_indirect_t *rin32; 1011 blkif_x86_64_request_indirect_t *rin64; 1012 1013 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) { 1014 struct xbdback_io *xbd_io = xbdback_io_get(xbdi); 1015 uint8_t real_op = 0xff; 1016 1017 if (xbd_io == NULL) { 1018 /* retry after iodone */ 1019 xbdi->xbdi_cont = NULL; 1020 return NULL; 1021 } 1022 memset(&xbd_io->u, 0, sizeof(xbd_io->u)); 1023 1024 buf_init(&xbd_io->xio_buf); 1025 xbd_io->xio_xbdi = xbdi; 1026 1027 req = &xbd_io->xio_xen_req; 1028 memset(req, 0, sizeof(*req)); 1029 1030 switch(xbdi->xbdi_proto) { 1031 case XBDIP_NATIVE: 1032 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 1033 xbdi->xbdi_ring.ring_n.req_cons); 1034 real_op = req->operation = reqn->operation; 1035 if (real_op == BLKIF_OP_INDIRECT) { 1036 rinn = (blkif_request_indirect_t *)reqn; 1037 real_op = rinn->indirect_op; 1038 } 1039 req->id = reqn->id; 1040 break; 1041 case XBDIP_32: 1042 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 1043 xbdi->xbdi_ring.ring_n.req_cons); 1044 real_op = req->operation = req32->operation; 1045 if (real_op == BLKIF_OP_INDIRECT) { 1046 rin32 = (blkif_x86_32_request_indirect_t*)req32; 1047 real_op = rin32->indirect_op; 1048 } 1049 req->id = req32->id; 1050 break; 1051 case XBDIP_64: 1052 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 1053 xbdi->xbdi_ring.ring_n.req_cons); 1054 real_op = req->operation = req64->operation; 1055 if (real_op == BLKIF_OP_INDIRECT) { 1056 rin64 = (blkif_x86_64_request_indirect_t*)req64; 1057 real_op = rin64->indirect_op; 1058 } 1059 req->id = req64->id; 1060 break; 1061 } 1062 __insn_barrier(); 1063 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x " 1064 "resp_prod 0x%x id %" PRIu64 "\n", req->operation, 1065 xbdi->xbdi_ring.ring_n.req_cons, 1066 xbdi->xbdi_req_prod, 1067 xbdi->xbdi_ring.ring_n.rsp_prod_pvt, 1068 req->id)); 1069 switch (req->operation) { 1070 case BLKIF_OP_INDIRECT: 1071 /* just check indirect_op, rest is handled later */ 1072 if (real_op != BLKIF_OP_READ && 1073 real_op != BLKIF_OP_WRITE) { 1074 if (ratecheck(&xbdi->xbdi_lasterr_time, 1075 &xbdback_err_intvl)) { 1076 printf("%s: unknown ind operation %d\n", 1077 xbdi->xbdi_name, 1078 real_op); 
1079 } 1080 goto fail; 1081 } 1082 /* FALLTHROUGH */ 1083 case BLKIF_OP_READ: 1084 case BLKIF_OP_WRITE: 1085 xbdi->xbdi_cont = xbdback_co_io; 1086 return xbd_io; 1087 case BLKIF_OP_FLUSH_DISKCACHE: 1088 xbdi->xbdi_cont = xbdback_co_cache_flush; 1089 return xbd_io; 1090 default: 1091 if (ratecheck(&xbdi->xbdi_lasterr_time, 1092 &xbdback_err_intvl)) { 1093 printf("%s: unknown operation %d\n", 1094 xbdi->xbdi_name, req->operation); 1095 } 1096 fail: 1097 xbdback_send_reply(xbdi, req->id, real_op, 1098 BLKIF_RSP_ERROR); 1099 xbdi->xbdi_cont = xbdback_co_main_incr; 1100 return xbdi; 1101 } 1102 } else { 1103 xbdi->xbdi_cont = xbdback_co_main_done2; 1104 return xbdi; 1105 } 1106 } 1107 1108 /* 1109 * Increment consumer index and move on to the next request. In case 1110 * we want to disconnect, leave continuation now. 1111 */ 1112 static void * 1113 xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj __unused) 1114 { 1115 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 1116 1117 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n; 1118 1119 ring->req_cons++; 1120 1121 if (xbdi->xbdi_status == DISCONNECTING) 1122 xbdi->xbdi_cont = NULL; 1123 else 1124 xbdi->xbdi_cont = xbdback_co_main_loop; 1125 1126 return xbdi; 1127 } 1128 1129 /* 1130 * Check for requests in the instance's ring. In case there are, start again 1131 * from the beginning. If not, stall. 1132 */ 1133 static void * 1134 xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj) 1135 { 1136 int work_to_do; 1137 1138 xen_wmb(); 1139 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do); 1140 if (work_to_do) 1141 xbdi->xbdi_cont = xbdback_co_main; 1142 else 1143 xbdi->xbdi_cont = NULL; 1144 1145 return xbdi; 1146 } 1147 1148 /* 1149 * Frontend requested a cache flush operation. 1150 */ 1151 static void * 1152 xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj) 1153 { 1154 struct xbdback_io *xbd_io = obj; 1155 KASSERT(xbd_io->xio_xen_req.operation == BLKIF_OP_FLUSH_DISKCACHE); 1156 if (xbdi->xbdi_pendingreqs > 0) { 1157 /* 1158 * There are pending requests. 1159 * Event or iodone() will restart processing 1160 */ 1161 xbdi->xbdi_cont_restart = xbdback_co_cache_flush; 1162 xbdi->xbdi_cont_restart_obj = xbd_io; 1163 xbdi->xbdi_cont = NULL; 1164 return NULL; 1165 } 1166 xbdi_get(xbdi); 1167 xbdi->xbdi_cont = xbdback_co_do_io; 1168 return xbd_io; 1169 } 1170 1171 /* 1172 * A read or write I/O request must be processed. Do some checks first, 1173 * then get the segment information directly from the ring request. 
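 * (For BLKIF_OP_INDIRECT requests the segment descriptors live in a
 * separate granted page rather than in the ring slot; they are pulled into
 * xio_seg below with a single GNTTABOP_copy hypercall, using the machine
 * address recorded in the preloaded xio_seg_dmamap as the destination.)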
1174 */ 1175 static void * 1176 xbdback_co_io(struct xbdback_instance *xbdi, void *obj) 1177 { 1178 int i, error; 1179 blkif_request_t *req, *reqn; 1180 blkif_x86_32_request_t *req32; 1181 blkif_x86_64_request_t *req64; 1182 blkif_request_indirect_t *rinn; 1183 blkif_x86_32_request_indirect_t *rin32; 1184 blkif_x86_64_request_indirect_t *rin64; 1185 const char *errstr; 1186 struct xbdback_io *xbd_io = obj; 1187 grant_ref_t in_gntref = 0; 1188 1189 req = &xbd_io->xio_xen_req; 1190 1191 /* some sanity checks */ 1192 KASSERT(req->operation == BLKIF_OP_READ || 1193 req->operation == BLKIF_OP_WRITE || 1194 req->operation == BLKIF_OP_INDIRECT); 1195 1196 /* copy request segments */ 1197 switch (xbdi->xbdi_proto) { 1198 case XBDIP_NATIVE: 1199 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 1200 xbdi->xbdi_ring.ring_n.req_cons); 1201 req->handle = reqn->handle; 1202 req->sector_number = reqn->sector_number; 1203 if (reqn->operation == BLKIF_OP_INDIRECT) { 1204 rinn = (blkif_request_indirect_t *)reqn; 1205 req->operation = rinn->indirect_op; 1206 req->nr_segments = (uint8_t)rinn->nr_segments; 1207 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) { 1208 errstr = "too many indirect segments"; 1209 goto bad_segments; 1210 } 1211 in_gntref = rinn->indirect_grefs[0]; 1212 /* first_sect and segment grefs fetched later */ 1213 } else { 1214 req->nr_segments = reqn->nr_segments; 1215 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) { 1216 errstr = "too many segments"; 1217 goto bad_segments; 1218 } 1219 for (i = 0; i < req->nr_segments; i++) 1220 xbd_io->xio_seg[i] = reqn->seg[i]; 1221 } 1222 break; 1223 case XBDIP_32: 1224 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 1225 xbdi->xbdi_ring.ring_n.req_cons); 1226 req->handle = req32->handle; 1227 req->sector_number = req32->sector_number; 1228 if (req32->operation == BLKIF_OP_INDIRECT) { 1229 rin32 = (blkif_x86_32_request_indirect_t *)req32; 1230 req->operation = rin32->indirect_op; 1231 req->nr_segments = (uint8_t)rin32->nr_segments; 1232 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) { 1233 errstr = "too many indirect segments"; 1234 goto bad_segments; 1235 } 1236 in_gntref = rin32->indirect_grefs[0]; 1237 /* first_sect and segment grefs fetched later */ 1238 } else { 1239 req->nr_segments = req32->nr_segments; 1240 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) { 1241 errstr = "too many segments"; 1242 goto bad_segments; 1243 } 1244 for (i = 0; i < req->nr_segments; i++) 1245 xbd_io->xio_seg[i] = req32->seg[i]; 1246 } 1247 break; 1248 case XBDIP_64: 1249 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 1250 xbdi->xbdi_ring.ring_n.req_cons); 1251 req->handle = req64->handle; 1252 req->sector_number = req64->sector_number; 1253 if (req64->operation == BLKIF_OP_INDIRECT) { 1254 rin64 = (blkif_x86_64_request_indirect_t *)req64; 1255 req->nr_segments = (uint8_t)rin64->nr_segments; 1256 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) { 1257 errstr = "too many indirect segments"; 1258 goto bad_segments; 1259 } 1260 in_gntref = rin64->indirect_grefs[0]; 1261 /* first_sect and segment grefs fetched later */ 1262 } else { 1263 req->nr_segments = req64->nr_segments; 1264 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) { 1265 errstr = "too many segments"; 1266 goto bad_segments; 1267 } 1268 for (i = 0; i < req->nr_segments; i++) 1269 xbd_io->xio_seg[i] = req64->seg[i]; 1270 } 1271 break; 1272 } 1273 1274 if (req->operation == BLKIF_OP_WRITE) { 1275 if (xbdi->xbdi_ro) { 1276 error = EROFS; 1277 goto end; 1278 } 1279 } 1280 1281 /* 
Max value checked already earlier */ 1282 if (req->nr_segments < 1) { 1283 errstr = "invalid number of segments"; 1284 goto bad_segments; 1285 } 1286 1287 /* If segments are on an indirect page, copy them now */ 1288 if (in_gntref) { 1289 gnttab_copy_t gop; 1290 paddr_t ma; 1291 1292 gop.flags = GNTCOPY_source_gref; 1293 gop.len = req->nr_segments 1294 * sizeof(struct blkif_request_segment); 1295 1296 gop.source.u.ref = in_gntref; 1297 gop.source.offset = 0; 1298 gop.source.domid = xbdi->xbdi_domid; 1299 1300 ma = xbd_io->xio_seg_dmamap->dm_segs[0].ds_addr; 1301 gop.dest.offset = ma & PAGE_MASK; 1302 gop.dest.domid = DOMID_SELF; 1303 gop.dest.u.gmfn = ma >> PAGE_SHIFT; 1304 1305 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0) { 1306 errstr = "GNTTABOP_copy failed"; 1307 goto bad_segments; 1308 } 1309 } 1310 1311 xbdi_get(xbdi); 1312 xbdi->xbdi_cont = xbdback_co_io_gotio; 1313 return xbd_io; 1314 1315 bad_segments: 1316 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) { 1317 printf("%s: %s\n", xbdi->xbdi_name, errstr); 1318 } 1319 error = EINVAL; 1320 /* FALLTHROUGH */ 1321 1322 end: 1323 xbdback_send_reply(xbdi, req->id, req->operation, 1324 (error == EROFS) ? BLKIF_RSP_EOPNOTSUPP : BLKIF_RSP_ERROR); 1325 xbdi->xbdi_cont = xbdback_co_main_incr; 1326 return xbdi; 1327 } 1328 1329 /* Prepare an I/O buffer for a xbdback instance */ 1330 static void * 1331 xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) 1332 { 1333 struct xbdback_io *xbd_io = obj; 1334 int buf_flags; 1335 size_t bcount; 1336 blkif_request_t *req = &xbd_io->xio_xen_req; 1337 uint8_t last_sect; 1338 int error; 1339 1340 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 1341 KASSERT(xbdi->xbdi_refcnt > 0); 1342 1343 /* Process segments */ 1344 bcount = 0; 1345 for (int i = 0; i < req->nr_segments; i++) { 1346 struct blkif_request_segment *seg = &xbd_io->xio_seg[i]; 1347 if (seg->last_sect > VBD_MAXSECT || 1348 seg->first_sect > VBD_MAXSECT) { 1349 if (ratecheck(&xbdi->xbdi_lasterr_time, 1350 &xbdback_err_intvl)) { 1351 printf("%s: invalid segment sectors %d %d\n", 1352 xbdi->xbdi_name, 1353 seg->first_sect, seg->last_sect); 1354 } 1355 xbdi->xbdi_pendingreqs++; /* xbdback_io_error will -- */ 1356 xbdback_io_error(xbd_io, EINVAL); 1357 /* do not retry */ 1358 xbdi->xbdi_cont = xbdback_co_main_incr; 1359 return xbdi; 1360 } 1361 1362 if (i > 0) { 1363 if (last_sect != VBD_MAXSECT || 1364 seg->first_sect != 0) { 1365 xbd_io->xio_need_bounce = 1; 1366 } 1367 } 1368 last_sect = seg->last_sect; 1369 xbd_io->xio_gref[i] = seg->gref; 1370 bcount += (seg->last_sect - seg->first_sect + 1) 1371 * VBD_BSIZE; 1372 } 1373 xbd_io->xio_start_offset = xbd_io->xio_seg[0].first_sect * VBD_BSIZE; 1374 1375 KASSERT(bcount <= MAXPHYS); 1376 KASSERT(xbd_io->xio_start_offset < PAGE_SIZE); 1377 KASSERT(bcount + xbd_io->xio_start_offset <= VBD_VA_SIZE); 1378 1379 /* Fill-in the buf */ 1380 if (req->operation == BLKIF_OP_WRITE) { 1381 buf_flags = B_WRITE; 1382 } else { 1383 buf_flags = B_READ; 1384 } 1385 1386 xbd_io->xio_buf.b_flags = buf_flags; 1387 xbd_io->xio_buf.b_cflags = 0; 1388 xbd_io->xio_buf.b_oflags = 0; 1389 xbd_io->xio_buf.b_iodone = xbdback_iodone; 1390 xbd_io->xio_buf.b_proc = NULL; 1391 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp; 1392 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock; 1393 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev; 1394 xbd_io->xio_buf.b_blkno = req->sector_number; 1395 xbd_io->xio_buf.b_bcount = bcount; 1396 if (__predict_false(xbd_io->xio_need_bounce)) { 1397 if 
(__predict_false(xbdi->xbdi_bouncebuf_use)) { 1398 KASSERT(xbdi->xbdi_pendingreqs > 1); 1399 /* retry later */ 1400 xbdi->xbdi_cont_restart = xbdback_co_io_gotio; 1401 xbdi->xbdi_cont_restart_obj = xbd_io; 1402 xbdi->xbdi_cont = NULL; 1403 return NULL; 1404 } 1405 xbdi->xbdi_bouncebuf_use++; 1406 KASSERT(xbdi->xbdi_bouncebuf_use == 1); 1407 xbd_io->xio_buf.b_data = (void *)xbdi->xbdi_bouncebuf; 1408 } 1409 xbdi->xbdi_pendingreqs++; 1410 if ((error = xbdback_map_shm(xbd_io)) != 0) { 1411 xbdback_io_error(xbd_io, error); 1412 /* do not retry */ 1413 xbdi->xbdi_cont = xbdback_co_main_incr; 1414 return xbdi; 1415 } 1416 if (__predict_true(xbd_io->xio_need_bounce == 0)) { 1417 xbd_io->xio_buf.b_data = (void *) 1418 (xbd_io->xio_vaddr + xbd_io->xio_start_offset); 1419 } 1420 1421 1422 xbd_io->xio_buf.b_private = xbd_io; 1423 1424 xbdi->xbdi_cont = xbdback_co_do_io; 1425 return xbd_io; 1426 } 1427 1428 static void 1429 xbdback_io_error(struct xbdback_io *xbd_io, int error) 1430 { 1431 KASSERT(mutex_owned(&xbd_io->xio_xbdi->xbdi_lock)); 1432 1433 struct buf *bp = &xbd_io->xio_buf; 1434 1435 bp->b_error = error; 1436 xbdback_iodone_locked(xbd_io->xio_xbdi, xbd_io, bp); 1437 } 1438 1439 /* 1440 * Main xbdback I/O routine. It can either perform a flush operation or 1441 * schedule a read/write operation. 1442 */ 1443 static void * 1444 xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj) 1445 { 1446 struct xbdback_io *xbd_io = obj; 1447 blkif_request_t *req = &xbd_io->xio_xen_req; 1448 1449 KASSERT(xbdi->xbdi_refcnt > 0); 1450 1451 switch (req->operation) { 1452 case BLKIF_OP_FLUSH_DISKCACHE: 1453 { 1454 int error; 1455 int force = 1; 1456 1457 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 1458 mutex_exit(&xbdi->xbdi_lock); 1459 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE, 1460 kauth_cred_get()); 1461 mutex_enter(&xbdi->xbdi_lock); 1462 if (error) { 1463 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n", 1464 xbdi->xbdi_xbusd->xbusd_path, error); 1465 if (error == EOPNOTSUPP || error == ENOTTY) 1466 error = BLKIF_RSP_EOPNOTSUPP; 1467 else 1468 error = BLKIF_RSP_ERROR; 1469 } else 1470 error = BLKIF_RSP_OKAY; 1471 xbdback_send_reply(xbdi, req->id, req->operation, error); 1472 xbdback_io_put(xbdi, xbd_io); 1473 xbdi_put(xbdi); 1474 xbdi->xbdi_cont = xbdback_co_main_incr; 1475 return xbdi; 1476 } 1477 case BLKIF_OP_READ: 1478 case BLKIF_OP_WRITE: 1479 if (__predict_false(xbd_io->xio_need_bounce) && 1480 req->operation == BLKIF_OP_WRITE) { 1481 vaddr_t boffset = 0; 1482 for (int i = 0; i < req->nr_segments; i++) { 1483 struct blkif_request_segment *seg = 1484 &xbd_io->xio_seg[i]; 1485 vaddr_t segoffset = seg->first_sect * VBD_BSIZE; 1486 size_t segbcount = 1487 (seg->last_sect - seg->first_sect + 1) * 1488 VBD_BSIZE; 1489 KASSERT(segoffset + segbcount <= PAGE_SIZE); 1490 KASSERT(boffset + segbcount < MAXPHYS); 1491 segoffset += PAGE_SIZE * i; 1492 memcpy( 1493 (void *)(xbdi->xbdi_bouncebuf + boffset), 1494 (void *)(xbd_io->xio_vaddr + segoffset), 1495 segbcount); 1496 boffset += segbcount; 1497 } 1498 } 1499 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 1500 mutex_exit(&xbdi->xbdi_lock); 1501 if ((xbd_io->xio_buf.b_flags & B_READ) == 0) { 1502 mutex_enter(xbd_io->xio_buf.b_vp->v_interlock); 1503 xbd_io->xio_buf.b_vp->v_numoutput++; 1504 mutex_exit(xbd_io->xio_buf.b_vp->v_interlock); 1505 } 1506 /* will call xbdback_iodone() asynchronously when done */ 1507 bdev_strategy(&xbd_io->xio_buf); 1508 mutex_enter(&xbdi->xbdi_lock); 1509 xbdi->xbdi_cont = xbdback_co_main_incr; 1510 return xbdi; 
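	/*
	 * (Note: the bounce-buffer copy above only gathers data for writes;
	 * for reads the inverse scatter back into the mapped segments is
	 * performed in xbdback_iodone_locked() once the transfer finishes.)
	 */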
1511 default: 1512 /* Should never happen */ 1513 panic("xbdback_co_do_io: unsupported operation %d", 1514 req->operation); 1515 } 1516 } 1517 1518 /* 1519 * Called from softint(9) context when an I/O is done: for each request, send 1520 * back the associated reply to the domain. 1521 */ 1522 static void 1523 xbdback_iodone(struct buf *bp) 1524 { 1525 struct xbdback_io *xbd_io; 1526 struct xbdback_instance *xbdi; 1527 1528 xbd_io = bp->b_private; 1529 KASSERT(bp == &xbd_io->xio_buf); 1530 xbdi = xbd_io->xio_xbdi; 1531 1532 mutex_enter(&xbdi->xbdi_lock); 1533 xbdback_iodone_locked(xbdi, xbd_io, bp); 1534 mutex_exit(&xbdi->xbdi_lock); 1535 } 1536 1537 /* 1538 * This gets reused by xbdback_io_error to report errors from other sources. 1539 */ 1540 static void 1541 xbdback_iodone_locked(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io, 1542 struct buf *bp) 1543 { 1544 int status; 1545 blkif_request_t *req = &xbd_io->xio_xen_req; 1546 1547 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n", 1548 xbdi->xbdi_domid, (long)xbd_io)); 1549 1550 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 1551 1552 KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL); 1553 if (__predict_false(xbd_io->xio_need_bounce)) { 1554 KASSERT(xbd_io->xio_buf.b_data == (void *)xbdi->xbdi_bouncebuf); 1555 1556 KASSERT(req->operation == BLKIF_OP_WRITE || 1557 req->operation == BLKIF_OP_READ); 1558 1559 if (req->operation == BLKIF_OP_READ && bp->b_error == 0) { 1560 vaddr_t boffset = 0; 1561 for (int i = 0; i < req->nr_segments; i++) { 1562 struct blkif_request_segment *seg = 1563 &xbd_io->xio_seg[i]; 1564 vaddr_t segoffset = seg->first_sect * VBD_BSIZE; 1565 size_t segbcount = 1566 (seg->last_sect - seg->first_sect + 1) * 1567 VBD_BSIZE; 1568 KASSERT(segoffset + segbcount <= PAGE_SIZE); 1569 KASSERT(boffset + segbcount < MAXPHYS); 1570 segoffset += PAGE_SIZE * i; 1571 memcpy( 1572 (void *)(xbd_io->xio_vaddr + segoffset), 1573 (void *)(xbdi->xbdi_bouncebuf + boffset), 1574 segbcount); 1575 boffset += segbcount; 1576 } 1577 } 1578 KASSERT(xbdi->xbdi_bouncebuf_use == 1); 1579 xbdi->xbdi_bouncebuf_use--; 1580 } 1581 if (xbd_io->xio_xv != NULL) 1582 xbdback_unmap_shm(xbd_io); 1583 1584 if (bp->b_error != 0) { 1585 printf("xbd IO domain %d: error %d\n", 1586 xbdi->xbdi_domid, bp->b_error); 1587 status = BLKIF_RSP_ERROR; 1588 } else 1589 status = BLKIF_RSP_OKAY; 1590 1591 xbdback_send_reply(xbdi, req->id, req->operation, status); 1592 1593 xbdi_put(xbdi); 1594 KASSERT(xbdi->xbdi_pendingreqs > 0); 1595 xbdi->xbdi_pendingreqs--; 1596 buf_destroy(&xbd_io->xio_buf); 1597 xbdback_io_put(xbdi, xbd_io); 1598 1599 xbdback_wakeup_thread(xbdi); 1600 } 1601 1602 /* 1603 * Wake up the per xbdback instance thread. 1604 */ 1605 static void 1606 xbdback_wakeup_thread(struct xbdback_instance *xbdi) 1607 { 1608 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 1609 1610 /* only set RUN state when we are WAITING for work */ 1611 if (xbdi->xbdi_status == WAITING) 1612 xbdi->xbdi_status = RUN; 1613 cv_signal(&xbdi->xbdi_cv); 1614 } 1615 1616 /* 1617 * called once a request has completed. Place the reply in the ring and 1618 * notify the guest OS. 
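 * (The caller must hold xbdi_lock. The notify flag computed by
 * RING_PUSH_RESPONSES_AND_CHECK_NOTIFY below reflects whether the frontend
 * asked to be interrupted for this response, so the event channel is only
 * kicked when needed.)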
1619 */ 1620 static void 1621 xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id, 1622 int op, int status) 1623 { 1624 blkif_response_t *resp_n; 1625 blkif_x86_32_response_t *resp32; 1626 blkif_x86_64_response_t *resp64; 1627 int notify; 1628 1629 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 1630 1631 /* 1632 * The ring can be accessed by the xbdback thread, xbdback_iodone() 1633 * handler, or any handler that triggered the shm callback. So 1634 * protect ring access via the xbdi_lock mutex. 1635 */ 1636 switch (xbdi->xbdi_proto) { 1637 case XBDIP_NATIVE: 1638 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n, 1639 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1640 resp_n->id = id; 1641 resp_n->operation = op; 1642 resp_n->status = status; 1643 break; 1644 case XBDIP_32: 1645 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32, 1646 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1647 resp32->id = id; 1648 resp32->operation = op; 1649 resp32->status = status; 1650 break; 1651 case XBDIP_64: 1652 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64, 1653 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1654 resp64->id = id; 1655 resp64->operation = op; 1656 resp64->status = status; 1657 break; 1658 } 1659 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++; 1660 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify); 1661 1662 if (notify) { 1663 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid)); 1664 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 1665 } 1666 } 1667 1668 /* 1669 * Map multiple entries of an I/O request into backend's VA space. 1670 * The xbd_io->xio_gref array has to be filled out by the caller. 1671 */ 1672 static int 1673 xbdback_map_shm(struct xbdback_io *xbd_io) 1674 { 1675 struct xbdback_instance *xbdi = xbd_io->xio_xbdi; 1676 blkif_request_t *req = &xbd_io->xio_xen_req; 1677 int error; 1678 1679 #ifdef XENDEBUG_VBD 1680 int i; 1681 printf("xbdback_map_shm map grant "); 1682 for (i = 0; i < req->nr_segments; i++) { 1683 printf("%u ", (u_int)xbd_io->xio_gref[i]); 1684 } 1685 #endif 1686 1687 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 1688 KASSERT(xbd_io->xio_xv == NULL); 1689 1690 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free); 1691 KASSERT(xbd_io->xio_xv != NULL); 1692 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next); 1693 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr; 1694 1695 error = xen_shm_map(req->nr_segments, xbdi->xbdi_domid, 1696 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh, 1697 (req->operation == BLKIF_OP_WRITE) ? 
	    XSHM_RO : 0);

	switch(error) {
	case 0:
#ifdef XENDEBUG_VBD
		printf("handle");
		for (i = 0; i < req->nr_segments; i++) {
			printf(" %u ", (u_int)xbd_io->xio_gh[i]);
		}
		printf("\n");
#endif
		return 0;
	default:
		/* reset xio_xv so error handling won't try to unmap it */
		SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
		xbd_io->xio_xv = NULL;
		return error;
	}
}

/* unmap a request from our virtual address space (request is done) */
static void
xbdback_unmap_shm(struct xbdback_io *xbd_io)
{
	struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
	blkif_request_t *req = &xbd_io->xio_xen_req;

#ifdef XENDEBUG_VBD
	int i;
	printf("xbdback_unmap_shm handle ");
	for (i = 0; i < req->nr_segments; i++) {
		printf("%u ", (u_int)xbd_io->xio_gh[i]);
	}
	printf("\n");
#endif

	KASSERT(xbd_io->xio_xv != NULL);
	xen_shm_unmap(xbd_io->xio_vaddr, req->nr_segments,
	    xbd_io->xio_gh);
	SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
	xbd_io->xio_xv = NULL;
	xbd_io->xio_vaddr = -1;
}

/* Take an xbdback_io from the instance's free list */
static struct xbdback_io *
xbdback_io_get(struct xbdback_instance *xbdi)
{
	struct xbdback_io *xbd_io = SLIST_FIRST(&xbdi->xbdi_io_free);
	if (xbd_io != NULL)
		SLIST_REMOVE_HEAD(&xbdi->xbdi_io_free, xio_next);
	return xbd_io;
}

/* Return an xbdback_io to the instance's free list */
static void
xbdback_io_put(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io)
{
	KASSERT(xbd_io != NULL);
	KASSERT(xbd_io->xio_xv == NULL);
	SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next);
}

/*
 * Trampoline routine. Calls continuations in a loop and only exits when
 * either the returned object or the next callback is NULL.
 */
static void
xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
{
	xbdback_cont_t cont;

	while (obj != NULL && xbdi->xbdi_cont != NULL) {
		KASSERT(xbdi->xbdi_cont_restart == NULL);
		KASSERT(xbdi->xbdi_cont_restart_obj == NULL);
		cont = xbdi->xbdi_cont;
#ifdef DIAGNOSTIC
		xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF;
#endif
		obj = (*cont)(xbdi, obj);
#ifdef DIAGNOSTIC
		if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) {
			printf("xbdback_trampoline: 0x%lx didn't set "
			    "xbdi->xbdi_cont!\n", (long)cont);
			panic("xbdback_trampoline: bad continuation");
		}
		if (xbdi->xbdi_cont_restart != NULL ||
		    xbdi->xbdi_cont_restart_obj != NULL) {
			KASSERT(xbdi->xbdi_cont_restart != NULL);
			KASSERT(xbdi->xbdi_cont_restart_obj != NULL);
			KASSERT(xbdi->xbdi_cont == NULL);
			KASSERT(obj == NULL);
		}
#endif
	}
}