/*	$NetBSD: viomb.c,v 1.1.8.2 2012/01/25 21:18:15 riz Exp $	*/

/*
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: viomb.c,v 1.1.8.2 2012/01/25 21:18:15 riz Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/condvar.h>
#include <sys/device.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <uvm/uvm_extern.h>

#include <dev/pci/pcidevs.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <dev/pci/virtioreg.h>
#include <dev/pci/virtiovar.h>

/* Configuration registers */
#define VIRTIO_BALLOON_CONFIG_NUM_PAGES	0	/* 32bit */
#define VIRTIO_BALLOON_CONFIG_ACTUAL	4	/* 32bit */

/* Feature bits */
#define VIRTIO_BALLOON_F_MUST_TELL_HOST	(1<<0)
#define VIRTIO_BALLOON_F_STATS_VQ	(1<<1)

#define PGS_PER_REQ	(256)	/* 1MB, 4KB/page */

/*
 * XXX: the page-number translations below only work while the host
 * page size equals VIRTIO_PAGE_SIZE (4KB).
 */
CTASSERT((PAGE_SIZE) == (VIRTIO_PAGE_SIZE));

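/*
 * A single in-flight balloon request.  The DMA map covers only the
 * bl_pages array of guest page frame numbers; bl_pglist holds the
 * vm_pages backing those frames while the request is outstanding.
 */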
struct balloon_req {
	bus_dmamap_t		bl_dmamap;
	struct pglist		bl_pglist;
	int			bl_nentries;
	uint32_t		bl_pages[PGS_PER_REQ];
};

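/*
 * sc_vq[0] is the inflate queue and sc_vq[1] the deflate queue.
 * sc_inflight is positive while an inflate request is pending and
 * negative while a deflate request is pending.
 */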
struct viomb_softc {
	device_t		sc_dev;

	struct virtio_softc	*sc_virtio;
	struct virtqueue	sc_vq[2];

	unsigned int		sc_npages;
	unsigned int		sc_actual;
	int			sc_inflight;
	struct balloon_req	sc_req;
	struct pglist		sc_balloon_pages;

	int			sc_inflate_done;
	int			sc_deflate_done;

	kcondvar_t		sc_wait;
	kmutex_t		sc_waitlock;
};

static int balloon_initialized = 0; /* only one balloon device is allowed */

static int	viomb_match(device_t, cfdata_t, void *);
static void	viomb_attach(device_t, device_t, void *);
static void	viomb_read_config(struct viomb_softc *);
static int	viomb_config_change(struct virtio_softc *);
static int	inflate(struct viomb_softc *);
static int	inflateq_done(struct virtqueue *);
static int	inflate_done(struct viomb_softc *);
static int	deflate(struct viomb_softc *);
static int	deflateq_done(struct virtqueue *);
static int	deflate_done(struct viomb_softc *);
static void	viomb_thread(void *);

CFATTACH_DECL_NEW(viomb, sizeof(struct viomb_softc),
    viomb_match, viomb_attach, NULL, NULL);

static int
viomb_match(device_t parent, cfdata_t match, void *aux)
{
	struct virtio_softc *vsc = aux;

	if (vsc->sc_childdevid == PCI_PRODUCT_VIRTIO_BALLOON)
		return 1;

	return 0;
}

static void
viomb_attach(device_t parent, device_t self, void *aux)
{
	struct viomb_softc *sc = device_private(self);
	struct virtio_softc *vsc = device_private(parent);
	const struct sysctlnode *node;

	if (vsc->sc_child != NULL) {
		aprint_normal(": child already attached for %s; "
			      "something wrong...\n",
			      device_xname(parent));
		return;
	}
	if (balloon_initialized++) {
		aprint_normal(": balloon already exists; something wrong...\n");
		goto err_none;
	}
	aprint_normal("\n");

	sc->sc_dev = self;
	sc->sc_virtio = vsc;

	vsc->sc_child = self;
	vsc->sc_ipl = IPL_VM;
	vsc->sc_vqs = &sc->sc_vq[0];
	vsc->sc_nvqs = 2;
	vsc->sc_config_change = viomb_config_change;
	vsc->sc_intrhand = virtio_vq_intr;

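	/*
	 * VIRTIO_CONFIG_DEVICE_FEATURES is the feature register offset,
	 * which happens to be 0: no optional feature bits are requested.
	 */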
	virtio_negotiate_features(vsc,
				  VIRTIO_CONFIG_DEVICE_FEATURES);
	if ((virtio_alloc_vq(vsc, &sc->sc_vq[0], 0,
			     sizeof(uint32_t)*PGS_PER_REQ, 1,
			     "inflate") != 0) ||
	    (virtio_alloc_vq(vsc, &sc->sc_vq[1], 1,
			     sizeof(uint32_t)*PGS_PER_REQ, 1,
			     "deflate") != 0)) {
		goto err_none;
	}
	sc->sc_vq[0].vq_done = inflateq_done;
	sc->sc_vq[1].vq_done = deflateq_done;

	viomb_read_config(sc);
	sc->sc_inflight = 0;
	TAILQ_INIT(&sc->sc_balloon_pages);

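	/*
	 * One reusable DMA map over the bl_pages array.  Only one
	 * request is ever in flight at a time, so both queues can
	 * share it.
	 */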
	if (bus_dmamap_create(vsc->sc_dmat, sizeof(uint32_t)*PGS_PER_REQ,
			      1, sizeof(uint32_t)*PGS_PER_REQ, 0,
			      BUS_DMA_NOWAIT, &sc->sc_req.bl_dmamap)) {
		aprint_error_dev(sc->sc_dev, "dmamap creation failed.\n");
		goto err_vq;
	}
	if (bus_dmamap_load(vsc->sc_dmat, sc->sc_req.bl_dmamap,
			    &sc->sc_req.bl_pages[0],
			    sizeof(uint32_t) * PGS_PER_REQ,
			    NULL, BUS_DMA_NOWAIT)) {
		aprint_error_dev(sc->sc_dev, "dmamap load failed.\n");
		goto err_dmamap;
	}

	sc->sc_inflate_done = sc->sc_deflate_done = 0;
	mutex_init(&sc->sc_waitlock, MUTEX_DEFAULT, IPL_VM); /* spin */
	cv_init(&sc->sc_wait, "balloon");

	if (kthread_create(PRI_IDLE, KTHREAD_MPSAFE, NULL,
			   viomb_thread, sc, NULL, "viomb")) {
		aprint_error_dev(sc->sc_dev, "cannot create kthread.\n");
		goto err_mutex;
	}

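	/*
	 * Export the host's target size (npages) and the current balloon
	 * size (actual) as hw.viomb.* sysctl variables.
	 */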
	sysctl_createv(NULL, 0, NULL, &node, 0, CTLTYPE_NODE,
		       "viomb", SYSCTL_DESCR("VirtIO Balloon status"),
		       NULL, 0, NULL, 0,
		       CTL_HW, CTL_CREATE, CTL_EOL);
	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
		       "npages", SYSCTL_DESCR("VirtIO Balloon npages value"),
		       NULL, 0, &sc->sc_npages, 0,
		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
		       "actual", SYSCTL_DESCR("VirtIO Balloon actual value"),
		       NULL, 0, &sc->sc_actual, 0,
		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
	return;

err_mutex:
	cv_destroy(&sc->sc_wait);
	mutex_destroy(&sc->sc_waitlock);
err_dmamap:
	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_req.bl_dmamap);
err_vq:
	virtio_free_vq(vsc, &sc->sc_vq[1]);
	virtio_free_vq(vsc, &sc->sc_vq[0]);
err_none:
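	/*
	 * The non-NULL sentinel tells the virtio parent that attaching
	 * a child was attempted but failed.
	 */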
	vsc->sc_child = (void*)1;
	return;
}

static void
viomb_read_config(struct viomb_softc *sc)
{
	unsigned int reg;

	/* these values are explicitly specified as little-endian */
	reg = virtio_read_device_config_4(sc->sc_virtio,
					  VIRTIO_BALLOON_CONFIG_NUM_PAGES);
	sc->sc_npages = le32toh(reg);

	reg = virtio_read_device_config_4(sc->sc_virtio,
					  VIRTIO_BALLOON_CONFIG_ACTUAL);
	sc->sc_actual = le32toh(reg);
}

/*
 * Config change callback: wakeup the kthread.
 */
static int
viomb_config_change(struct virtio_softc *vsc)
{
	struct viomb_softc *sc = device_private(vsc->sc_child);
	unsigned int old;

	old = sc->sc_npages;
	viomb_read_config(sc);
	mutex_enter(&sc->sc_waitlock);
	cv_signal(&sc->sc_wait);
	mutex_exit(&sc->sc_waitlock);
	if (sc->sc_npages > old)
		printf("%s: inflating balloon from %u to %u.\n",
		       device_xname(sc->sc_dev), old, sc->sc_npages);
	else if (sc->sc_npages < old)
		printf("%s: deflating balloon from %u to %u.\n",
		       device_xname(sc->sc_dev), old, sc->sc_npages);

	return 1;
}

/*
 * Inflate: consume some amount of physical memory.
 */
static int
inflate(struct viomb_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, slot;
	uint64_t nvpages, nhpages;
	struct balloon_req *b;
	struct vm_page *p;
	struct virtqueue *vq = &sc->sc_vq[0];

	if (sc->sc_inflight)
		return 0;
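	/*
	 * nvpages counts virtio (4KB) pages still to be given to the
	 * host; nhpages is the same amount expressed in host pages
	 * (identical here, see the CTASSERT above).
	 */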
	nvpages = sc->sc_npages - sc->sc_actual;
	if (nvpages > PGS_PER_REQ)
		nvpages = PGS_PER_REQ;
	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;

	b = &sc->sc_req;
	if (uvm_pglistalloc(nhpages*PAGE_SIZE, 0, UINT32_MAX*PAGE_SIZE,
			    0, 0, &b->bl_pglist, nhpages, 1)) {
		printf("%s: %" PRIu64 " pages of physical memory "
		       "could not be allocated, retrying...\n",
		       device_xname(sc->sc_dev), nhpages);
		return 1;	/* sleep longer */
	}

	b->bl_nentries = nvpages;
	i = 0;
	TAILQ_FOREACH(p, &b->bl_pglist, pageq.queue) {
		b->bl_pages[i++] = p->phys_addr / VIRTIO_PAGE_SIZE;
	}
	KASSERT(i == nvpages);

	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
		printf("%s: inflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		uvm_pglistfree(&b->bl_pglist);
		return 0;
	}
	if (virtio_enqueue_reserve(vsc, vq, slot, 1)) {
		printf("%s: inflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		uvm_pglistfree(&b->bl_pglist);
		return 0;
	}
	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0, sizeof(uint32_t)*nvpages,
			BUS_DMASYNC_PREWRITE);
	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
	virtio_enqueue_commit(vsc, vq, slot, true);
	sc->sc_inflight += nvpages;

	return 0;
}

static int
inflateq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct viomb_softc *sc = device_private(vsc->sc_child);

	mutex_enter(&sc->sc_waitlock);
	sc->sc_inflate_done = 1;
	cv_signal(&sc->sc_wait);
	mutex_exit(&sc->sc_waitlock);

	return 1;
}

static int
inflate_done(struct viomb_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[0];
	struct balloon_req *b;
	int r, slot;
	uint64_t nvpages;
	struct vm_page *p;

	r = virtio_dequeue(vsc, vq, &slot, NULL);
	if (r != 0) {
		printf("%s: inflate dequeue failed, errno %d.\n",
		       device_xname(sc->sc_dev), r);
		return 1;
	}
	virtio_dequeue_commit(vsc, vq, slot);

	b = &sc->sc_req;
	nvpages = b->bl_nentries;
	/* the map was loaded over bl_pages itself, so sync from offset 0 */
	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
			sizeof(uint32_t)*nvpages,
			BUS_DMASYNC_POSTWRITE);
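	/*
	 * Hand the pages over to the long-lived balloon list; they stay
	 * there (unavailable to UVM) until a later deflate returns them.
	 */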
	while (!TAILQ_EMPTY(&b->bl_pglist)) {
		p = TAILQ_FIRST(&b->bl_pglist);
		TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
		TAILQ_INSERT_TAIL(&sc->sc_balloon_pages, p, pageq.queue);
	}

	sc->sc_inflight -= nvpages;
	virtio_write_device_config_4(vsc,
				     VIRTIO_BALLOON_CONFIG_ACTUAL,
				     sc->sc_actual + nvpages);
	viomb_read_config(sc);

	return 1;
}

/*
 * Deflate: free previously allocated memory.
 */
static int
deflate(struct viomb_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, slot;
	uint64_t nvpages, nhpages;
	struct balloon_req *b;
	struct vm_page *p;
	struct virtqueue *vq = &sc->sc_vq[1];

	nvpages = (sc->sc_actual + sc->sc_inflight) - sc->sc_npages;
	if (nvpages > PGS_PER_REQ)
		nvpages = PGS_PER_REQ;
	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;

	b = &sc->sc_req;

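	/*
	 * Take pages off the balloon list and record their guest frame
	 * numbers for the host.
	 */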
	b->bl_nentries = nvpages;
	TAILQ_INIT(&b->bl_pglist);
	for (i = 0; i < nhpages; i++) {
		p = TAILQ_FIRST(&sc->sc_balloon_pages);
		TAILQ_REMOVE(&sc->sc_balloon_pages, p, pageq.queue);
		TAILQ_INSERT_TAIL(&b->bl_pglist, p, pageq.queue);
		b->bl_pages[i] = p->phys_addr / VIRTIO_PAGE_SIZE;
	}

	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
		printf("%s: deflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		/*
		 * Return the pages to the balloon list in their original
		 * order.  TAILQ_FOREACH_REVERSE is not safe here because
		 * each entry is relinked into the other list mid-walk;
		 * pop from the tail instead.
		 */
		while ((p = TAILQ_LAST(&b->bl_pglist, pglist)) != NULL) {
			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
					  pageq.queue);
		}
		return 0;
	}
	if (virtio_enqueue_reserve(vsc, vq, slot, 1) != 0) {
		printf("%s: deflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		while ((p = TAILQ_LAST(&b->bl_pglist, pglist)) != NULL) {
			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
					  pageq.queue);
		}
		return 0;
	}
	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0, sizeof(uint32_t)*nvpages,
			BUS_DMASYNC_PREWRITE);
	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
	virtio_enqueue_commit(vsc, vq, slot, true);
	sc->sc_inflight -= nvpages;

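	/*
	 * Unless the host insists on being told first
	 * (VIRTIO_BALLOON_F_MUST_TELL_HOST), the pages can be returned
	 * to UVM right away; otherwise deflate_done() frees them once
	 * the host has acknowledged the request.
	 */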
	if (!(vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST))
		uvm_pglistfree(&b->bl_pglist);

	return 0;
}

static int
deflateq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct viomb_softc *sc = device_private(vsc->sc_child);

	mutex_enter(&sc->sc_waitlock);
	sc->sc_deflate_done = 1;
	cv_signal(&sc->sc_wait);
	mutex_exit(&sc->sc_waitlock);

	return 1;
}

static int
deflate_done(struct viomb_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[1];
	struct balloon_req *b;
	int r, slot;
	uint64_t nvpages, nhpages;

	r = virtio_dequeue(vsc, vq, &slot, NULL);
	if (r != 0) {
		printf("%s: deflate dequeue failed, errno %d\n",
		       device_xname(sc->sc_dev), r);
		return 1;
	}
	virtio_dequeue_commit(vsc, vq, slot);

	b = &sc->sc_req;
	nvpages = b->bl_nentries;
	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;
	/* the map was loaded over bl_pages itself, so sync from offset 0 */
	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
			sizeof(uint32_t)*nvpages,
			BUS_DMASYNC_POSTWRITE);

	if (vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST)
		uvm_pglistfree(&b->bl_pglist);

	sc->sc_inflight += nvpages;
	virtio_write_device_config_4(vsc,
				     VIRTIO_BALLOON_CONFIG_ACTUAL,
				     sc->sc_actual - nvpages);
	viomb_read_config(sc);

	return 1;
}

/*
 * Kthread: sleeps, and periodically inflates or deflates the balloon
 * as the host's target changes.
 */
static void
viomb_thread(void *arg)
{
	struct viomb_softc *sc = arg;
	int sleeptime, r;

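	/*
	 * Sleep times: 30s when idle, 10s after a failed page
	 * allocation, 1s after issuing an inflate request, and 100ms
	 * while waiting for an outstanding request to complete.
	 */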
	for ( ; ; ) {
		sleeptime = 30000;
		if (sc->sc_npages > sc->sc_actual + sc->sc_inflight) {
			if (sc->sc_inflight == 0) {
				r = inflate(sc);
				if (r != 0)
					sleeptime = 10000;
				else
					sleeptime = 1000;
			} else
				sleeptime = 100;
		} else if (sc->sc_npages < sc->sc_actual + sc->sc_inflight) {
			if (sc->sc_inflight == 0)
				r = deflate(sc);
			sleeptime = 100;
		}

	again:
		mutex_enter(&sc->sc_waitlock);
		if (sc->sc_inflate_done) {
			sc->sc_inflate_done = 0;
			mutex_exit(&sc->sc_waitlock);
			inflate_done(sc);
			goto again;
		}
		if (sc->sc_deflate_done) {
			sc->sc_deflate_done = 0;
			mutex_exit(&sc->sc_waitlock);
			deflate_done(sc);
			goto again;
		}
		cv_timedwait(&sc->sc_wait, &sc->sc_waitlock,
			     mstohz(sleeptime));
		mutex_exit(&sc->sc_waitlock);
	}
}