device-mapper.c revision 1.1.2.10 1 /* $NetBSD: device-mapper.c,v 1.1.2.10 2008/09/03 22:50:17 haad Exp $ */
2
3 /*
4 * Copyright (c) 1996, 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Adam Hamsik.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * I want to say thank you to all people who helped me with this project.
34 */
35
36 #include <sys/types.h>
37 #include <sys/param.h>
38
39 #include <sys/atomic.h>
40 #include <sys/buf.h>
41 #include <sys/conf.h>
42 #include <sys/dkio.h>
43 #include <sys/disk.h>
44 #include <sys/disklabel.h>
45 #include <sys/ioctl.h>
46 #include <sys/ioccom.h>
47 #include <sys/kmem.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50
51
52 #include "netbsd-dm.h"
53 #include "dm.h"
54
55 static dev_type_open(dmopen);
56 static dev_type_close(dmclose);
57 static dev_type_read(dmread);
58 static dev_type_write(dmwrite);
59 static dev_type_ioctl(dmioctl);
60 static dev_type_strategy(dmstrategy);
61 static dev_type_dump(dmdump);
62 static dev_type_size(dmsize);
63
64 /* attach and detach routines */
65 int dmattach(void);
66 int dmdestroy(void);
67
68 static int dm_cmd_to_fun(prop_dictionary_t);
69 static int disk_ioctl_switch(dev_t, u_long, void *);
70 static int dm_ioctl_switch(u_long);
71 static void dmminphys(struct buf *);
72 static void dmgetdisklabel(struct dm_dev *, dev_t);
73 /* Called to initialize disklabel values for readdisklabel. */
74 static void dmgetdefaultdisklabel(struct dm_dev *, dev_t);
75
76 /* ***Variable-definitions*** */
77 const struct bdevsw dm_bdevsw = {
78 dmopen, dmclose, dmstrategy, dmioctl, dmdump, dmsize,
79 D_DISK
80 };
81
82 const struct cdevsw dm_cdevsw = {
83 dmopen, dmclose, dmread, dmwrite, dmioctl,
84 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
85 };
86
87 /* Info about all devices */
88 struct dm_softc *dm_sc;
89
90 kmutex_t dm_ioctl_mtx;
91
92 /*
93 * This array is used to translate cmd to function pointer.
94 *
95 * Interface between libdevmapper and lvm2tools uses different
96 * names for one IOCTL call because libdevmapper do another thing
97 * then. When I run "info" or "mknodes" libdevmapper will send same
98 * ioctl to kernel but will do another things in userspace.
99 *
100 */
101 struct cmd_function cmd_fn[] = {
102 {"version", dm_get_version_ioctl},
103 {"targets", dm_list_versions_ioctl},
104 {"create", dm_dev_create_ioctl},
105 {"info", dm_dev_status_ioctl},
106 {"mknodes", dm_dev_status_ioctl},
107 {"names", dm_dev_list_ioctl},
108 {"suspend", dm_dev_suspend_ioctl},
109 {"remove", dm_dev_remove_ioctl},
110 {"rename", dm_dev_rename_ioctl},
111 {"resume", dm_dev_resume_ioctl},
112 {"clear", dm_table_clear_ioctl},
113 {"deps", dm_table_deps_ioctl},
114 {"reload", dm_table_load_ioctl},
115 {"status", dm_table_status_ioctl},
116 {"table", dm_table_status_ioctl},
117 {NULL, NULL}
118 };
119
120
121 MODULE(MODULE_CLASS_MISC, dm, NULL);
122
123 /* New module handle routine */
124 static int
125 dm_modcmd(modcmd_t cmd, void *arg)
126 {
127 #ifdef _MODULE
128 int bmajor = -1, cmajor = -1;
129
130 printf(" Modcmd called \n");
131
132 switch (cmd) {
133 case MODULE_CMD_INIT:
134 dmattach();
135 return devsw_attach("dm", &dm_bdevsw, &bmajor,
136 &dm_cdevsw, &cmajor);
137 break;
138
139 case MODULE_CMD_FINI:
140 dmdestroy();
141 return devsw_detach(&dm_bdevsw, &dm_cdevsw);
142 break;
143
144 case MODULE_CMD_STAT:
145 printf("DM module stat called\n");
146 return ENOTTY;
147
148 default:
149 return ENOTTY;
150 }
151
152 return 0;
153 #else
154
155 if (cmd == MODULE_CMD_INIT)
156 return 0;
157 return ENOTTY;
158
159 #endif /* _MODULE */
160 }
161
162
163 /* attach routine */
164 int
165 dmattach(void)
166 {
167
168 printf("AttACh called\n");
169
170 dm_sc = (struct dm_softc *)kmem_alloc(sizeof(struct dm_softc), KM_NOSLEEP);
171
172 if (dm_sc == NULL){
173 aprint_error("Not enough memory for dm device.\n");
174 return(ENOMEM);
175 }
176
177 dm_sc->sc_minor_num = 0;
178 dm_sc->sc_ref_count = 0;
179
180 dm_target_init();
181
182 dm_dev_init();
183
184 dm_pdev_init();
185
186 mutex_init(&dm_ioctl_mtx, MUTEX_DEFAULT, IPL_NONE);
187
188 return 0;
189 }
190
191 /* Destroy routine */
192 int
193 dmdestroy(void)
194 {
195
196 printf("destroy called\n");
197 (void)kmem_free(dm_sc, sizeof(struct dm_softc));
198
199 dm_dev_destroy();
200
201 dm_pdev_destroy();
202
203 dm_target_destroy();
204
205 mutex_destroy(&dm_ioctl_mtx);
206
207 return 0;
208 }
209
210 static int
211 dmopen(dev_t dev, int flags, int mode, struct lwp *l)
212 {
213
214 struct dm_dev *dmv;
215
216 aprint_verbose("open routine called %d\n", minor(dev));
217
218 if ((dmv = dm_dev_lookup_minor(minor(dev))) != NULL) {
219 if (dmv->dm_dklabel == NULL)
220 dmgetdisklabel(dmv, dev);
221
222 dmv->ref_cnt++;
223 }
224
225 return 0;
226 }
227
228
229 static int
230 dmclose(dev_t dev, int flags, int mode, struct lwp *l)
231 {
232 struct dm_dev *dmv;
233
234 if ((dmv = dm_dev_lookup_minor(minor(dev))) != NULL)
235 dmv->ref_cnt--;
236
237 aprint_verbose("CLOSE routine called\n");
238
239 return 0;
240 }
241
242 /*
243 * Called after ioctl call on mapper/control or dm device.
244 * Locking: Use dm_ioctl_mtx to synchronise access to dm driver.
245 * Only one ioctl can be in dm driver in time. Ioctl's are not
246 * run many times and they performance is not critical, therefore
247 * I can do it this way.
248 */
249 static int
250 dmioctl(dev_t dev, const u_long cmd, void *data, int flag, struct lwp *l)
251 {
252 int r;
253 prop_dictionary_t dm_dict_in;
254
255 r = 0;
256
257 if (data == NULL)
258 return(EINVAL);
259
260 mutex_enter(&dm_ioctl_mtx);
261
262 if (disk_ioctl_switch(dev, cmd, data) != 0) {
263 struct plistref *pref = (struct plistref *) data;
264
265 r = prop_dictionary_copyin_ioctl(pref, cmd, &dm_dict_in);
266 if (r)
267 goto out;
268
269 dm_check_version(dm_dict_in);
270
271 /* call cmd selected function */
272 r = dm_ioctl_switch(cmd);
273 if (r < 0)
274 goto out;
275
276 char *xml;
277 xml = prop_dictionary_externalize(dm_dict_in);
278 aprint_verbose("%s\n",xml);
279
280 r = dm_cmd_to_fun(dm_dict_in);
281 if (r != 0)
282 goto out;
283
284 r = prop_dictionary_copyout_ioctl(pref, cmd, dm_dict_in);
285 }
286 out:
287 mutex_exit(&dm_ioctl_mtx);
288 return r;
289 }
290
291 /*
292 * Translate command sent from libdevmapper to func.
293 */
294 static int
295 dm_cmd_to_fun(prop_dictionary_t dm_dict){
296 int i,len,slen;
297 int r;
298 const char *command;
299
300 r = 0;
301
302 (void)prop_dictionary_get_cstring_nocopy(dm_dict, DM_IOCTL_COMMAND,
303 &command);
304
305 len = strlen(command);
306
307 for(i=0;cmd_fn[i].cmd != NULL;i++){
308 slen = strlen(cmd_fn[i].cmd);
309
310 if (len != slen)
311 continue;
312
313 if ((strncmp(command, cmd_fn[i].cmd, slen)) == 0) {
314 aprint_verbose("ioctl command: %s\n", command);
315 r = cmd_fn[i].fn(dm_dict);
316 break;
317 }
318 }
319
320 return r;
321 }
322
323 /* Call apropriate ioctl handler function. */
324 static int
325 dm_ioctl_switch(u_long cmd)
326 {
327 int r;
328
329 r = 0;
330
331 switch(cmd) {
332
333 case NETBSD_DM_IOCTL:
334 aprint_verbose("NetBSD_DM_IOCTL called\n");
335 break;
336
337 default:
338 aprint_verbose("unknown ioctl called\n");
339 return EPASSTHROUGH;
340 break; /* NOT REACHED */
341 }
342
343 return r;
344 }
345
346 /*
347 * Check for disk specific ioctls.
348 */
349
350 static int
351 disk_ioctl_switch(dev_t dev, u_long cmd, void *data)
352 {
353 struct dm_dev *dmv;
354
355 if ((dmv = dm_dev_lookup_minor(minor(dev))) == NULL)
356 return 1;
357
358 switch(cmd) {
359
360 case DIOCGWEDGEINFO:
361 {
362 struct dkwedge_info *dkw = (void *) data;
363
364 aprint_verbose("DIOCGWEDGEINFO ioctl called\n");
365
366 strlcpy(dkw->dkw_devname, dmv->name, 16);
367 strlcpy(dkw->dkw_wname, dmv->name, DM_NAME_LEN);
368 strlcpy(dkw->dkw_parent, dmv->name, 16);
369
370 dkw->dkw_offset = 0;
371 dkw->dkw_size = dmsize(dev);
372 strcpy(dkw->dkw_ptype, DKW_PTYPE_FFS);
373
374 break;
375 }
376
377 case DIOCGDINFO:
378 *(struct disklabel *)data = *(dmv->dm_dklabel);
379 break;
380
381 case DIOCGPART:
382 case DIOCWDINFO:
383 case DIOCSDINFO:
384 case DIOCKLABEL:
385 case DIOCWLABEL:
386 case DIOCGDEFLABEL:
387
388 default:
389 aprint_verbose("unknown disk_ioctl called\n");
390 return 1;
391 break; /* NOT REACHED */
392 }
393
394 return 0;
395 }
396
397 /*
398 * Do all IO operations on dm logical devices.
399 */
400 static void
401 dmstrategy(struct buf *bp)
402 {
403
404 struct dm_dev *dmv;
405 struct dm_table *tbl;
406
407 struct dm_table_entry *table_en;
408
409 struct buf *nestbuf;
410
411 uint32_t dev_type;
412
413 uint64_t table_start;
414 uint64_t table_end;
415
416 uint64_t buf_start;
417 uint64_t buf_len;
418
419 uint64_t start;
420 uint64_t end;
421
422 uint64_t issued_len;
423
424 buf_start = bp->b_blkno * DEV_BSIZE;
425 buf_len = bp->b_bcount;
426
427 tbl = NULL;
428
429 table_end = 0;
430 dev_type = 0;
431 issued_len = 0;
432
433 /* dm_dev are guarded by their own mutex */
434 if ((dmv = dm_dev_lookup_minor(minor(bp->b_dev))) == NULL) {
435 bp->b_error = EIO;
436 bp->b_resid = bp->b_bcount;
437 biodone(bp);
438 return;
439 }
440 /*
441 * Test if deleting flag is not set if it is set fail io
442 * operation. There is small window between rw_enter and
443 * dev_rem in dm_dev_remove_ioctl when new IO can be started
444 * on device (it will wait on dev_rwlock) which will be
445 * destroyed.
446 */
447 dev_type = atomic_and_32_nv(&dmv->dev_type, DM_DELETING_DEV);
448
449 if (dev_type & DM_DELETING_DEV) {
450 bp->b_error = EIO;
451 bp->b_resid = bp->b_bcount;
452 biodone(bp);
453 return;
454 }
455
456 /* Read lock per device rwlock so device can't be changed. */
457 rw_enter(&dmv->dev_rwlock, RW_READER);
458
459 /* Select active table */
460 tbl = &dmv->tables[dmv->cur_active_table];
461
462 /* Nested buffers count down to zero therefore I have
463 to set bp->b_resid to maximal value. */
464 bp->b_resid = bp->b_bcount;
465
466 /*
467 * Find out what tables I want to select.
468 */
469 SLIST_FOREACH(table_en, tbl, next)
470 {
471
472 /* I need need number of bytes not blocks. */
473 table_start = table_en->start * DEV_BSIZE;
474 /*
475 * I have to sub 1 from table_en->length to prevent
476 * off by one error
477 */
478 table_end = table_start + (table_en->length)* DEV_BSIZE;
479
480 start = MAX(table_start, buf_start);
481
482 end = MIN(table_end, buf_start + buf_len);
483
484 aprint_debug("----------------------------------------\n");
485 aprint_debug("table_start %010" PRIu64", table_end %010"
486 PRIu64 "\n", table_start, table_end);
487 aprint_debug("buf_start %010" PRIu64", buf_len %010"
488 PRIu64"\n", buf_start, buf_len);
489 aprint_debug("start-buf_start %010"PRIu64", end %010"
490 PRIu64"\n", start - buf_start, end);
491 aprint_debug("end-start %010" PRIu64 "\n", end - start);
492 aprint_debug("\n----------------------------------------\n");
493
494 if (start < end) {
495 /* create nested buffer */
496 nestbuf = getiobuf(NULL, true);
497
498 nestiobuf_setup(bp, nestbuf, start - buf_start,
499 (end-start));
500
501 issued_len += end-start;
502
503 /* I need number of blocks. */
504 nestbuf->b_blkno = (start - table_start) / DEV_BSIZE;
505
506 table_en->target->strategy(table_en, nestbuf);
507 }
508 }
509
510 if (issued_len < buf_len)
511 nestiobuf_done(bp, buf_len - issued_len, EINVAL);
512
513 rw_exit(&dmv->dev_rwlock);
514
515 return;
516 }
517
518
519 static int
520 dmread(dev_t dev, struct uio *uio, int flag)
521 {
522 return (physio(dmstrategy, NULL, dev, B_READ, dmminphys, uio));
523 }
524
525 static int
526 dmwrite(dev_t dev, struct uio *uio, int flag)
527 {
528 return (physio(dmstrategy, NULL, dev, B_WRITE, dmminphys, uio));
529 }
530
531 static int
532 dmdump(dev_t dev, daddr_t blkno, void *va, size_t size)
533 {
534 return ENODEV;
535 }
536
537 static int
538 dmsize(dev_t dev)
539 {
540 struct dm_dev *dmv;
541 struct dm_table *tbl;
542 struct dm_table_entry *table_en;
543
544 uint64_t length;
545
546 length = 0;
547
548 aprint_debug("dmsize routine called %d\n", minor(dev));
549
550 if ( (dmv = dm_dev_lookup_minor(minor(dev))) == NULL)
551 return ENODEV;
552
553 /* Select active table */
554 tbl = &dmv->tables[dmv->cur_active_table];
555
556 rw_enter(&dmv->dev_rwlock, RW_READER);
557
558 /*
559 * Find out what tables I want to select.
560 * if length => rawblkno then we should used that table.
561 */
562 SLIST_FOREACH(table_en, tbl, next)
563 length += table_en->length;
564
565 rw_exit(&dmv->dev_rwlock);
566
567 return length;
568 }
569
570 static void
571 dmminphys(struct buf *bp)
572 {
573 bp->b_bcount = MIN(bp->b_bcount, MAXPHYS);
574 }
575 /*
576 * Load the label information on the named device
577 * Actually fabricate a disklabel
578 *
579 * EVENTUALLY take information about different
580 * data tracks from the TOC and put it in the disklabel
581 */
582
583
584 static void
585 dmgetdisklabel(struct dm_dev *dmv, dev_t dev)
586 {
587 struct cpu_disklabel cpulp;
588 struct dm_pdev *dmp;
589
590 if ((dmv->dm_dklabel = kmem_zalloc(sizeof(struct disklabel), KM_NOSLEEP))
591 == NULL)
592 return;
593
594 memset(&cpulp, 0, sizeof(cpulp));
595
596 dmp = SLIST_FIRST(&dmv->pdevs);
597
598 dmgetdefaultdisklabel(dmv, dev);
599
600 return;
601 }
602
603 /*
604 * Initialize disklabel values, so we can use it for readdisklabel.
605 */
606 static void
607 dmgetdefaultdisklabel(struct dm_dev *dmv, dev_t dev)
608 {
609 struct disklabel *lp = dmv->dm_dklabel;
610 struct partition *pp;
611 int dmp_size;
612
613 dmp_size = dmsize(dev);
614
615 /*
616 * Size must be at least 2048 DEV_BSIZE blocks
617 * (1M) in order to use this geometry.
618 */
619
620 lp->d_secperunit = dmp_size;
621 lp->d_secsize = DEV_BSIZE;
622 lp->d_nsectors = 32;
623 lp->d_ntracks = 64;
624 lp->d_ncylinders = dmp_size / (lp->d_nsectors * lp->d_ntracks);
625 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
626
627 strncpy(lp->d_typename, "lvm", sizeof(lp->d_typename));
628 lp->d_type = DTYPE_DM;
629 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
630 lp->d_rpm = 3600;
631 lp->d_interleave = 1;
632 lp->d_flags = 0;
633
634 pp = &lp->d_partitions[RAW_PART];
635 /*
636 * This is logical offset and therefore it can be 0
637 * I will consider table offsets later in dmstrategy.
638 */
639 pp->p_offset = 0;
640 pp->p_size = lp->d_secperunit;
641 pp->p_fstype = FS_BSDFFS; /* default value */
642 lp->d_npartitions = RAW_PART + 1;
643
644 lp->d_magic = DISKMAGIC;
645 lp->d_magic2 = DISKMAGIC;
646 lp->d_checksum = dkcksum(lp);
647 }
648