locking.txt revision 1.4
11.1Shaad
				Device-mapper Locking architecture

Overview

There are two kinds of users of the device-mapper driver:
      a) Users who use disk drives
      b) Users who use the ioctl management interface

Management is done by the dm_dev_*_ioctl and dm_table_*_ioctl routines. There
are two major structures used by these routines and by device-mapper in general.

Table entry:
141.1Shaad
151.1Shaadtypedef struct dm_table_entry {
161.1Shaad        struct dm_dev *dm_dev;          /* backlink */
171.1Shaad        uint64_t start;
181.1Shaad        uint64_t length;
191.1Shaad
201.1Shaad        struct dm_target *target;      /* Link to table target. */
211.1Shaad        void *target_config;           /* Target specific data. */
221.1Shaad        SLIST_ENTRY(dm_table_entry) next;
231.1Shaad} dm_table_entry_t;
241.1Shaad
Every target part of a dm device is stored in this structure. Every device can
have more than one target mapping entry, stored in a list. This structure
describes the mapping between logical and physical blocks in a dm device.

start  length target block device offset
0      102400 linear /dev/wd1a     384
102400 204800 linear /dev/wd2a     384
204800 409600 linear /dev/wd3a     384

Every device has at least two tables, ACTIVE and INACTIVE. Only the ACTIVE table
is used during IO. Every IO operation on a dm device has to walk through the
dm_table_entries list.

Device entry:
381.1Shaad
391.1Shaadtypedef struct dm_dev {
401.1Shaad        char name[DM_NAME_LEN];
411.1Shaad        char uuid[DM_UUID_LEN];
421.1Shaad
431.1Shaad        int minor;
441.1Shaad        uint32_t flags; /* store communication protocol flags */
451.1Shaad
461.4Sandvar        kmutex_t dev_mtx; /* mutex for general device lock */
471.1Shaad        kcondvar_t dev_cv; /* cv for ioctl synchronisation */
481.1Shaad
491.1Shaad        uint32_t event_nr;
501.1Shaad        uint32_t ref_cnt;
511.1Shaad
521.1Shaad        dm_table_head_t table_head;
531.1Shaad
541.1Shaad        struct dm_dev_head upcalls;
551.1Shaad
561.1Shaad        struct disklabel *dk_label;    /* Disklabel for this table. */
571.1Shaad
581.1Shaad        TAILQ_ENTRY(dm_dev) next_upcall; /* LIST of mirrored, snapshoted devices. */
591.1Shaad
601.1Shaad        TAILQ_ENTRY(dm_dev) next_devlist; /* Major device list. */
611.1Shaad} dm_dev_t;
621.1Shaad
Every device created in device-mapper is represented by this structure.
All devices are stored in a list. Every ioctl routine has to work with this
structure.

	Locking in dm driver

Locking must be done in two ways: synchronisation between ioctl routines, and
synchronisation between IO operations and ioctl routines. Table entries are read
during IO and during some ioctl routines. There are only a few routines which
manipulate table lists.
711.1Shaad
Read access to table list:

dmsize
dmstrategy
dm_dev_status_ioctl
dm_table_info_ioctl
dm_table_deps_ioctl
dm_disk_ioctl              -> DIOCCACHESYNC ioctl

Write access to table list:

dm_dev_remove_ioctl        -> Remove device from list; this routine has to
                              remove all tables.
dm_dev_resume_ioctl        -> Switch tables on suspended device, i.e. switch the
                              INACTIVE and ACTIVE tables.
dm_table_clear_ioctl       -> Remove INACTIVE table from table list.


Synchronisation between readers and writers in table list

I moved everything needed for table synchronisation to struct dm_table_head.
921.1Shaad
931.1Shaadtypedef struct dm_table_head {
941.1Shaad        /* Current active table is selected with this. */
951.1Shaad        int cur_active_table;
961.1Shaad        struct dm_table tables[2];
971.1Shaad
981.1Shaad        kmutex_t   table_mtx;
991.1Shaad        kcondvar_t table_cv; /*IO waiting cv */
1001.1Shaad
1011.1Shaad        uint32_t io_cnt;
1021.1Shaad} dm_table_head_t;
1031.1Shaad
dm_table_head_t is used as the entry point for every dm_table synchronisation
routine.

Because every table user has to get access to the table list head, I have
implemented these routines to manage access to table lists.
1081.1Shaad
1091.4Sandvar/*
1101.4Sandvar * Destroy all table data. This function can run when there are no
1111.4Sandvar * readers on table lists.
1121.1Shaad */
1131.1Shaadint dm_table_destroy(dm_table_head_t *, uint8_t);
1141.1Shaad
1151.4Sandvar/*
1161.4Sandvar * Return length of active table in device.
1171.1Shaad */
1181.1Shaaduint64_t dm_table_size(dm_table_head_t *);
1191.1Shaad
1201.4Sandvar/*
1211.4Sandvar * Return current active table to caller, increment io_cnt reference counter.
1221.1Shaad */
1231.3Stkusumistruct dm_table *dm_table_get_entry(dm_table_head_t *, uint8_t);
1241.1Shaad
1251.4Sandvar/*
1261.4Sandvar * Return > 0 if table is at least one table entry (returns number of entries)
1271.4Sandvar * and return 0 if there is not. Target count returned from this function
1281.4Sandvar * doesn't need to be true when userspace user receives it (after return
1291.4Sandvar * there can be dm_dev_resume_ioctl), therefore this is only informative.
1301.1Shaad */
1311.1Shaadint dm_table_get_target_count(dm_table_head_t *, uint8_t);
1321.1Shaad
1331.4Sandvar/*
1341.4Sandvar * Decrement io reference counter and wake up all callers, with table_head cv.
1351.1Shaad */
1361.1Shaadvoid dm_table_release(dm_table_head_t *, uint8_t s);
1371.1Shaad
1381.4Sandvar/*
1391.4Sandvar * Switch table from inactive to active mode. Have to wait until io_cnt is 0.
1401.1Shaad */
1411.1Shaadvoid dm_table_switch_tables(dm_table_head_t *);
1421.1Shaad
1431.4Sandvar/*
1441.4Sandvar * Initialize table_head structures, I'm trying to keep this structure as
1451.4Sandvar * opaque as possible.
1461.1Shaad */
1471.1Shaadvoid dm_table_head_init(dm_table_head_t *);
1481.1Shaad
1491.4Sandvar/*
1501.4Sandvar * Destroy all variables in table_head
1511.1Shaad */
1521.1Shaadvoid dm_table_head_destroy(dm_table_head_t *);
1531.1Shaad
Internal table synchronisation protocol

Readers:
dm_table_size
dm_table_get_target_count

Readers that hold the reference counter:
dm_table_get_entry
dm_table_release

Writers:
dm_table_destroy
dm_table_switch_tables

For managing synchronisation to table lists I use these routines. Every reader
uses the dm_table_busy routine to hold the reference counter during its work and
dm_table_unbusy to release the reference counter. Every writer has to wait until
the reference counter is 0, and only then can it work with the device. It will
sleep on head->table_cv while there are other readers. dm_table_get_entry is
special in that it returns the table with the reference counter held. After
dm_table_get_entry every caller must call dm_table_release when it no longer
wants to work with the table.
1741.1Shaad
1751.4Sandvar/*
1761.4Sandvar * Function to increment table user reference counter. Return id
1771.4Sandvar * of table_id table.
1781.4Sandvar * DM_TABLE_ACTIVE will return active table id.
1791.4Sandvar * DM_TABLE_INACTIVE will return inactive table id.
1801.1Shaad */
1811.1Shaadstatic int
1821.1Shaaddm_table_busy(dm_table_head_t *head, uint8_t table_id)
1831.1Shaad{
1841.1Shaad        uint8_t id;
1851.1Shaad
1861.1Shaad        id = 0;
1871.1Shaad
1881.1Shaad        mutex_enter(&head->table_mtx);
1891.1Shaad
1901.1Shaad        if (table_id == DM_TABLE_ACTIVE)
1911.1Shaad                id = head->cur_active_table;
1921.1Shaad        else
1931.1Shaad                id = 1 - head->cur_active_table;
1941.1Shaad
1951.1Shaad        head->io_cnt++;
1961.1Shaad
1971.1Shaad        mutex_exit(&head->table_mtx);
1981.1Shaad        return id;
1991.1Shaad}
2001.1Shaad
2011.4Sandvar/*
2021.4Sandvar * Function release table lock and eventually wakeup all waiters.
2031.1Shaad */
2041.1Shaadstatic void
2051.1Shaaddm_table_unbusy(dm_table_head_t *head)
2061.1Shaad{
2071.1Shaad        KASSERT(head->io_cnt != 0);
2081.1Shaad
2091.1Shaad        mutex_enter(&head->table_mtx);
2101.1Shaad
2111.1Shaad        if (--head->io_cnt == 0)
2121.1Shaad                cv_broadcast(&head->table_cv);
2131.1Shaad
2141.1Shaad        mutex_exit(&head->table_mtx);
2151.1Shaad}
2161.1Shaad
Device-mapper device synchronisation between ioctls


Every ioctl user has to find the dm device by name, uuid or minor number.
For this dm_dev_lookup is used. This routine returns the device with its
reference counter held.
2231.1Shaad
2241.1Shaadvoid
2251.1Shaaddm_dev_busy(dm_dev_t *dmv)
2261.1Shaad{
2271.1Shaad        mutex_enter(&dmv->dev_mtx);
2281.1Shaad        dmv->ref_cnt++;
2291.1Shaad        mutex_exit(&dmv->dev_mtx);
2301.1Shaad}
2311.1Shaad
2321.1Shaadvoid
2331.1Shaaddm_dev_unbusy(dm_dev_t *dmv)
2341.1Shaad{
2351.1Shaad        KASSERT(dmv->ref_cnt != 0);
2361.1Shaad
2371.1Shaad        mutex_enter(&dmv->dev_mtx);
2381.1Shaad        if (--dmv->ref_cnt == 0)
2391.1Shaad                cv_broadcast(&dmv->dev_cv);
2401.1Shaad        mutex_exit(&dmv->dev_mtx);
2411.1Shaad}
2421.1Shaad
Before returning, every ioctl routine must release the reference counter with
dm_dev_unbusy.

The dm_dev_remove_ioctl routine has to remove the dm_dev from the global device
list, and wait until all ioctl users of the dm_dev are gone.