X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=drivers%2Fmd%2Fdm-snap.c;h=0821a2b68a73a98719cb931c82196a74bfe471d4;hp=8eea0ddbf5ec7dad5d4b012bd251dfeca83cd286;hb=88032b322a38b37335c8cb2e3473a45c81d280eb;hpb=d7fa2589bbe7ab53fd5eb20e8c7e388d5aff6f16 diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 8eea0ddbf5ec..0821a2b68a73 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include @@ -40,6 +39,9 @@ */ #define SNAPSHOT_PAGES 256 +static struct workqueue_struct *ksnapd; +static void flush_queued_bios(struct work_struct *work); + struct pending_exception { struct exception e; @@ -57,7 +59,7 @@ struct pending_exception { /* * The primary pending_exception is the one that holds - * the sibling_count and the list of origin_bios for a + * the ref_count and the list of origin_bios for a * group of pending_exceptions. It is always last to get freed. * These fields get set up when writing to the origin. */ @@ -70,7 +72,7 @@ struct pending_exception { * the sibling concerned and not pe->primary_pe->snap->lock unless * they are the same. */ - atomic_t sibling_count; + atomic_t ref_count; /* Pointer back to snapshot context */ struct dm_snapshot *snap; @@ -86,8 +88,8 @@ struct pending_exception { * Hash table mapping origin volumes to lists of snapshots and * a lock to protect it */ -static kmem_cache_t *exception_cache; -static kmem_cache_t *pending_cache; +static struct kmem_cache *exception_cache; +static struct kmem_cache *pending_cache; static mempool_t *pending_pool; /* @@ -226,7 +228,7 @@ static int init_exception_table(struct exception_table *et, uint32_t size) return 0; } -static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) +static void exit_exception_table(struct exception_table *et, struct kmem_cache *mem) { struct list_head *slot; struct exception *ex, *next; @@ -388,15 +390,46 @@ static inline ulong round_up(ulong n, ulong size) return (n + size) & ~size; } -static void read_snapshot_metadata(struct dm_snapshot *s) +static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, + char **error) { - if (s->store.read_metadata(&s->store)) { - down_write(&s->lock); - s->valid = 0; - up_write(&s->lock); + unsigned long chunk_size; + char *value; + + chunk_size = simple_strtoul(chunk_size_arg, &value, 10); + if (*chunk_size_arg == '\0' || *value != '\0') { + *error = "Invalid chunk size"; + return -EINVAL; + } - dm_table_event(s->table); + if (!chunk_size) { + s->chunk_size = s->chunk_mask = s->chunk_shift = 0; + return 0; } + + /* + * Chunk size must be multiple of page size. Silently + * round up if it's not. + */ + chunk_size = round_up(chunk_size, PAGE_SIZE >> 9); + + /* Check chunk_size is a power of 2 */ + if (chunk_size & (chunk_size - 1)) { + *error = "Chunk size is not a power of 2"; + return -EINVAL; + } + + /* Validate the chunk size against the device block size */ + if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) { + *error = "Chunk size is not a multiple of device blocksize"; + return -EINVAL; + } + + s->chunk_size = chunk_size; + s->chunk_mask = chunk_size - 1; + s->chunk_shift = ffs(chunk_size) - 1; + + return 0; } /* @@ -405,15 +438,12 @@ static void read_snapshot_metadata(struct dm_snapshot *s) static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct dm_snapshot *s; - unsigned long chunk_size; int r = -EINVAL; char persistent; char *origin_path; char *cow_path; - char *value; - int blocksize; - if (argc < 4) { + if (argc != 4) { ti->error = "requires exactly 4 arguments"; r = -EINVAL; goto bad1; @@ -429,13 +459,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad1; } - chunk_size = simple_strtoul(argv[3], &value, 10); - if (chunk_size == 0 || value == NULL) { - ti->error = "Invalid chunk size"; - r = -EINVAL; - goto bad1; - } - s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) { ti->error = "Cannot allocate snapshot context private " @@ -458,36 +481,17 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad2; } - /* - * Chunk size must be multiple of page size. Silently - * round up if it's not. - */ - chunk_size = round_up(chunk_size, PAGE_SIZE >> 9); - - /* Validate the chunk size against the device block size */ - blocksize = s->cow->bdev->bd_disk->queue->hardsect_size; - if (chunk_size % (blocksize >> 9)) { - ti->error = "Chunk size is not a multiple of device blocksize"; - r = -EINVAL; - goto bad3; - } - - /* Check chunk_size is a power of 2 */ - if (chunk_size & (chunk_size - 1)) { - ti->error = "Chunk size is not a power of 2"; - r = -EINVAL; + r = set_chunk_size(s, argv[3], &ti->error); + if (r) goto bad3; - } - s->chunk_size = chunk_size; - s->chunk_mask = chunk_size - 1; s->type = persistent; - s->chunk_shift = ffs(chunk_size) - 1; s->valid = 1; s->active = 0; s->last_percent = 0; init_rwsem(&s->lock); + spin_lock_init(&s->pe_lock); s->table = ti->table; /* Allocate hash table for COW data */ @@ -497,16 +501,12 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad3; } - /* - * Check the persistent flag - done here because we need the iobuf - * to check the LV header - */ s->store.snap = s; if (persistent == 'P') - r = dm_create_persistent(&s->store, chunk_size); + r = dm_create_persistent(&s->store); else - r = dm_create_transient(&s->store, s, blocksize); + r = dm_create_transient(&s->store); if (r) { ti->error = "Couldn't create exception store"; @@ -521,7 +521,14 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) } /* Metadata must only be loaded into one table at once */ - read_snapshot_metadata(s); + r = s->store.read_metadata(&s->store); + if (r) { + ti->error = "Failed to read snapshot metadata"; + goto bad6; + } + + bio_list_init(&s->queued_bios); + INIT_WORK(&s->queued_bios_work, flush_queued_bios); /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ @@ -557,21 +564,28 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) return r; } +static void __free_exceptions(struct dm_snapshot *s) +{ + kcopyd_client_destroy(s->kcopyd_client); + s->kcopyd_client = NULL; + + exit_exception_table(&s->pending, pending_cache); + exit_exception_table(&s->complete, exception_cache); + + s->store.destroy(&s->store); +} + static void snapshot_dtr(struct dm_target *ti) { struct dm_snapshot *s = (struct dm_snapshot *) ti->private; + flush_workqueue(ksnapd); + /* Prevent further origin writes from using this snapshot. */ /* After this returns there can be no new kcopyd jobs. */ unregister_snapshot(s); - kcopyd_client_destroy(s->kcopyd_client); - - exit_exception_table(&s->pending, pending_cache); - exit_exception_table(&s->complete, exception_cache); - - /* Deallocate memory used */ - s->store.destroy(&s->store); + __free_exceptions(s); dm_put_device(ti, s->origin); dm_put_device(ti, s->cow); @@ -594,6 +608,20 @@ static void flush_bios(struct bio *bio) } } +static void flush_queued_bios(struct work_struct *work) +{ + struct dm_snapshot *s = + container_of(work, struct dm_snapshot, queued_bios_work); + struct bio *queued_bios; + unsigned long flags; + + spin_lock_irqsave(&s->pe_lock, flags); + queued_bios = bio_list_get(&s->queued_bios); + spin_unlock_irqrestore(&s->pe_lock, flags); + + flush_bios(queued_bios); +} + /* * Error a list of buffers. */ @@ -609,28 +637,7 @@ static void error_bios(struct bio *bio) } } -static inline void error_snapshot_bios(struct pending_exception *pe) -{ - error_bios(bio_list_get(&pe->snapshot_bios)); -} - -static struct bio *__flush_bios(struct pending_exception *pe) -{ - /* - * If this pe is involved in a write to the origin and - * it is the last sibling to complete then release - * the bios for the original write to the origin. - */ - - if (pe->primary_pe && - atomic_dec_and_test(&pe->primary_pe->sibling_count)) - return bio_list_get(&pe->primary_pe->origin_bios); - - return NULL; -} - -static void __invalidate_snapshot(struct dm_snapshot *s, - struct pending_exception *pe, int err) +static void __invalidate_snapshot(struct dm_snapshot *s, int err) { if (!s->valid) return; @@ -640,9 +647,6 @@ static void __invalidate_snapshot(struct dm_snapshot *s, else if (err == -ENOMEM) DMERR("Invalidating snapshot: Unable to allocate exception."); - if (pe) - remove_exception(&pe->e); - if (s->store.drop_snapshot) s->store.drop_snapshot(&s->store); @@ -651,78 +655,95 @@ static void __invalidate_snapshot(struct dm_snapshot *s, dm_table_event(s->table); } +static void get_pending_exception(struct pending_exception *pe) +{ + atomic_inc(&pe->ref_count); +} + +static struct bio *put_pending_exception(struct pending_exception *pe) +{ + struct pending_exception *primary_pe; + struct bio *origin_bios = NULL; + + primary_pe = pe->primary_pe; + + /* + * If this pe is involved in a write to the origin and + * it is the last sibling to complete then release + * the bios for the original write to the origin. + */ + if (primary_pe && + atomic_dec_and_test(&primary_pe->ref_count)) + origin_bios = bio_list_get(&primary_pe->origin_bios); + + /* + * Free the pe if it's not linked to an origin write or if + * it's not itself a primary pe. + */ + if (!primary_pe || primary_pe != pe) + free_pending_exception(pe); + + /* + * Free the primary pe if nothing references it. + */ + if (primary_pe && !atomic_read(&primary_pe->ref_count)) + free_pending_exception(primary_pe); + + return origin_bios; +} + static void pending_complete(struct pending_exception *pe, int success) { struct exception *e; - struct pending_exception *primary_pe; struct dm_snapshot *s = pe->snap; - struct bio *flush = NULL; + struct bio *origin_bios = NULL; + struct bio *snapshot_bios = NULL; + int error = 0; if (!success) { /* Read/write error - snapshot is unusable */ down_write(&s->lock); - __invalidate_snapshot(s, pe, -EIO); - flush = __flush_bios(pe); - up_write(&s->lock); - - error_snapshot_bios(pe); + __invalidate_snapshot(s, -EIO); + error = 1; goto out; } e = alloc_exception(); if (!e) { down_write(&s->lock); - __invalidate_snapshot(s, pe, -ENOMEM); - flush = __flush_bios(pe); - up_write(&s->lock); - - error_snapshot_bios(pe); + __invalidate_snapshot(s, -ENOMEM); + error = 1; goto out; } *e = pe->e; - /* - * Add a proper exception, and remove the - * in-flight exception from the list. - */ down_write(&s->lock); if (!s->valid) { - flush = __flush_bios(pe); - up_write(&s->lock); - free_exception(e); - - error_snapshot_bios(pe); + error = 1; goto out; } + /* + * Add a proper exception, and remove the + * in-flight exception from the list. + */ insert_exception(&s->complete, e); + + out: remove_exception(&pe->e); - flush = __flush_bios(pe); + snapshot_bios = bio_list_get(&pe->snapshot_bios); + origin_bios = put_pending_exception(pe); up_write(&s->lock); /* Submit any pending write bios */ - flush_bios(bio_list_get(&pe->snapshot_bios)); - - out: - primary_pe = pe->primary_pe; - - /* - * Free the pe if it's not linked to an origin write or if - * it's not itself a primary pe. - */ - if (!primary_pe || primary_pe != pe) - free_pending_exception(pe); - - /* - * Free the primary pe if nothing references it. - */ - if (primary_pe && !atomic_read(&primary_pe->sibling_count)) - free_pending_exception(primary_pe); + if (error) + error_bios(snapshot_bios); + else + flush_bios(snapshot_bios); - if (flush) - flush_bios(flush); + flush_bios(origin_bios); } static void commit_callback(void *context, int success) @@ -823,7 +844,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) bio_list_init(&pe->origin_bios); bio_list_init(&pe->snapshot_bios); pe->primary_pe = NULL; - atomic_set(&pe->sibling_count, 1); + atomic_set(&pe->ref_count, 0); pe->snap = s; pe->started = 0; @@ -832,6 +853,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) return NULL; } + get_pending_exception(pe); insert_exception(&s->pending, &pe->e); out: @@ -851,8 +873,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, { struct exception *e; struct dm_snapshot *s = (struct dm_snapshot *) ti->private; - int copy_needed = 0; - int r = 1; + int r = DM_MAPIO_REMAPPED; chunk_t chunk; struct pending_exception *pe = NULL; @@ -866,32 +887,31 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, if (unlikely(bio_barrier(bio))) return -EOPNOTSUPP; + /* FIXME: should only take write lock if we need + * to copy an exception */ + down_write(&s->lock); + + if (!s->valid) { + r = -EIO; + goto out_unlock; + } + + /* If the block is already remapped - use that, else remap it */ + e = lookup_exception(&s->complete, chunk); + if (e) { + remap_exception(s, e, bio); + goto out_unlock; + } + /* * Write to snapshot - higher level takes care of RW/RO * flags so we should only get this if we are * writeable. */ if (bio_rw(bio) == WRITE) { - - /* FIXME: should only take write lock if we need - * to copy an exception */ - down_write(&s->lock); - - if (!s->valid) { - r = -EIO; - goto out_unlock; - } - - /* If the block is already remapped - use that, else remap it */ - e = lookup_exception(&s->complete, chunk); - if (e) { - remap_exception(s, e, bio); - goto out_unlock; - } - pe = __find_pending_exception(s, bio); if (!pe) { - __invalidate_snapshot(s, pe, -ENOMEM); + __invalidate_snapshot(s, -ENOMEM); r = -EIO; goto out_unlock; } @@ -899,45 +919,27 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, remap_exception(s, &pe->e, bio); bio_list_add(&pe->snapshot_bios, bio); + r = DM_MAPIO_SUBMITTED; + if (!pe->started) { /* this is protected by snap->lock */ pe->started = 1; - copy_needed = 1; - } - - r = 0; - - out_unlock: - up_write(&s->lock); - - if (copy_needed) + up_write(&s->lock); start_copy(pe); - } else { + goto out; + } + } else /* * FIXME: this read path scares me because we * always use the origin when we have a pending * exception. However I can't think of a * situation where this is wrong - ejt. */ + bio->bi_bdev = s->origin->bdev; - /* Do reads */ - down_read(&s->lock); - - if (!s->valid) { - up_read(&s->lock); - return -EIO; - } - - /* See if it it has been remapped */ - e = lookup_exception(&s->complete, chunk); - if (e) - remap_exception(s, e, bio); - else - bio->bi_bdev = s->origin->bdev; - - up_read(&s->lock); - } - + out_unlock: + up_write(&s->lock); + out: return r; } @@ -995,7 +997,7 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, *---------------------------------------------------------------*/ static int __origin_write(struct list_head *snapshots, struct bio *bio) { - int r = 1, first = 0; + int r = DM_MAPIO_REMAPPED, first = 0; struct dm_snapshot *snap; struct exception *e; struct pending_exception *pe, *next_pe, *primary_pe = NULL; @@ -1026,7 +1028,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) * is already remapped in this snapshot * and trigger an exception if not. * - * sibling_count is initialised to 1 so pending_complete() + * ref_count is initialised to 1 so pending_complete() * won't destroy the primary_pe while we're inside this loop. */ e = lookup_exception(&snap->complete, chunk); @@ -1035,7 +1037,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) pe = __find_pending_exception(snap, bio); if (!pe) { - __invalidate_snapshot(snap, pe, ENOMEM); + __invalidate_snapshot(snap, -ENOMEM); goto next_snapshot; } @@ -1053,12 +1055,12 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) bio_list_add(&primary_pe->origin_bios, bio); - r = 0; + r = DM_MAPIO_SUBMITTED; } if (!pe->primary_pe) { - atomic_inc(&primary_pe->sibling_count); pe->primary_pe = primary_pe; + get_pending_exception(primary_pe); } if (!pe->started) { @@ -1071,20 +1073,20 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) } if (!primary_pe) - goto out; + return r; /* * If this is the first time we're processing this chunk and - * sibling_count is now 1 it means all the pending exceptions + * ref_count is now 1 it means all the pending exceptions * got completed while we were in the loop above, so it falls to * us here to remove the primary_pe and submit any origin_bios. */ - if (first && atomic_dec_and_test(&primary_pe->sibling_count)) { + if (first && atomic_dec_and_test(&primary_pe->ref_count)) { flush_bios(bio_list_get(&primary_pe->origin_bios)); free_pending_exception(primary_pe); /* If we got here, pe_queue is necessarily empty. */ - goto out; + return r; } /* @@ -1093,7 +1095,6 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) list_for_each_entry_safe(pe, next_pe, &pe_queue, list) start_copy(pe); - out: return r; } @@ -1103,7 +1104,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) static int do_origin(struct dm_dev *origin, struct bio *bio) { struct origin *o; - int r = 1; + int r = DM_MAPIO_REMAPPED; down_read(&_origins_lock); o = __lookup_origin(origin->bdev); @@ -1160,7 +1161,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio, return -EOPNOTSUPP; /* Only tell snapshots if this is a write */ - return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : 1; + return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED; } #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) @@ -1206,7 +1207,7 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, @@ -1217,7 +1218,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -1276,8 +1277,17 @@ static int __init dm_snapshot_init(void) goto bad5; } + ksnapd = create_singlethread_workqueue("ksnapd"); + if (!ksnapd) { + DMERR("Failed to create ksnapd workqueue."); + r = -ENOMEM; + goto bad6; + } + return 0; + bad6: + mempool_destroy(pending_pool); bad5: kmem_cache_destroy(pending_cache); bad4: @@ -1295,6 +1305,8 @@ static void __exit dm_snapshot_exit(void) { int r; + destroy_workqueue(ksnapd); + r = dm_unregister_target(&snapshot_target); if (r) DMERR("snapshot unregister failed %d", r);