}
}
-static int grow_buffers(struct stripe_head *sh)
+static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
{
int i;
int num = sh->raid_conf->pool_size;
for (i = 0; i < num; i++) {
struct page *page;
- if (!(page = alloc_page(GFP_KERNEL))) {
+ if (!(page = alloc_page(gfp))) {
return 1;
}
sh->dev[i].page = page;
*(conf->hash_locks + hash));
sh = __find_stripe(conf, sector, conf->generation - previous);
if (!sh) {
- if (!conf->inactive_blocked)
+ if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
sh = get_free_stripe(conf, hash);
+ if (!sh && llist_empty(&conf->released_stripes) &&
+ !test_bit(R5_DID_ALLOC, &conf->cache_state))
+ set_bit(R5_ALLOC_MORE,
+ &conf->cache_state);
+ }
if (noblock && sh == NULL)
break;
if (!sh) {
- conf->inactive_blocked = 1;
+ set_bit(R5_INACTIVE_BLOCKED,
+ &conf->cache_state);
wait_event_lock_irq(
conf->wait_for_stripe,
!list_empty(conf->inactive_list + hash) &&
(atomic_read(&conf->active_stripes)
< (conf->max_nr_stripes * 3 / 4)
- || !conf->inactive_blocked),
+ || !test_bit(R5_INACTIVE_BLOCKED,
+ &conf->cache_state)),
*(conf->hash_locks + hash));
- conf->inactive_blocked = 0;
+ clear_bit(R5_INACTIVE_BLOCKED,
+ &conf->cache_state);
} else {
init_stripe(sh, sector, previous);
atomic_inc(&sh->count);
put_cpu();
}
-static int grow_one_stripe(struct r5conf *conf, int hash)
+static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
{
struct stripe_head *sh;
- sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
+ sh = kmem_cache_zalloc(conf->slab_cache, gfp);
if (!sh)
return 0;
spin_lock_init(&sh->stripe_lock);
- if (grow_buffers(sh)) {
+ if (grow_buffers(sh, gfp)) {
shrink_buffers(sh);
kmem_cache_free(conf->slab_cache, sh);
return 0;
}
- sh->hash_lock_index = hash;
+ sh->hash_lock_index =
+ conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
/* we just created an active stripe so... */
atomic_set(&sh->count, 1);
atomic_inc(&conf->active_stripes);
INIT_LIST_HEAD(&sh->batch_list);
sh->batch_head = NULL;
release_stripe(sh);
+ conf->max_nr_stripes++;
return 1;
}
{
struct kmem_cache *sc;
int devs = max(conf->raid_disks, conf->previous_raid_disks);
- int hash;
if (conf->mddev->gendisk)
sprintf(conf->cache_name[0],
return 1;
conf->slab_cache = sc;
conf->pool_size = devs;
- hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
- while (num--) {
- if (!grow_one_stripe(conf, hash))
+ while (num--)
+ if (!grow_one_stripe(conf, GFP_KERNEL))
return 1;
- conf->max_nr_stripes++;
- hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
- }
+
return 0;
}
return err;
}
-static int drop_one_stripe(struct r5conf *conf, int hash)
+static int drop_one_stripe(struct r5conf *conf)
{
struct stripe_head *sh;
+ int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
spin_lock_irq(conf->hash_locks + hash);
sh = get_free_stripe(conf, hash);
shrink_buffers(sh);
kmem_cache_free(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes);
+ conf->max_nr_stripes--;
return 1;
}
static void shrink_stripes(struct r5conf *conf)
{
- int hash;
- for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++)
- while (drop_one_stripe(conf, hash))
- ;
+ while (conf->max_nr_stripes &&
+ drop_one_stripe(conf))
+ ;
if (conf->slab_cache)
kmem_cache_destroy(conf->slab_cache);
* how busy the stripe_cache is
*/
- if (conf->inactive_blocked)
+ if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
return 1;
if (conf->quiesce)
return 1;
unsigned int chunk_sectors = mddev->chunk_sectors;
unsigned int bio_sectors = bvm->bi_size >> 9;
- if ((bvm->bi_rw & 1) == WRITE)
- return biovec->bv_len; /* always allow writes to be mergeable */
+ /*
+ * always allow writes to be mergeable, read as well if array
+ * is degraded as we'll go through stripe cache anyway.
+ */
+ if ((bvm->bi_rw & 1) == WRITE || mddev->degraded)
+ return biovec->bv_len;
if (mddev->new_chunk_sectors < mddev->chunk_sectors)
chunk_sectors = mddev->new_chunk_sectors;
md_write_start(mddev, bi);
- if (rw == READ &&
+ /*
+ * If array is degraded, better not do chunk aligned read because
+ * later we might have to read it again in order to reconstruct
+ * data on failed drives.
+ */
+ if (rw == READ && mddev->degraded == 0 &&
mddev->reshape_position == MaxSector &&
chunk_aligned_read(mddev,bi))
return;
int batch_size, released;
released = release_stripe_list(conf, conf->temp_inactive_list);
+ if (released)
+ clear_bit(R5_DID_ALLOC, &conf->cache_state);
if (
!list_empty(&conf->bitmap_list)) {
pr_debug("%d stripes handled\n", handled);
spin_unlock_irq(&conf->device_lock);
+ if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
+ grow_one_stripe(conf, __GFP_NOWARN);
+ /* Set flag even if allocation failed. This helps
+ * slow down allocation requests when mem is short
+ */
+ set_bit(R5_DID_ALLOC, &conf->cache_state);
+ }
async_tx_issue_pending_all();
blk_finish_plug(&plug);
spin_lock(&mddev->lock);
conf = mddev->private;
if (conf)
- ret = sprintf(page, "%d\n", conf->max_nr_stripes);
+ ret = sprintf(page, "%d\n", conf->min_nr_stripes);
spin_unlock(&mddev->lock);
return ret;
}
{
struct r5conf *conf = mddev->private;
int err;
- int hash;
if (size <= 16 || size > 32768)
return -EINVAL;
- hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
- while (size < conf->max_nr_stripes) {
- if (drop_one_stripe(conf, hash))
- conf->max_nr_stripes--;
- else
- break;
- hash--;
- if (hash < 0)
- hash = NR_STRIPE_HASH_LOCKS - 1;
- }
+
+ conf->min_nr_stripes = size;
+ while (size < conf->max_nr_stripes &&
+ drop_one_stripe(conf))
+ ;
+
+
err = md_allow_write(mddev);
if (err)
return err;
- hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
- while (size > conf->max_nr_stripes) {
- if (grow_one_stripe(conf, hash))
- conf->max_nr_stripes++;
- else break;
- hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
- }
+
+ while (size > conf->max_nr_stripes)
+ if (!grow_one_stripe(conf, GFP_KERNEL))
+ break;
+
return 0;
}
EXPORT_SYMBOL(raid5_set_cache_size);
raid5_show_stripe_cache_size,
raid5_store_stripe_cache_size);
+static ssize_t
+raid5_show_rmw_level(struct mddev *mddev, char *page)
+{
+ struct r5conf *conf = mddev->private;
+ if (conf)
+ return sprintf(page, "%d\n", conf->rmw_level);
+ else
+ return 0;
+}
+
+static ssize_t
+raid5_store_rmw_level(struct mddev *mddev, const char *page, size_t len)
+{
+ struct r5conf *conf = mddev->private;
+ unsigned long new;
+
+ if (!conf)
+ return -ENODEV;
+
+ if (len >= PAGE_SIZE)
+ return -EINVAL;
+
+ if (kstrtoul(page, 10, &new))
+ return -EINVAL;
+
+ if (new != PARITY_DISABLE_RMW && !raid6_call.xor_syndrome)
+ return -EINVAL;
+
+ if (new != PARITY_DISABLE_RMW &&
+ new != PARITY_ENABLE_RMW &&
+ new != PARITY_PREFER_RMW)
+ return -EINVAL;
+
+ conf->rmw_level = new;
+ return len;
+}
+
+static struct md_sysfs_entry
+raid5_rmw_level = __ATTR(rmw_level, S_IRUGO | S_IWUSR,
+ raid5_show_rmw_level,
+ raid5_store_rmw_level);
+
+
static ssize_t
raid5_show_preread_threshold(struct mddev *mddev, char *page)
{
conf = mddev->private;
if (!conf)
err = -ENODEV;
- else if (new > conf->max_nr_stripes)
+ else if (new > conf->min_nr_stripes)
err = -EINVAL;
else
conf->bypass_threshold = new;
&raid5_preread_bypass_threshold.attr,
&raid5_group_thread_cnt.attr,
&raid5_skip_copy.attr,
+ &raid5_rmw_level.attr,
NULL,
};
static struct attribute_group raid5_attrs_group = {
static void free_conf(struct r5conf *conf)
{
+ if (conf->shrinker.seeks)
+ unregister_shrinker(&conf->shrinker);
free_thread_groups(conf);
shrink_stripes(conf);
raid5_free_percpu(conf);
return err;
}
+static unsigned long raid5_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+ int ret = 0;
+ while (ret < sc->nr_to_scan) {
+ if (drop_one_stripe(conf) == 0)
+ return SHRINK_STOP;
+ ret++;
+ }
+ return ret;
+}
+
+static unsigned long raid5_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+
+ if (conf->max_nr_stripes < conf->min_nr_stripes)
+ /* unlikely, but not impossible */
+ return 0;
+ return conf->max_nr_stripes - conf->min_nr_stripes;
+}
+
static struct r5conf *setup_conf(struct mddev *mddev)
{
struct r5conf *conf;
conf->prev_algo = mddev->layout;
}
- memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
+ conf->min_nr_stripes = NR_STRIPES;
+ memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
- if (grow_stripes(conf, NR_STRIPES)) {
+ if (grow_stripes(conf, conf->min_nr_stripes)) {
printk(KERN_ERR
"md/raid:%s: couldn't allocate %dkB for buffers\n",
mdname(mddev), memory);
} else
printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
mdname(mddev), memory);
+ /*
+ * Losing a stripe head costs more than the time to refill it,
+ * it reduces the queue depth and so can hurt throughput.
+ * So set it rather large, scaled by number of devices.
+ */
+ conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4;
+ conf->shrinker.scan_objects = raid5_cache_scan;
+ conf->shrinker.count_objects = raid5_cache_count;
+ conf->shrinker.batch = 128;
+ conf->shrinker.flags = 0;
+ register_shrinker(&conf->shrinker);
sprintf(pers_name, "raid%d", mddev->new_level);
conf->thread = md_register_thread(raid5d, mddev, pers_name);
*/
struct r5conf *conf = mddev->private;
if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
- > conf->max_nr_stripes ||
+ > conf->min_nr_stripes ||
((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
- > conf->max_nr_stripes) {
+ > conf->min_nr_stripes) {
printk(KERN_WARNING "md/raid:%s: reshape: not enough stripes. Needed %lu\n",
mdname(mddev),
((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)