From: Linus Torvalds Date: Wed, 17 Jun 2009 16:46:33 +0000 (-0700) Subject: Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6 X-Git-Tag: v2.6.31-rc1~287 X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=commitdiff_plain;h=b7c142dbf1e7422d0be7f7faa3f1163ad9da9788;hp=0bd8df908de2aefe312d05bd25cd3abc21a6d1da Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6 * 'linux-next' of git://git.infradead.org/ubifs-2.6: UBIFS: start using hrtimers hrtimer: export ktime_add_safe UBIFS: do not forget to register BDI device UBIFS: allow sync option in rootflags UBIFS: remove dead code UBIFS: use anonymous device UBIFS: return proper error code if the compr is not present UBIFS: return error if link and unlink race UBIFS: reset no_space flag after inode deletion --- diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index af1914462f02..eaf6d891d46f 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -91,7 +91,6 @@ static int shrink_liability(struct ubifs_info *c, int nr_to_write) return nr_written; } - /** * run_gc - run garbage collector. * @c: UBIFS file-system description object @@ -628,7 +627,7 @@ void ubifs_convert_page_budget(struct ubifs_info *c) * * This function releases budget corresponding to a dirty inode. It is usually * called when after the inode has been written to the media and marked as - * clean. + * clean. It also causes the "no space" flags to be cleared. */ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, struct ubifs_inode *ui) @@ -636,6 +635,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, struct ubifs_budget_req req; memset(&req, 0, sizeof(struct ubifs_budget_req)); + /* The "no space" flags will be cleared because dd_growth is > 0 */ req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); ubifs_release_budget(c, &req); } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index f55d523c52bb..552fb0111fff 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -528,6 +528,25 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); + + /* + * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing + * otherwise has the potential to corrupt the orphan inode list. + * + * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and + * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not + * lock 'dirA->i_mutex', so this is possible. Both of the functions + * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes + * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this + * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA' + * to the list of orphans. After this, 'vfs_link()' will link + * 'dirB/fileB' to 'inodeA'. This is a problem because, for example, + * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode + * to the list of orphans. + */ + if (inode->i_nlink == 0) + return -ENOENT; + err = dbg_check_synced_i_size(inode); if (err) return err; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index e8e632a1dcdf..bc5857199ec2 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -293,13 +293,14 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) * * This function is called when the write-buffer timer expires. */ -static void wbuf_timer_callback_nolock(unsigned long data) +static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) { - struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data; + struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); wbuf->need_sync = 1; wbuf->c->need_wbuf_sync = 1; ubifs_wake_up_bgt(wbuf->c); + return HRTIMER_NORESTART; } /** @@ -308,13 +309,12 @@ static void wbuf_timer_callback_nolock(unsigned long data) */ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) { - ubifs_assert(!timer_pending(&wbuf->timer)); + ubifs_assert(!hrtimer_active(&wbuf->timer)); - if (!wbuf->timeout) + if (!ktime_to_ns(wbuf->softlimit)) return; - - wbuf->timer.expires = jiffies + wbuf->timeout; - add_timer(&wbuf->timer); + hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta, + HRTIMER_MODE_REL); } /** @@ -329,7 +329,7 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) * should be canceled. */ wbuf->need_sync = 0; - del_timer(&wbuf->timer); + hrtimer_cancel(&wbuf->timer); } /** @@ -825,6 +825,7 @@ out: int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) { size_t size; + ktime_t hardlimit; wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); if (!wbuf->buf) @@ -845,14 +846,21 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) wbuf->sync_callback = NULL; mutex_init(&wbuf->io_mutex); spin_lock_init(&wbuf->lock); - wbuf->c = c; - init_timer(&wbuf->timer); - wbuf->timer.function = wbuf_timer_callback_nolock; - wbuf->timer.data = (unsigned long)wbuf; - wbuf->timeout = DEFAULT_WBUF_TIMEOUT; wbuf->next_ino = 0; + hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + wbuf->timer.function = wbuf_timer_callback_nolock; + /* + * Make write-buffer soft limit to be 20% of the hard limit. The + * write-buffer timer is allowed to expire any time between the soft + * and hard limits. + */ + hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0); + wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10; + wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta); + hrtimer_set_expires_range_ns(&wbuf->timer, wbuf->softlimit, + wbuf->delta); return 0; } diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 10662975d2ef..805605250f12 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -343,33 +343,15 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) * * This function returns %1 if @offs was in the last write to the LEB whose data * is in @buf, otherwise %0 is returned. The determination is made by checking - * for subsequent empty space starting from the next min_io_size boundary (or a - * bit less than the common header size if min_io_size is one). + * for subsequent empty space starting from the next @c->min_io_size boundary. */ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) { - int empty_offs; - int check_len; + int empty_offs, check_len; uint8_t *p; - if (c->min_io_size == 1) { - check_len = c->leb_size - offs; - p = buf + check_len; - for (; check_len > 0; check_len--) - if (*--p != 0xff) - break; - /* - * 'check_len' is the size of the corruption which cannot be - * more than the size of 1 node if it was caused by an unclean - * unmount. - */ - if (check_len > UBIFS_MAX_NODE_SZ) - return 0; - return 1; - } - /* - * Round up to the next c->min_io_size boundary i.e. 'offs' is in the + * Round up to the next @c->min_io_size boundary i.e. @offs is in the * last wbuf written. After that should be empty space. */ empty_offs = ALIGN(offs + 1, c->min_io_size); @@ -392,7 +374,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) * * This function pads up to the next min_io_size boundary (if there is one) and * sets empty space to all 0xff. @buf, @offs and @len are updated to the next - * min_io_size boundary (if there is one). + * @c->min_io_size boundary. */ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, int *offs, int *len) @@ -402,11 +384,6 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, lnum = lnum; dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); - if (c->min_io_size == 1) { - memset(*buf, 0xff, c->leb_size - *offs); - return; - } - ubifs_assert(!(*offs & 7)); empty_offs = ALIGN(*offs, c->min_io_size); pad_len = empty_offs - *offs; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3260b73abe29..79fad43f3c57 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -361,6 +361,11 @@ static void ubifs_delete_inode(struct inode *inode) out: if (ui->dirty) ubifs_release_dirty_inode_budget(c, ui); + else { + /* We've deleted something - clean the "no space" flags */ + c->nospace = c->nospace_rp = 0; + smp_wmb(); + } clear_inode(inode); } @@ -792,7 +797,7 @@ static int alloc_wbufs(struct ubifs_info *c) * does not need to be synchronized by timer. */ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; - c->jheads[GCHD].wbuf.timeout = 0; + c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0); return 0; } @@ -932,6 +937,27 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; +/** + * parse_standard_option - parse a standard mount option. + * @option: the option to parse + * + * Normally, standard mount options like "sync" are passed to file-systems as + * flags. However, when a "rootflags=" kernel boot parameter is used, they may + * be present in the options string. This function tries to deal with this + * situation and parse standard options. Returns 0 if the option was not + * recognized, and the corresponding integer flag if it was. + * + * UBIFS is only interested in the "sync" option, so do not check for anything + * else. + */ +static int parse_standard_option(const char *option) +{ + ubifs_msg("parse %s", option); + if (!strcmp(option, "sync")) + return MS_SYNCHRONOUS; + return 0; +} + /** * ubifs_parse_options - parse mount parameters. * @c: UBIFS file-system description object @@ -1008,9 +1034,19 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, break; } default: - ubifs_err("unrecognized mount option \"%s\" " - "or missing value", p); - return -EINVAL; + { + unsigned long flag; + struct super_block *sb = c->vfs_sb; + + flag = parse_standard_option(p); + if (!flag) { + ubifs_err("unrecognized mount option \"%s\" " + "or missing value", p); + return -EINVAL; + } + sb->s_flags |= flag; + break; + } } } @@ -1180,6 +1216,7 @@ static int mount_ubifs(struct ubifs_info *c) if (!ubifs_compr_present(c->default_compr)) { ubifs_err("'compressor \"%s\" is not compiled in", ubifs_compr_name(c->default_compr)); + err = -ENOTSUPP; goto out_free; } @@ -1656,7 +1693,7 @@ static void ubifs_remount_ro(struct ubifs_info *c) for (i = 0; i < c->jhead_cnt; i++) { ubifs_wbuf_sync(&c->jheads[i].wbuf); - del_timer_sync(&c->jheads[i].wbuf.timer); + hrtimer_cancel(&c->jheads[i].wbuf.timer); } c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); @@ -1719,7 +1756,7 @@ static void ubifs_put_super(struct super_block *sb) if (c->jheads) for (i = 0; i < c->jhead_cnt; i++) { ubifs_wbuf_sync(&c->jheads[i].wbuf); - del_timer_sync(&c->jheads[i].wbuf.timer); + hrtimer_cancel(&c->jheads[i].wbuf.timer); } /* @@ -1911,6 +1948,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&c->orph_list); INIT_LIST_HEAD(&c->orph_new); + c->vfs_sb = sb; c->highest_inum = UBIFS_FIRST_INO; c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; @@ -1945,13 +1983,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_bdi; - c->vfs_sb = sb; - sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; sb->s_blocksize = UBIFS_BLOCK_SIZE; sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; - sb->s_dev = c->vi.cdev; sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); if (c->max_inode_sz > MAX_LFS_FILESIZE) sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; @@ -1996,16 +2031,9 @@ out_free: static int sb_test(struct super_block *sb, void *data) { dev_t *dev = data; + struct ubifs_info *c = sb->s_fs_info; - return sb->s_dev == *dev; -} - -static int sb_set(struct super_block *sb, void *data) -{ - dev_t *dev = data; - - sb->s_dev = *dev; - return 0; + return c->vi.cdev == *dev; } static int ubifs_get_sb(struct file_system_type *fs_type, int flags, @@ -2033,7 +2061,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); - sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); + sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev); if (IS_ERR(sb)) { err = PTR_ERR(sb); goto out_close; @@ -2073,16 +2101,11 @@ out_close: return err; } -static void ubifs_kill_sb(struct super_block *sb) -{ - generic_shutdown_super(sb); -} - static struct file_system_type ubifs_fs_type = { .name = "ubifs", .owner = THIS_MODULE, .get_sb = ubifs_get_sb, - .kill_sb = ubifs_kill_sb + .kill_sb = kill_anon_super, }; /* diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 0a8341e14088..1bf01d820066 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -95,8 +95,8 @@ */ #define BGT_NAME_PATTERN "ubifs_bgt%d_%d" -/* Default write-buffer synchronization timeout (5 secs) */ -#define DEFAULT_WBUF_TIMEOUT (5 * HZ) +/* Default write-buffer synchronization timeout in seconds */ +#define DEFAULT_WBUF_TIMEOUT_SECS 5 /* Maximum possible inode number (only 32-bit inodes are supported now) */ #define MAX_INUM 0xFFFFFFFF @@ -650,8 +650,10 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, * @io_mutex: serializes write-buffer I/O * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes * fields + * @softlimit: soft write-buffer timeout interval + * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit + * and @softlimit + @delta) * @timer: write-buffer timer - * @timeout: timer expire interval in jiffies * @need_sync: it is set if its timer expired and needs sync * @next_ino: points to the next position of the following inode number * @inodes: stores the inode numbers of the nodes which are in wbuf @@ -678,8 +680,9 @@ struct ubifs_wbuf { int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); struct mutex io_mutex; spinlock_t lock; - struct timer_list timer; - int timeout; + ktime_t softlimit; + unsigned long long delta; + struct hrtimer timer; int need_sync; int next_ino; ino_t *inodes; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b675a67c9ac3..9002958a96e7 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -380,6 +380,8 @@ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) return res; } +EXPORT_SYMBOL_GPL(ktime_add_safe); + #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static struct debug_obj_descr hrtimer_debug_descr;