2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
4 /* Reiserfs block (de)allocator, bitmap-based. */
6 #include <linux/time.h>
7 #include <linux/reiserfs_fs.h>
8 #include <linux/errno.h>
9 #include <linux/buffer_head.h>
10 #include <linux/kernel.h>
11 #include <linux/pagemap.h>
12 #include <linux/reiserfs_fs_sb.h>
13 #include <linux/reiserfs_fs_i.h>
14 #include <linux/quotaops.h>
16 #define PREALLOCATION_SIZE 9
18 /* different reiserfs block allocator options */
20 #define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
22 #define _ALLOC_concentrating_formatted_nodes 0
23 #define _ALLOC_displacing_large_files 1
24 #define _ALLOC_displacing_new_packing_localities 2
25 #define _ALLOC_old_hashed_relocation 3
26 #define _ALLOC_new_hashed_relocation 4
27 #define _ALLOC_skip_busy 5
28 #define _ALLOC_displace_based_on_dirid 6
29 #define _ALLOC_hashed_formatted_nodes 7
30 #define _ALLOC_old_way 8
31 #define _ALLOC_hundredth_slices 9
32 #define _ALLOC_dirid_groups 10
33 #define _ALLOC_oid_groups 11
34 #define _ALLOC_packing_groups 12
36 #define concentrating_formatted_nodes(s) test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
37 #define displacing_large_files(s) test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
38 #define displacing_new_packing_localities(s) test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
40 #define SET_OPTION(optname) \
42 reiserfs_warning(s, "reiserfs: option \"%s\" is set", #optname); \
43 set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
45 #define TEST_OPTION(optname, s) \
46 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
48 static inline void get_bit_address(struct super_block *s,
49 b_blocknr_t block, int *bmap_nr, int *offset)
51 /* It is in the bitmap block number equal to the block
52 * number divided by the number of bits in a block. */
53 *bmap_nr = block >> (s->s_blocksize_bits + 3);
54 /* Within that bitmap block it is located at bit offset *offset. */
55 *offset = block & ((s->s_blocksize << 3) - 1);
58 #ifdef CONFIG_REISERFS_CHECK
59 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
63 if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
65 "vs-4010: is_reusable: block number is out of range %lu (%u)",
66 block, SB_BLOCK_COUNT(s));
70 get_bit_address(s, block, &bmap, &offset);
72 /* Old format filesystem? Unlikely, but the bitmaps are all up front so
73 * we need to account for it. */
74 if (unlikely(test_bit(REISERFS_OLD_FORMAT,
75 &(REISERFS_SB(s)->s_properties)))) {
76 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
77 if (block >= bmap1 && block <= bmap1 + SB_BMAP_NR(s)) {
78 reiserfs_warning(s, "vs: 4019: is_reusable: "
79 "bitmap block %lu(%u) can't be freed or reused",
80 block, SB_BMAP_NR(s));
85 reiserfs_warning(s, "vs: 4020: is_reusable: "
86 "bitmap block %lu(%u) can't be freed or reused",
87 block, SB_BMAP_NR(s));
92 if (bmap >= SB_BMAP_NR(s)) {
94 "vs-4030: is_reusable: there is no so many bitmap blocks: "
95 "block=%lu, bitmap_nr=%d", block, bmap);
99 if ((bit_value == 0 &&
100 reiserfs_test_le_bit(offset, SB_AP_BITMAP(s)[bmap].bh->b_data)) ||
102 reiserfs_test_le_bit(offset, SB_AP_BITMAP(s)[bmap].bh->b_data) == 0)) {
104 "vs-4040: is_reusable: corresponding bit of block %lu does not "
105 "match required value (bmap==%d, offset==%d) test_bit==%d",
106 block, bmap, offset, reiserfs_test_le_bit(offset,
114 if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
116 "vs-4050: is_reusable: this is root block (%u), "
117 "it must be busy", SB_ROOT_BLOCK(s));
123 #endif /* CONFIG_REISERFS_CHECK */
125 /* searches in journal structures for a given block number (bmap, off). If block
126 is found in reiserfs journal it suggests next free block candidate to test. */
127 static inline int is_block_in_journal(struct super_block *s, int bmap, int
132 if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
133 if (tmp) { /* hint supplied */
135 PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
137 (*next) = off + 1; /* inc offset to avoid looping. */
138 PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
140 PROC_INFO_INC(s, scan_bitmap.retry);
146 /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
148 static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
149 int bmap_n, int *beg, int boundary, int min,
152 struct super_block *s = th->t_super;
153 struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
157 BUG_ON(!th->t_trans_id);
159 RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)",
160 bmap_n, SB_BMAP_NR(s) - 1);
161 PROC_INFO_INC(s, scan_bitmap.bmap);
162 /* this is unclear and lacks comments, explain how journal bitmaps
163 work here for the reader. Convey a sense of the design here. What
165 /* - I mean `a window of zero bits' as in description of this function - Zam. */
168 reiserfs_warning(s, "NULL bitmap info pointer for bitmap %d",
172 if (buffer_locked(bi->bh)) {
173 PROC_INFO_INC(s, scan_bitmap.wait);
174 __wait_on_buffer(bi->bh);
179 if (bi->free_count < min)
180 return 0; // No free blocks in this bitmap
182 /* search for a first zero bit -- beggining of a window */
183 *beg = reiserfs_find_next_zero_le_bit
184 ((unsigned long *)(bi->bh->b_data), boundary, *beg);
186 if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
187 * cannot contain a zero window of minimum size */
191 if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
193 /* first zero bit found; we check next bits */
194 for (end = *beg + 1;; end++) {
195 if (end >= *beg + max || end >= boundary
196 || reiserfs_test_le_bit(end, bi->bh->b_data)) {
200 /* finding the other end of zero bit window requires looking into journal structures (in
201 * case of searching for free blocks for unformatted nodes) */
202 if (unfm && is_block_in_journal(s, bmap_n, end, &next))
206 /* now (*beg) points to beginning of zero bits window,
207 * (end) points to one bit after the window end */
208 if (end - *beg >= min) { /* it seems we have found window of proper size */
210 reiserfs_prepare_for_journal(s, bi->bh, 1);
211 /* try to set all blocks used checking are they still free */
212 for (i = *beg; i < end; i++) {
213 /* It seems that we should not check in journal again. */
214 if (reiserfs_test_and_set_le_bit
215 (i, bi->bh->b_data)) {
216 /* bit was set by another process
217 * while we slept in prepare_for_journal() */
218 PROC_INFO_INC(s, scan_bitmap.stolen);
219 if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks,
220 * if length of this set is more or equal to `min' */
224 /* otherwise we clear all bit were set ... */
226 reiserfs_test_and_clear_le_bit
228 reiserfs_restore_prepared_buffer(s,
232 /* ... and search again in current block from beginning */
236 bi->free_count -= (end - *beg);
237 journal_mark_dirty(th, s, bi->bh);
239 /* free block count calculation */
240 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
242 PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
243 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
252 static int bmap_hash_id(struct super_block *s, u32 id)
254 char *hash_in = NULL;
261 hash_in = (char *)(&id);
262 hash = keyed_hash(hash_in, 4);
263 bm = hash % SB_BMAP_NR(s);
267 /* this can only be true when SB_BMAP_NR = 1 */
268 if (bm >= SB_BMAP_NR(s))
274 * hashes the id and then returns > 0 if the block group for the
275 * corresponding hash is full
277 static inline int block_group_used(struct super_block *s, u32 id)
280 bm = bmap_hash_id(s, id);
281 if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100)) {
288 * the packing is returned in disk byte order
290 __le32 reiserfs_choose_packing(struct inode * dir)
293 if (TEST_OPTION(packing_groups, dir->i_sb)) {
294 u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
296 * some versions of reiserfsck expect packing locality 1 to be
299 if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
300 packing = INODE_PKEY(dir)->k_objectid;
302 packing = INODE_PKEY(dir)->k_dir_id;
304 packing = INODE_PKEY(dir)->k_objectid;
308 /* Tries to find contiguous zero bit window (given size) in given region of
309 * bitmap and place new blocks there. Returns number of allocated blocks. */
310 static int scan_bitmap(struct reiserfs_transaction_handle *th,
311 b_blocknr_t * start, b_blocknr_t finish,
312 int min, int max, int unfm, unsigned long file_block)
314 int nr_allocated = 0;
315 struct super_block *s = th->t_super;
316 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
317 * - Hans, it is not a block number - Zam. */
321 int off_max = s->s_blocksize << 3;
323 BUG_ON(!th->t_trans_id);
325 PROC_INFO_INC(s, scan_bitmap.call);
326 if (SB_FREE_BLOCKS(s) <= 0)
327 return 0; // No point in looking for more free blocks
329 get_bit_address(s, *start, &bm, &off);
330 get_bit_address(s, finish, &end_bm, &end_off);
331 if (bm > SB_BMAP_NR(s))
333 if (end_bm > SB_BMAP_NR(s))
334 end_bm = SB_BMAP_NR(s);
336 /* When the bitmap is more than 10% free, anyone can allocate.
337 * When it's less than 10% free, only files that already use the
338 * bitmap are allowed. Once we pass 80% full, this restriction
341 * We do this so that files that grow later still have space close to
342 * their original allocation. This improves locality, and presumably
343 * performance as a result.
345 * This is only an allocation policy and does not make up for getting a
346 * bad hint. Decent hinting must be implemented for this to work well.
348 if (TEST_OPTION(skip_busy, s)
349 && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
350 for (; bm < end_bm; bm++, off = 0) {
351 if ((off && (!unfm || (file_block != 0)))
352 || SB_AP_BITMAP(s)[bm].free_count >
353 (s->s_blocksize << 3) / 10)
355 scan_bitmap_block(th, bm, &off, off_max,
360 /* we know from above that start is a reasonable number */
361 get_bit_address(s, *start, &bm, &off);
364 for (; bm < end_bm; bm++, off = 0) {
366 scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
372 scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
375 *start = bm * off_max + off;
380 static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
381 struct inode *inode, b_blocknr_t block,
384 struct super_block *s = th->t_super;
385 struct reiserfs_super_block *rs;
386 struct buffer_head *sbh;
387 struct reiserfs_bitmap_info *apbi;
390 BUG_ON(!th->t_trans_id);
392 PROC_INFO_INC(s, free_block);
394 rs = SB_DISK_SUPER_BLOCK(s);
395 sbh = SB_BUFFER_WITH_SB(s);
396 apbi = SB_AP_BITMAP(s);
398 get_bit_address(s, block, &nr, &offset);
400 if (nr >= sb_bmap_nr(rs)) {
401 reiserfs_warning(s, "vs-4075: reiserfs_free_block: "
402 "block %lu is out of range on %s",
403 block, reiserfs_bdevname(s));
407 reiserfs_prepare_for_journal(s, apbi[nr].bh, 1);
409 /* clear bit for the given block in bit map */
410 if (!reiserfs_test_and_clear_le_bit(offset, apbi[nr].bh->b_data)) {
411 reiserfs_warning(s, "vs-4080: reiserfs_free_block: "
412 "free_block (%s:%lu)[dev:blocknr]: bit already cleared",
413 reiserfs_bdevname(s), block);
415 apbi[nr].free_count++;
416 journal_mark_dirty(th, s, apbi[nr].bh);
418 reiserfs_prepare_for_journal(s, sbh, 1);
419 /* update super block */
420 set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
422 journal_mark_dirty(th, s, sbh);
424 DQUOT_FREE_BLOCK_NODIRTY(inode, 1);
427 void reiserfs_free_block(struct reiserfs_transaction_handle *th,
428 struct inode *inode, b_blocknr_t block,
431 struct super_block *s = th->t_super;
433 BUG_ON(!th->t_trans_id);
435 RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
436 RFALSE(is_reusable(s, block, 1) == 0,
437 "vs-4071: can not free such block");
438 /* mark it before we clear it, just in case */
439 journal_mark_freed(th, s, block);
440 _reiserfs_free_block(th, inode, block, for_unformatted);
443 /* preallocated blocks don't need to be run through journal_mark_freed */
444 static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
445 struct inode *inode, b_blocknr_t block)
448 "vs-4060: trying to free block on nonexistent device");
449 RFALSE(is_reusable(th->t_super, block, 1) == 0,
450 "vs-4070: can not free such block");
451 BUG_ON(!th->t_trans_id);
452 _reiserfs_free_block(th, inode, block, 1);
455 static void __discard_prealloc(struct reiserfs_transaction_handle *th,
456 struct reiserfs_inode_info *ei)
458 unsigned long save = ei->i_prealloc_block;
460 struct inode *inode = &ei->vfs_inode;
461 BUG_ON(!th->t_trans_id);
462 #ifdef CONFIG_REISERFS_CHECK
463 if (ei->i_prealloc_count < 0)
464 reiserfs_warning(th->t_super,
465 "zam-4001:%s: inode has negative prealloc blocks count.",
468 while (ei->i_prealloc_count > 0) {
469 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
470 ei->i_prealloc_block++;
471 ei->i_prealloc_count--;
475 reiserfs_update_sd(th, inode);
476 ei->i_prealloc_block = save;
477 list_del_init(&(ei->i_prealloc_list));
480 /* FIXME: It should be inline function */
481 void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
484 struct reiserfs_inode_info *ei = REISERFS_I(inode);
485 BUG_ON(!th->t_trans_id);
486 if (ei->i_prealloc_count)
487 __discard_prealloc(th, ei);
490 void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
492 struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
494 BUG_ON(!th->t_trans_id);
496 while (!list_empty(plist)) {
497 struct reiserfs_inode_info *ei;
498 ei = list_entry(plist->next, struct reiserfs_inode_info,
500 #ifdef CONFIG_REISERFS_CHECK
501 if (!ei->i_prealloc_count) {
502 reiserfs_warning(th->t_super,
503 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.",
507 __discard_prealloc(th, ei);
511 void reiserfs_init_alloc_options(struct super_block *s)
513 set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
514 set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
515 set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
518 /* block allocator related options are parsed here */
519 int reiserfs_parse_alloc_options(struct super_block *s, char *options)
521 char *this_char, *value;
523 REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */
525 while ((this_char = strsep(&options, ":")) != NULL) {
526 if ((value = strchr(this_char, '=')) != NULL)
529 if (!strcmp(this_char, "concentrating_formatted_nodes")) {
531 SET_OPTION(concentrating_formatted_nodes);
533 && *value) ? simple_strtoul(value, &value,
535 if (temp <= 0 || temp > 100) {
536 REISERFS_SB(s)->s_alloc_options.border = 10;
538 REISERFS_SB(s)->s_alloc_options.border =
543 if (!strcmp(this_char, "displacing_large_files")) {
544 SET_OPTION(displacing_large_files);
545 REISERFS_SB(s)->s_alloc_options.large_file_size =
547 && *value) ? simple_strtoul(value, &value, 0) : 16;
550 if (!strcmp(this_char, "displacing_new_packing_localities")) {
551 SET_OPTION(displacing_new_packing_localities);
555 if (!strcmp(this_char, "old_hashed_relocation")) {
556 SET_OPTION(old_hashed_relocation);
560 if (!strcmp(this_char, "new_hashed_relocation")) {
561 SET_OPTION(new_hashed_relocation);
565 if (!strcmp(this_char, "dirid_groups")) {
566 SET_OPTION(dirid_groups);
569 if (!strcmp(this_char, "oid_groups")) {
570 SET_OPTION(oid_groups);
573 if (!strcmp(this_char, "packing_groups")) {
574 SET_OPTION(packing_groups);
577 if (!strcmp(this_char, "hashed_formatted_nodes")) {
578 SET_OPTION(hashed_formatted_nodes);
582 if (!strcmp(this_char, "skip_busy")) {
583 SET_OPTION(skip_busy);
587 if (!strcmp(this_char, "hundredth_slices")) {
588 SET_OPTION(hundredth_slices);
592 if (!strcmp(this_char, "old_way")) {
597 if (!strcmp(this_char, "displace_based_on_dirid")) {
598 SET_OPTION(displace_based_on_dirid);
602 if (!strcmp(this_char, "preallocmin")) {
603 REISERFS_SB(s)->s_alloc_options.preallocmin =
605 && *value) ? simple_strtoul(value, &value, 0) : 4;
609 if (!strcmp(this_char, "preallocsize")) {
610 REISERFS_SB(s)->s_alloc_options.preallocsize =
612 && *value) ? simple_strtoul(value, &value,
618 reiserfs_warning(s, "zam-4001: %s : unknown option - %s",
619 __FUNCTION__, this_char);
623 reiserfs_warning(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
627 static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
630 if (hint->formatted_node) {
631 hash_in = (char *)&hint->key.k_dir_id;
634 //hint->search_start = hint->beg;
635 hash_in = (char *)&hint->key.k_dir_id;
637 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
638 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
641 (char *)(&INODE_PKEY(hint->inode)->k_objectid);
645 hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
649 * Relocation based on dirid, hashing them into a given bitmap block
650 * files. Formatted nodes are unaffected, a seperate policy covers them
652 static void dirid_groups(reiserfs_blocknr_hint_t * hint)
657 struct super_block *sb = hint->th->t_super;
659 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
660 else if (hint->formatted_node)
661 dirid = hint->key.k_dir_id;
664 bm = bmap_hash_id(sb, dirid);
665 hash = bm * (sb->s_blocksize << 3);
666 /* give a portion of the block group to metadata */
668 hash += sb->s_blocksize / 2;
669 hint->search_start = hash;
674 * Relocation based on oid, hashing them into a given bitmap block
675 * files. Formatted nodes are unaffected, a seperate policy covers them
677 static void oid_groups(reiserfs_blocknr_hint_t * hint)
685 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
687 /* keep the root dir and it's first set of subdirs close to
688 * the start of the disk
691 hash = (hint->inode->i_sb->s_blocksize << 3);
693 oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
694 bm = bmap_hash_id(hint->inode->i_sb, oid);
695 hash = bm * (hint->inode->i_sb->s_blocksize << 3);
697 hint->search_start = hash;
701 /* returns 1 if it finds an indirect item and gets valid hint info
702 * from it, otherwise 0
704 static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
707 struct buffer_head *bh;
708 struct item_head *ih;
713 if (!hint->path) /* reiserfs code can call this function w/o pointer to path
714 * structure supplied; then we rely on supplied search_start */
718 bh = get_last_bh(path);
719 RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
721 pos_in_item = path->pos_in_item;
722 item = get_item(path);
724 hint->search_start = bh->b_blocknr;
726 if (!hint->formatted_node && is_indirect_le_ih(ih)) {
727 /* for indirect item: go to left and look for the first non-hole entry
728 in the indirect item */
729 if (pos_in_item == I_UNFM_NUM(ih))
731 // pos_in_item = I_UNFM_NUM (ih) - 1;
732 while (pos_in_item >= 0) {
733 int t = get_block_num(item, pos_in_item);
735 hint->search_start = t;
743 /* does result value fit into specified region? */
747 /* should be, if formatted node, then try to put on first part of the device
748 specified as number of percent with mount option device, else try to put
749 on last of device. This is not to say it is good code to do so,
750 but the effect should be measured. */
751 static inline void set_border_in_hint(struct super_block *s,
752 reiserfs_blocknr_hint_t * hint)
755 SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
757 if (hint->formatted_node)
758 hint->end = border - 1;
763 static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
765 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
768 keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
769 4) % (hint->end - hint->beg);
773 keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
774 4) % (hint->end - hint->beg);
777 static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
782 hash_in = (char *)&hint->key.k_dir_id;
783 else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
784 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
786 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
789 hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
793 this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
796 return hint->block ==
797 REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
800 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
801 static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
803 struct in_core_key *key = &hint->key;
805 hint->th->displace_new_blocks = 0;
807 hint->beg + keyed_hash((char *)(&key->k_objectid),
808 4) % (hint->end - hint->beg);
812 static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
817 if (hint->formatted_node || hint->inode == NULL) {
821 hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
823 hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
824 4) % (hint->end - hint->beg - 1);
825 if (border > hint->search_start)
826 hint->search_start = border;
831 static inline int old_way(reiserfs_blocknr_hint_t * hint)
835 if (hint->formatted_node || hint->inode == NULL) {
841 le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
843 if (border > hint->search_start)
844 hint->search_start = border;
849 static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
851 struct in_core_key *key = &hint->key;
852 b_blocknr_t slice_start;
855 (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
856 if (slice_start > hint->search_start
857 || slice_start + (hint->end / 100) <= hint->search_start) {
858 hint->search_start = slice_start;
862 static void determine_search_start(reiserfs_blocknr_hint_t * hint,
865 struct super_block *s = hint->th->t_super;
869 hint->end = SB_BLOCK_COUNT(s) - 1;
871 /* This is former border algorithm. Now with tunable border offset */
872 if (concentrating_formatted_nodes(s))
873 set_border_in_hint(s, hint);
875 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
876 /* whenever we create a new directory, we displace it. At first we will
877 hash for location, later we might look for a moderately empty place for
879 if (displacing_new_packing_localities(s)
880 && hint->th->displace_new_blocks) {
881 displace_new_packing_locality(hint);
883 /* we do not continue determine_search_start,
884 * if new packing locality is being displaced */
889 /* all persons should feel encouraged to add more special cases here and
892 if (displacing_large_files(s) && !hint->formatted_node
893 && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
894 displace_large_file(hint);
898 /* if none of our special cases is relevant, use the left neighbor in the
899 tree order of the new node we are allocating for */
900 if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
901 hash_formatted_node(hint);
905 unfm_hint = get_left_neighbor(hint);
907 /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
908 new blocks are displaced based on directory ID. Also, if suggested search_start
909 is less than last preallocated block, we start searching from it, assuming that
910 HDD dataflow is faster in forward direction */
911 if (TEST_OPTION(old_way, s)) {
912 if (!hint->formatted_node) {
913 if (!reiserfs_hashed_relocation(s))
915 else if (!reiserfs_no_unhashed_relocation(s))
916 old_hashed_relocation(hint);
919 && hint->search_start <
920 REISERFS_I(hint->inode)->i_prealloc_block)
922 REISERFS_I(hint->inode)->i_prealloc_block;
927 /* This is an approach proposed by Hans */
928 if (TEST_OPTION(hundredth_slices, s)
929 && !(displacing_large_files(s) && !hint->formatted_node)) {
930 hundredth_slices(hint);
934 /* old_hashed_relocation only works on unformatted */
935 if (!unfm_hint && !hint->formatted_node &&
936 TEST_OPTION(old_hashed_relocation, s)) {
937 old_hashed_relocation(hint);
939 /* new_hashed_relocation works with both formatted/unformatted nodes */
940 if ((!unfm_hint || hint->formatted_node) &&
941 TEST_OPTION(new_hashed_relocation, s)) {
942 new_hashed_relocation(hint);
944 /* dirid grouping works only on unformatted nodes */
945 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
948 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
949 if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
954 /* oid grouping works only on unformatted nodes */
955 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
961 static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
963 /* make minimum size a mount option and benchmark both ways */
964 /* we preallocate blocks only for regular files, specific size */
965 /* benchmark preallocating always and see what happens */
967 hint->prealloc_size = 0;
969 if (!hint->formatted_node && hint->preallocate) {
970 if (S_ISREG(hint->inode->i_mode)
971 && hint->inode->i_size >=
972 REISERFS_SB(hint->th->t_super)->s_alloc_options.
973 preallocmin * hint->inode->i_sb->s_blocksize)
974 hint->prealloc_size =
975 REISERFS_SB(hint->th->t_super)->s_alloc_options.
981 /* XXX I know it could be merged with upper-level function;
982 but may be result function would be too complex. */
983 static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
984 b_blocknr_t * new_blocknrs,
986 b_blocknr_t finish, int min,
990 int rest = amount_needed;
993 while (rest > 0 && start <= finish) {
994 nr_allocated = scan_bitmap(hint->th, &start, finish, min,
995 rest + prealloc_size,
996 !hint->formatted_node, hint->block);
998 if (nr_allocated == 0) /* no new blocks allocated, return */
1001 /* fill free_blocknrs array first */
1002 while (rest > 0 && nr_allocated > 0) {
1003 *new_blocknrs++ = start++;
1008 /* do we have something to fill prealloc. array also ? */
1009 if (nr_allocated > 0) {
1010 /* it means prealloc_size was greater that 0 and we do preallocation */
1011 list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1012 &SB_JOURNAL(hint->th->t_super)->
1014 REISERFS_I(hint->inode)->i_prealloc_block = start;
1015 REISERFS_I(hint->inode)->i_prealloc_count =
1021 return (amount_needed - rest);
1024 static inline int blocknrs_and_prealloc_arrays_from_search_start
1025 (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1026 int amount_needed) {
1027 struct super_block *s = hint->th->t_super;
1028 b_blocknr_t start = hint->search_start;
1029 b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1031 int nr_allocated = 0;
1034 determine_prealloc_size(hint);
1035 if (!hint->formatted_node) {
1037 #ifdef REISERQUOTA_DEBUG
1038 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1039 "reiserquota: allocating %d blocks id=%u",
1040 amount_needed, hint->inode->i_uid);
1043 DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed);
1044 if (quota_ret) /* Quota exceeded? */
1045 return QUOTA_EXCEEDED;
1046 if (hint->preallocate && hint->prealloc_size) {
1047 #ifdef REISERQUOTA_DEBUG
1048 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1049 "reiserquota: allocating (prealloc) %d blocks id=%u",
1050 hint->prealloc_size, hint->inode->i_uid);
1053 DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode,
1054 hint->prealloc_size);
1056 hint->preallocate = hint->prealloc_size = 0;
1058 /* for unformatted nodes, force large allocations */
1059 bigalloc = amount_needed;
1063 /* in bigalloc mode, nr_allocated should stay zero until
1064 * the entire allocation is filled
1066 if (unlikely(bigalloc && nr_allocated)) {
1067 reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n",
1068 bigalloc, nr_allocated);
1069 /* reset things to a sane value */
1070 bigalloc = amount_needed - nr_allocated;
1073 * try pass 0 and pass 1 looking for a nice big
1074 * contiguous allocation. Then reset and look
1075 * for anything you can find.
1077 if (passno == 2 && bigalloc) {
1082 case 0: /* Search from hint->search_start to end of disk */
1083 start = hint->search_start;
1084 finish = SB_BLOCK_COUNT(s) - 1;
1086 case 1: /* Search from hint->beg to hint->search_start */
1088 finish = hint->search_start;
1090 case 2: /* Last chance: Search from 0 to hint->beg */
1094 default: /* We've tried searching everywhere, not enough space */
1095 /* Free the blocks */
1096 if (!hint->formatted_node) {
1097 #ifdef REISERQUOTA_DEBUG
1098 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1099 "reiserquota: freeing (nospace) %d blocks id=%u",
1101 hint->prealloc_size -
1103 hint->inode->i_uid);
1105 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */
1107 while (nr_allocated--)
1108 reiserfs_free_block(hint->th, hint->inode,
1109 new_blocknrs[nr_allocated],
1110 !hint->formatted_node);
1112 return NO_DISK_SPACE;
1114 } while ((nr_allocated += allocate_without_wrapping_disk(hint,
1125 if (!hint->formatted_node &&
1126 amount_needed + hint->prealloc_size >
1127 nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1128 /* Some of preallocation blocks were not allocated */
1129 #ifdef REISERQUOTA_DEBUG
1130 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1131 "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1132 amount_needed + hint->prealloc_size -
1134 REISERFS_I(hint->inode)->i_prealloc_count,
1135 hint->inode->i_uid);
1137 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed +
1138 hint->prealloc_size - nr_allocated -
1139 REISERFS_I(hint->inode)->
1146 /* grab new blocknrs from preallocated list */
1147 /* return amount still needed after using them */
1148 static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1149 b_blocknr_t * new_blocknrs,
1152 struct inode *inode = hint->inode;
1154 if (REISERFS_I(inode)->i_prealloc_count > 0) {
1155 while (amount_needed) {
1157 *new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1158 REISERFS_I(inode)->i_prealloc_count--;
1162 if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1163 list_del(&REISERFS_I(inode)->i_prealloc_list);
1168 /* return amount still needed after using preallocated blocks */
1169 return amount_needed;
1172 int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us /* Amount of blocks we have
1173 already reserved */ )
1175 int initial_amount_needed = amount_needed;
1177 struct super_block *s = hint->th->t_super;
1179 /* Check if there is enough space, taking into account reserved space */
1180 if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1181 amount_needed - reserved_by_us)
1182 return NO_DISK_SPACE;
1183 /* should this be if !hint->inode && hint->preallocate? */
1184 /* do you mean hint->formatted_node can be removed ? - Zam */
1185 /* hint->formatted_node cannot be removed because we try to access
1186 inode information here, and there is often no inode assotiated with
1187 metadata allocations - green */
1189 if (!hint->formatted_node && hint->preallocate) {
1190 amount_needed = use_preallocated_list_if_available
1191 (hint, new_blocknrs, amount_needed);
1192 if (amount_needed == 0) /* all blocknrs we need we got from
1195 new_blocknrs += (initial_amount_needed - amount_needed);
1198 /* find search start and save it in hint structure */
1199 determine_search_start(hint, amount_needed);
1200 if (hint->search_start >= SB_BLOCK_COUNT(s))
1201 hint->search_start = SB_BLOCK_COUNT(s) - 1;
1203 /* allocation itself; fill new_blocknrs and preallocation arrays */
1204 ret = blocknrs_and_prealloc_arrays_from_search_start
1205 (hint, new_blocknrs, amount_needed);
1207 /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1208 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1211 if (ret != CARRY_ON) {
1212 while (amount_needed++ < initial_amount_needed) {
1213 reiserfs_free_block(hint->th, hint->inode,
1214 *(--new_blocknrs), 1);
1220 /* These 2 functions are here to provide blocks reservation to the rest of kernel */
1221 /* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure
1222 there are actually this much blocks on the FS available */
1223 void reiserfs_claim_blocks_to_be_allocated(struct super_block *sb, /* super block of
1227 int blocks /* How much to reserve */
1231 /* Fast case, if reservation is zero - exit immediately. */
1235 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1236 REISERFS_SB(sb)->reserved_blocks += blocks;
1237 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1240 /* Unreserve @blocks amount of blocks in fs pointed by @sb */
1241 void reiserfs_release_claimed_blocks(struct super_block *sb, /* super block of
1245 int blocks /* How much to unreserve */
1249 /* Fast case, if unreservation is zero - exit immediately. */
1253 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1254 REISERFS_SB(sb)->reserved_blocks -= blocks;
1255 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1256 RFALSE(REISERFS_SB(sb)->reserved_blocks < 0,
1257 "amount of blocks reserved became zero?");
1260 /* This function estimates how much pages we will be able to write to FS
1261 used for reiserfs_file_write() purposes for now. */
1262 int reiserfs_can_fit_pages(struct super_block *sb /* superblock of filesystem
1263 to estimate space */ )
1267 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1269 (SB_FREE_BLOCKS(sb) -
1270 REISERFS_SB(sb)->reserved_blocks) >> (PAGE_CACHE_SHIFT -
1271 sb->s_blocksize_bits);
1272 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1274 return space > 0 ? space : 0;