Btrfs: Fix split_leaf to avoid incorrect double splits
[pandora-kernel.git] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
25 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26                       *root, struct btrfs_path *path, int level);
27 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28                       *root, struct btrfs_key *ins_key,
29                       struct btrfs_path *path, int data_size);
30 static int push_node_left(struct btrfs_trans_handle *trans,
31                           struct btrfs_root *root, struct extent_buffer *dst,
32                           struct extent_buffer *src);
33 static int balance_node_right(struct btrfs_trans_handle *trans,
34                               struct btrfs_root *root,
35                               struct extent_buffer *dst_buf,
36                               struct extent_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38                    struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47         struct btrfs_path *path;
48         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49         if (path) {
50                 btrfs_init_path(path);
51                 path->reada = 1;
52         }
53         return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58         btrfs_release_path(NULL, p);
59         kmem_cache_free(btrfs_path_cachep, p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64         int i;
65         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66                 if (!p->nodes[i])
67                         break;
68                 free_extent_buffer(p->nodes[i]);
69         }
70         memset(p, 0, sizeof(*p));
71 }
72
73 static int __btrfs_cow_block(struct btrfs_trans_handle *trans,
74                              struct btrfs_root *root,
75                              struct extent_buffer *buf,
76                              struct extent_buffer *parent, int parent_slot,
77                              struct extent_buffer **cow_ret,
78                              u64 search_start, u64 empty_size)
79 {
80         struct extent_buffer *cow;
81         int ret = 0;
82         int different_trans = 0;
83
84         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
85
86         cow = btrfs_alloc_free_block(trans, root, buf->len,
87                                      search_start, empty_size);
88         if (IS_ERR(cow))
89                 return PTR_ERR(cow);
90
91         copy_extent_buffer(cow, buf, 0, 0, cow->len);
92         btrfs_set_header_bytenr(cow, cow->start);
93         btrfs_set_header_generation(cow, trans->transid);
94         btrfs_set_header_owner(cow, root->root_key.objectid);
95
96         WARN_ON(btrfs_header_generation(buf) > trans->transid);
97         if (btrfs_header_generation(buf) != trans->transid) {
98                 different_trans = 1;
99                 ret = btrfs_inc_ref(trans, root, buf);
100                 if (ret)
101                         return ret;
102         } else {
103                 clean_tree_block(trans, root, buf);
104         }
105
106         if (buf == root->node) {
107                 root->node = cow;
108                 extent_buffer_get(cow);
109                 if (buf != root->commit_root) {
110                         btrfs_free_extent(trans, root, buf->start,
111                                           buf->len, 1);
112                 }
113                 free_extent_buffer(buf);
114         } else {
115                 btrfs_set_node_blockptr(parent, parent_slot,
116                                         cow->start);
117                 btrfs_mark_buffer_dirty(parent);
118                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
119                 btrfs_free_extent(trans, root, buf->start, buf->len, 1);
120         }
121         free_extent_buffer(buf);
122         btrfs_mark_buffer_dirty(cow);
123         *cow_ret = cow;
124         return 0;
125 }
126
127 int btrfs_cow_block(struct btrfs_trans_handle *trans,
128                     struct btrfs_root *root, struct extent_buffer *buf,
129                     struct extent_buffer *parent, int parent_slot,
130                     struct extent_buffer **cow_ret)
131 {
132         u64 search_start;
133         int ret;
134         if (trans->transaction != root->fs_info->running_transaction) {
135                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
136                        root->fs_info->running_transaction->transid);
137                 WARN_ON(1);
138         }
139         if (trans->transid != root->fs_info->generation) {
140                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
141                        root->fs_info->generation);
142                 WARN_ON(1);
143         }
144         if (btrfs_header_generation(buf) == trans->transid) {
145                 *cow_ret = buf;
146                 return 0;
147         }
148
149         search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
150         ret = __btrfs_cow_block(trans, root, buf, parent,
151                                  parent_slot, cow_ret, search_start, 0);
152         return ret;
153 }
154
155 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
156 {
157         if (blocknr < other && other - (blocknr + blocksize) < 32768)
158                 return 1;
159         if (blocknr > other && blocknr - (other + blocksize) < 32768)
160                 return 1;
161         return 0;
162 }
163
164 static int should_defrag_leaf(struct extent_buffer *leaf)
165 {
166         struct btrfs_key key;
167         u32 nritems;
168
169         if (btrfs_buffer_defrag(leaf))
170                 return 1;
171
172         nritems = btrfs_header_nritems(leaf);
173         if (nritems == 0)
174                 return 0;
175
176         btrfs_item_key_to_cpu(leaf, &key, 0);
177         if (key.type == BTRFS_DIR_ITEM_KEY)
178                 return 1;
179
180
181         btrfs_item_key_to_cpu(leaf, &key, nritems - 1);
182         if (key.type == BTRFS_DIR_ITEM_KEY)
183                 return 1;
184         if (nritems > 4) {
185                 btrfs_item_key_to_cpu(leaf, &key, nritems / 2);
186                 if (key.type == BTRFS_DIR_ITEM_KEY)
187                         return 1;
188         }
189         return 0;
190 }
191
192 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
193                        struct btrfs_root *root, struct extent_buffer *parent,
194                        int start_slot, int cache_only, u64 *last_ret,
195                        struct btrfs_key *progress)
196 {
197         struct extent_buffer *cur;
198         struct extent_buffer *tmp;
199         u64 blocknr;
200         u64 search_start = *last_ret;
201         u64 last_block = 0;
202         u64 other;
203         u32 parent_nritems;
204         int end_slot;
205         int i;
206         int err = 0;
207         int parent_level;
208         int uptodate;
209         u32 blocksize;
210
211         if (trans->transaction != root->fs_info->running_transaction) {
212                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
213                        root->fs_info->running_transaction->transid);
214                 WARN_ON(1);
215         }
216         if (trans->transid != root->fs_info->generation) {
217                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
218                        root->fs_info->generation);
219                 WARN_ON(1);
220         }
221         parent_level = btrfs_header_level(parent);
222
223         parent_nritems = btrfs_header_nritems(parent);
224         blocksize = btrfs_level_size(root, parent_level - 1);
225         end_slot = parent_nritems;
226
227         if (parent_nritems == 1)
228                 return 0;
229
230         if (root != root->fs_info->extent_root) {
231                 struct btrfs_key first_key;
232                 struct btrfs_key last_key;
233
234                 btrfs_node_key_to_cpu(parent, &first_key, 0);
235                 btrfs_node_key_to_cpu(parent, &last_key, parent_nritems - 1);
236                 if (first_key.objectid != last_key.objectid)
237                         return 0;
238         }
239
240         for (i = start_slot; i < end_slot; i++) {
241                 int close = 1;
242
243                 blocknr = btrfs_node_blockptr(parent, i);
244                 if (last_block == 0)
245                         last_block = blocknr;
246                 if (i > 0) {
247                         other = btrfs_node_blockptr(parent, i - 1);
248                         close = close_blocks(blocknr, other, blocksize);
249                 }
250                 if (close && i < end_slot - 1) {
251                         other = btrfs_node_blockptr(parent, i + 1);
252                         close = close_blocks(blocknr, other, blocksize);
253                 }
254                 if (close) {
255                         last_block = blocknr;
256                         continue;
257                 }
258
259                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
260                 if (cur)
261                         uptodate = btrfs_buffer_uptodate(cur);
262                 else
263                         uptodate = 0;
264                 if (!cur || !uptodate ||
265                     (parent_level != 1 && !btrfs_buffer_defrag(cur)) ||
266                     (parent_level == 1 && !should_defrag_leaf(cur))) {
267                         if (cache_only) {
268                                 free_extent_buffer(cur);
269                                 continue;
270                         }
271                         if (!cur) {
272                                 cur = read_tree_block(root, blocknr,
273                                                          blocksize);
274                         } else if (!uptodate) {
275                                 btrfs_read_buffer(cur);
276                         }
277                 }
278                 if (search_start == 0)
279                         search_start = last_block;
280
281                 err = __btrfs_cow_block(trans, root, cur, parent, i,
282                                         &tmp, search_start,
283                                         min(16 * blocksize,
284                                             (end_slot - i) * blocksize));
285                 if (err) {
286                         free_extent_buffer(cur);
287                         break;
288                 }
289                 search_start = tmp->start;
290                 *last_ret = search_start;
291                 if (parent_level == 1)
292                         btrfs_clear_buffer_defrag(tmp);
293                 free_extent_buffer(tmp);
294         }
295         return err;
296 }
297
298 /*
299  * The leaf data grows from end-to-front in the node.
300  * this returns the address of the start of the last item,
301  * which is the stop of the leaf data stack
302  */
303 static inline unsigned int leaf_data_end(struct btrfs_root *root,
304                                          struct extent_buffer *leaf)
305 {
306         u32 nr = btrfs_header_nritems(leaf);
307         if (nr == 0)
308                 return BTRFS_LEAF_DATA_SIZE(root);
309         return btrfs_item_offset_nr(leaf, nr - 1);
310 }
311
312 /*
313  * compare two keys in a memcmp fashion
314  */
315 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
316 {
317         struct btrfs_key k1;
318
319         btrfs_disk_key_to_cpu(&k1, disk);
320
321         if (k1.objectid > k2->objectid)
322                 return 1;
323         if (k1.objectid < k2->objectid)
324                 return -1;
325         if (k1.type > k2->type)
326                 return 1;
327         if (k1.type < k2->type)
328                 return -1;
329         if (k1.offset > k2->offset)
330                 return 1;
331         if (k1.offset < k2->offset)
332                 return -1;
333         return 0;
334 }
335
336 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
337                       int level)
338 {
339         struct extent_buffer *parent = NULL;
340         struct extent_buffer *node = path->nodes[level];
341         struct btrfs_disk_key parent_key;
342         struct btrfs_disk_key node_key;
343         int parent_slot;
344         int slot;
345         struct btrfs_key cpukey;
346         u32 nritems = btrfs_header_nritems(node);
347
348         if (path->nodes[level + 1])
349                 parent = path->nodes[level + 1];
350
351         slot = path->slots[level];
352         BUG_ON(nritems == 0);
353         if (parent) {
354                 parent_slot = path->slots[level + 1];
355                 btrfs_node_key(parent, &parent_key, parent_slot);
356                 btrfs_node_key(node, &node_key, 0);
357                 BUG_ON(memcmp(&parent_key, &node_key,
358                               sizeof(struct btrfs_disk_key)));
359                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
360                        btrfs_header_bytenr(node));
361         }
362         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
363         if (slot != 0) {
364                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
365                 btrfs_node_key(node, &node_key, slot);
366                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
367         }
368         if (slot < nritems - 1) {
369                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
370                 btrfs_node_key(node, &node_key, slot);
371                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
372         }
373         return 0;
374 }
375
376 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
377                       int level)
378 {
379         struct extent_buffer *leaf = path->nodes[level];
380         struct extent_buffer *parent = NULL;
381         int parent_slot;
382         struct btrfs_key cpukey;
383         struct btrfs_disk_key parent_key;
384         struct btrfs_disk_key leaf_key;
385         int slot = path->slots[0];
386
387         u32 nritems = btrfs_header_nritems(leaf);
388
389         if (path->nodes[level + 1])
390                 parent = path->nodes[level + 1];
391
392         if (nritems == 0)
393                 return 0;
394
395         if (parent) {
396                 parent_slot = path->slots[level + 1];
397                 btrfs_node_key(parent, &parent_key, parent_slot);
398                 btrfs_item_key(leaf, &leaf_key, 0);
399
400                 BUG_ON(memcmp(&parent_key, &leaf_key,
401                        sizeof(struct btrfs_disk_key)));
402                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
403                        btrfs_header_bytenr(leaf));
404         }
405 #if 0
406         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
407                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
408                 btrfs_item_key(leaf, &leaf_key, i);
409                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
410                         btrfs_print_leaf(root, leaf);
411                         printk("slot %d offset bad key\n", i);
412                         BUG_ON(1);
413                 }
414                 if (btrfs_item_offset_nr(leaf, i) !=
415                         btrfs_item_end_nr(leaf, i + 1)) {
416                         btrfs_print_leaf(root, leaf);
417                         printk("slot %d offset bad\n", i);
418                         BUG_ON(1);
419                 }
420                 if (i == 0) {
421                         if (btrfs_item_offset_nr(leaf, i) +
422                                btrfs_item_size_nr(leaf, i) !=
423                                BTRFS_LEAF_DATA_SIZE(root)) {
424                                 btrfs_print_leaf(root, leaf);
425                                 printk("slot %d first offset bad\n", i);
426                                 BUG_ON(1);
427                         }
428                 }
429         }
430         if (nritems > 0) {
431                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
432                                 btrfs_print_leaf(root, leaf);
433                                 printk("slot %d bad size \n", nritems - 1);
434                                 BUG_ON(1);
435                 }
436         }
437 #endif
438         if (slot != 0 && slot < nritems - 1) {
439                 btrfs_item_key(leaf, &leaf_key, slot);
440                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
441                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
442                         btrfs_print_leaf(root, leaf);
443                         printk("slot %d offset bad key\n", slot);
444                         BUG_ON(1);
445                 }
446                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
447                        btrfs_item_end_nr(leaf, slot)) {
448                         btrfs_print_leaf(root, leaf);
449                         printk("slot %d offset bad\n", slot);
450                         BUG_ON(1);
451                 }
452         }
453         if (slot < nritems - 1) {
454                 btrfs_item_key(leaf, &leaf_key, slot);
455                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
456                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
457                 if (btrfs_item_offset_nr(leaf, slot) !=
458                         btrfs_item_end_nr(leaf, slot + 1)) {
459                         btrfs_print_leaf(root, leaf);
460                         printk("slot %d offset bad\n", slot);
461                         BUG_ON(1);
462                 }
463         }
464         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
465                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
466         return 0;
467 }
468
469 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
470                         int level)
471 {
472         return 0;
473 #if 0
474         struct extent_buffer *buf = path->nodes[level];
475
476         if (memcmp_extent_buffer(buf, root->fs_info->fsid,
477                                  (unsigned long)btrfs_header_fsid(buf),
478                                  BTRFS_FSID_SIZE)) {
479                 printk("warning bad block %Lu\n", buf->start);
480                 return 1;
481         }
482 #endif
483         if (level == 0)
484                 return check_leaf(root, path, level);
485         return check_node(root, path, level);
486 }
487
488 /*
489  * search for key in the extent_buffer.  The items start at offset p,
490  * and they are item_size apart.  There are 'max' items in p.
491  *
492  * the slot in the array is returned via slot, and it points to
493  * the place where you would insert key if it is not found in
494  * the array.
495  *
496  * slot may point to max if the key is bigger than all of the keys
497  */
498 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
499                               int item_size, struct btrfs_key *key,
500                               int max, int *slot)
501 {
502         int low = 0;
503         int high = max;
504         int mid;
505         int ret;
506         struct btrfs_disk_key *tmp = NULL;
507         struct btrfs_disk_key unaligned;
508         unsigned long offset;
509         char *map_token = NULL;
510         char *kaddr = NULL;
511         unsigned long map_start = 0;
512         unsigned long map_len = 0;
513         int err;
514
515         while(low < high) {
516                 mid = (low + high) / 2;
517                 offset = p + mid * item_size;
518
519                 if (!map_token || offset < map_start ||
520                     (offset + sizeof(struct btrfs_disk_key)) >
521                     map_start + map_len) {
522                         if (map_token) {
523                                 unmap_extent_buffer(eb, map_token, KM_USER0);
524                                 map_token = NULL;
525                         }
526                         err = map_extent_buffer(eb, offset,
527                                                 sizeof(struct btrfs_disk_key),
528                                                 &map_token, &kaddr,
529                                                 &map_start, &map_len, KM_USER0);
530
531                         if (!err) {
532                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
533                                                         map_start);
534                         } else {
535                                 read_extent_buffer(eb, &unaligned,
536                                                    offset, sizeof(unaligned));
537                                 tmp = &unaligned;
538                         }
539
540                 } else {
541                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
542                                                         map_start);
543                 }
544                 ret = comp_keys(tmp, key);
545
546                 if (ret < 0)
547                         low = mid + 1;
548                 else if (ret > 0)
549                         high = mid;
550                 else {
551                         *slot = mid;
552                         if (map_token)
553                                 unmap_extent_buffer(eb, map_token, KM_USER0);
554                         return 0;
555                 }
556         }
557         *slot = low;
558         if (map_token)
559                 unmap_extent_buffer(eb, map_token, KM_USER0);
560         return 1;
561 }
562
563 /*
564  * simple bin_search frontend that does the right thing for
565  * leaves vs nodes
566  */
567 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
568                       int level, int *slot)
569 {
570         if (level == 0) {
571                 return generic_bin_search(eb,
572                                           offsetof(struct btrfs_leaf, items),
573                                           sizeof(struct btrfs_item),
574                                           key, btrfs_header_nritems(eb),
575                                           slot);
576         } else {
577                 return generic_bin_search(eb,
578                                           offsetof(struct btrfs_node, ptrs),
579                                           sizeof(struct btrfs_key_ptr),
580                                           key, btrfs_header_nritems(eb),
581                                           slot);
582         }
583         return -1;
584 }
585
586 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
587                                    struct extent_buffer *parent, int slot)
588 {
589         if (slot < 0)
590                 return NULL;
591         if (slot >= btrfs_header_nritems(parent))
592                 return NULL;
593         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
594                        btrfs_level_size(root, btrfs_header_level(parent) - 1));
595 }
596
597 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
598                          *root, struct btrfs_path *path, int level)
599 {
600         struct extent_buffer *right = NULL;
601         struct extent_buffer *mid;
602         struct extent_buffer *left = NULL;
603         struct extent_buffer *parent = NULL;
604         int ret = 0;
605         int wret;
606         int pslot;
607         int orig_slot = path->slots[level];
608         int err_on_enospc = 0;
609         u64 orig_ptr;
610
611         if (level == 0)
612                 return 0;
613
614         mid = path->nodes[level];
615         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
616
617         if (level < BTRFS_MAX_LEVEL - 1)
618                 parent = path->nodes[level + 1];
619         pslot = path->slots[level + 1];
620
621         /*
622          * deal with the case where there is only one pointer in the root
623          * by promoting the node below to a root
624          */
625         if (!parent) {
626                 struct extent_buffer *child;
627
628                 if (btrfs_header_nritems(mid) != 1)
629                         return 0;
630
631                 /* promote the child to a root */
632                 child = read_node_slot(root, mid, 0);
633                 BUG_ON(!child);
634                 root->node = child;
635                 path->nodes[level] = NULL;
636                 clean_tree_block(trans, root, mid);
637                 wait_on_tree_block_writeback(root, mid);
638                 /* once for the path */
639                 free_extent_buffer(mid);
640                 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1);
641                 /* once for the root ptr */
642                 free_extent_buffer(mid);
643                 return ret;
644         }
645         if (btrfs_header_nritems(mid) >
646             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
647                 return 0;
648
649         if (btrfs_header_nritems(mid) < 2)
650                 err_on_enospc = 1;
651
652         left = read_node_slot(root, parent, pslot - 1);
653         if (left) {
654                 wret = btrfs_cow_block(trans, root, left,
655                                        parent, pslot - 1, &left);
656                 if (wret) {
657                         ret = wret;
658                         goto enospc;
659                 }
660         }
661         right = read_node_slot(root, parent, pslot + 1);
662         if (right) {
663                 wret = btrfs_cow_block(trans, root, right,
664                                        parent, pslot + 1, &right);
665                 if (wret) {
666                         ret = wret;
667                         goto enospc;
668                 }
669         }
670
671         /* first, try to make some room in the middle buffer */
672         if (left) {
673                 orig_slot += btrfs_header_nritems(left);
674                 wret = push_node_left(trans, root, left, mid);
675                 if (wret < 0)
676                         ret = wret;
677                 if (btrfs_header_nritems(mid) < 2)
678                         err_on_enospc = 1;
679         }
680
681         /*
682          * then try to empty the right most buffer into the middle
683          */
684         if (right) {
685                 wret = push_node_left(trans, root, mid, right);
686                 if (wret < 0 && wret != -ENOSPC)
687                         ret = wret;
688                 if (btrfs_header_nritems(right) == 0) {
689                         u64 bytenr = right->start;
690                         u32 blocksize = right->len;
691
692                         clean_tree_block(trans, root, right);
693                         wait_on_tree_block_writeback(root, right);
694                         free_extent_buffer(right);
695                         right = NULL;
696                         wret = del_ptr(trans, root, path, level + 1, pslot +
697                                        1);
698                         if (wret)
699                                 ret = wret;
700                         wret = btrfs_free_extent(trans, root, bytenr,
701                                                  blocksize, 1);
702                         if (wret)
703                                 ret = wret;
704                 } else {
705                         struct btrfs_disk_key right_key;
706                         btrfs_node_key(right, &right_key, 0);
707                         btrfs_set_node_key(parent, &right_key, pslot + 1);
708                         btrfs_mark_buffer_dirty(parent);
709                 }
710         }
711         if (btrfs_header_nritems(mid) == 1) {
712                 /*
713                  * we're not allowed to leave a node with one item in the
714                  * tree during a delete.  A deletion from lower in the tree
715                  * could try to delete the only pointer in this node.
716                  * So, pull some keys from the left.
717                  * There has to be a left pointer at this point because
718                  * otherwise we would have pulled some pointers from the
719                  * right
720                  */
721                 BUG_ON(!left);
722                 wret = balance_node_right(trans, root, mid, left);
723                 if (wret < 0) {
724                         ret = wret;
725                         goto enospc;
726                 }
727                 BUG_ON(wret == 1);
728         }
729         if (btrfs_header_nritems(mid) == 0) {
730                 /* we've managed to empty the middle node, drop it */
731                 u64 bytenr = mid->start;
732                 u32 blocksize = mid->len;
733                 clean_tree_block(trans, root, mid);
734                 wait_on_tree_block_writeback(root, mid);
735                 free_extent_buffer(mid);
736                 mid = NULL;
737                 wret = del_ptr(trans, root, path, level + 1, pslot);
738                 if (wret)
739                         ret = wret;
740                 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1);
741                 if (wret)
742                         ret = wret;
743         } else {
744                 /* update the parent key to reflect our changes */
745                 struct btrfs_disk_key mid_key;
746                 btrfs_node_key(mid, &mid_key, 0);
747                 btrfs_set_node_key(parent, &mid_key, pslot);
748                 btrfs_mark_buffer_dirty(parent);
749         }
750
751         /* update the path */
752         if (left) {
753                 if (btrfs_header_nritems(left) > orig_slot) {
754                         extent_buffer_get(left);
755                         path->nodes[level] = left;
756                         path->slots[level + 1] -= 1;
757                         path->slots[level] = orig_slot;
758                         if (mid)
759                                 free_extent_buffer(mid);
760                 } else {
761                         orig_slot -= btrfs_header_nritems(left);
762                         path->slots[level] = orig_slot;
763                 }
764         }
765         /* double check we haven't messed things up */
766         check_block(root, path, level);
767         if (orig_ptr !=
768             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
769                 BUG();
770 enospc:
771         if (right)
772                 free_extent_buffer(right);
773         if (left)
774                 free_extent_buffer(left);
775         return ret;
776 }
777
778 /* returns zero if the push worked, non-zero otherwise */
779 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
780                                 struct btrfs_root *root,
781                                 struct btrfs_path *path, int level)
782 {
783         struct extent_buffer *right = NULL;
784         struct extent_buffer *mid;
785         struct extent_buffer *left = NULL;
786         struct extent_buffer *parent = NULL;
787         int ret = 0;
788         int wret;
789         int pslot;
790         int orig_slot = path->slots[level];
791         u64 orig_ptr;
792
793         if (level == 0)
794                 return 1;
795
796         mid = path->nodes[level];
797         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
798
799         if (level < BTRFS_MAX_LEVEL - 1)
800                 parent = path->nodes[level + 1];
801         pslot = path->slots[level + 1];
802
803         if (!parent)
804                 return 1;
805
806         left = read_node_slot(root, parent, pslot - 1);
807
808         /* first, try to make some room in the middle buffer */
809         if (left) {
810                 u32 left_nr;
811                 left_nr = btrfs_header_nritems(left);
812                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
813                         wret = 1;
814                 } else {
815                         ret = btrfs_cow_block(trans, root, left, parent,
816                                               pslot - 1, &left);
817                         if (ret)
818                                 wret = 1;
819                         else {
820                                 wret = push_node_left(trans, root,
821                                                       left, mid);
822                         }
823                 }
824                 if (wret < 0)
825                         ret = wret;
826                 if (wret == 0) {
827                         struct btrfs_disk_key disk_key;
828                         orig_slot += left_nr;
829                         btrfs_node_key(mid, &disk_key, 0);
830                         btrfs_set_node_key(parent, &disk_key, pslot);
831                         btrfs_mark_buffer_dirty(parent);
832                         if (btrfs_header_nritems(left) > orig_slot) {
833                                 path->nodes[level] = left;
834                                 path->slots[level + 1] -= 1;
835                                 path->slots[level] = orig_slot;
836                                 free_extent_buffer(mid);
837                         } else {
838                                 orig_slot -=
839                                         btrfs_header_nritems(left);
840                                 path->slots[level] = orig_slot;
841                                 free_extent_buffer(left);
842                         }
843                         return 0;
844                 }
845                 free_extent_buffer(left);
846         }
847         right= read_node_slot(root, parent, pslot + 1);
848
849         /*
850          * then try to empty the right most buffer into the middle
851          */
852         if (right) {
853                 u32 right_nr;
854                 right_nr = btrfs_header_nritems(right);
855                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
856                         wret = 1;
857                 } else {
858                         ret = btrfs_cow_block(trans, root, right,
859                                               parent, pslot + 1,
860                                               &right);
861                         if (ret)
862                                 wret = 1;
863                         else {
864                                 wret = balance_node_right(trans, root,
865                                                           right, mid);
866                         }
867                 }
868                 if (wret < 0)
869                         ret = wret;
870                 if (wret == 0) {
871                         struct btrfs_disk_key disk_key;
872
873                         btrfs_node_key(right, &disk_key, 0);
874                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
875                         btrfs_mark_buffer_dirty(parent);
876
877                         if (btrfs_header_nritems(mid) <= orig_slot) {
878                                 path->nodes[level] = right;
879                                 path->slots[level + 1] += 1;
880                                 path->slots[level] = orig_slot -
881                                         btrfs_header_nritems(mid);
882                                 free_extent_buffer(mid);
883                         } else {
884                                 free_extent_buffer(right);
885                         }
886                         return 0;
887                 }
888                 free_extent_buffer(right);
889         }
890         return 1;
891 }
892
893 /*
894  * readahead one full node of leaves
895  */
896 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
897                              int level, int slot)
898 {
899         struct extent_buffer *node;
900         u32 nritems;
901         u64 search;
902         u64 lowest_read;
903         u64 highest_read;
904         u64 nread = 0;
905         int direction = path->reada;
906         struct extent_buffer *eb;
907         u32 nr;
908         u32 blocksize;
909         u32 nscan = 0;
910
911         if (level != 1)
912                 return;
913
914         if (!path->nodes[level])
915                 return;
916
917         node = path->nodes[level];
918         search = btrfs_node_blockptr(node, slot);
919         blocksize = btrfs_level_size(root, level - 1);
920         eb = btrfs_find_tree_block(root, search, blocksize);
921         if (eb) {
922                 free_extent_buffer(eb);
923                 return;
924         }
925
926         highest_read = search;
927         lowest_read = search;
928
929         nritems = btrfs_header_nritems(node);
930         nr = slot;
931         while(1) {
932                 if (direction < 0) {
933                         if (nr == 0)
934                                 break;
935                         nr--;
936                 } else if (direction > 0) {
937                         nr++;
938                         if (nr >= nritems)
939                                 break;
940                 }
941                 search = btrfs_node_blockptr(node, nr);
942                 if ((search >= lowest_read && search <= highest_read) ||
943                     (search < lowest_read && lowest_read - search <= 32768) ||
944                     (search > highest_read && search - highest_read <= 32768)) {
945                         readahead_tree_block(root, search, blocksize);
946                         nread += blocksize;
947                 }
948                 nscan++;
949                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
950                         break;
951                 if(nread > (1024 * 1024) || nscan > 128)
952                         break;
953
954                 if (search < lowest_read)
955                         lowest_read = search;
956                 if (search > highest_read)
957                         highest_read = search;
958         }
959 }
960 /*
961  * look for key in the tree.  path is filled in with nodes along the way
962  * if key is found, we return zero and you can find the item in the leaf
963  * level of the path (level 0)
964  *
965  * If the key isn't found, the path points to the slot where it should
966  * be inserted, and 1 is returned.  If there are other errors during the
967  * search a negative error number is returned.
968  *
969  * if ins_len > 0, nodes and leaves will be split as we walk down the
970  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
971  * possible)
972  */
973 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
974                       *root, struct btrfs_key *key, struct btrfs_path *p, int
975                       ins_len, int cow)
976 {
977         struct extent_buffer *b;
978         u64 bytenr;
979         int slot;
980         int ret;
981         int level;
982         int should_reada = p->reada;
983         u8 lowest_level = 0;
984
985         lowest_level = p->lowest_level;
986         WARN_ON(lowest_level && ins_len);
987         WARN_ON(p->nodes[0] != NULL);
988         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
989 again:
990         b = root->node;
991         extent_buffer_get(b);
992         while (b) {
993                 level = btrfs_header_level(b);
994                 if (cow) {
995                         int wret;
996                         wret = btrfs_cow_block(trans, root, b,
997                                                p->nodes[level + 1],
998                                                p->slots[level + 1],
999                                                &b);
1000                         if (wret) {
1001                                 free_extent_buffer(b);
1002                                 return wret;
1003                         }
1004                 }
1005                 BUG_ON(!cow && ins_len);
1006                 if (level != btrfs_header_level(b))
1007                         WARN_ON(1);
1008                 level = btrfs_header_level(b);
1009                 p->nodes[level] = b;
1010                 ret = check_block(root, p, level);
1011                 if (ret)
1012                         return -1;
1013                 ret = bin_search(b, key, level, &slot);
1014                 if (level != 0) {
1015                         if (ret && slot > 0)
1016                                 slot -= 1;
1017                         p->slots[level] = slot;
1018                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1019                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1020                                 int sret = split_node(trans, root, p, level);
1021                                 BUG_ON(sret > 0);
1022                                 if (sret)
1023                                         return sret;
1024                                 b = p->nodes[level];
1025                                 slot = p->slots[level];
1026                         } else if (ins_len < 0) {
1027                                 int sret = balance_level(trans, root, p,
1028                                                          level);
1029                                 if (sret)
1030                                         return sret;
1031                                 b = p->nodes[level];
1032                                 if (!b) {
1033                                         btrfs_release_path(NULL, p);
1034                                         goto again;
1035                                 }
1036                                 slot = p->slots[level];
1037                                 BUG_ON(btrfs_header_nritems(b) == 1);
1038                         }
1039                         /* this is only true while dropping a snapshot */
1040                         if (level == lowest_level)
1041                                 break;
1042                         bytenr = btrfs_node_blockptr(b, slot);
1043                         if (should_reada)
1044                                 reada_for_search(root, p, level, slot);
1045                         b = read_tree_block(root, bytenr,
1046                                             btrfs_level_size(root, level - 1));
1047                 } else {
1048                         p->slots[level] = slot;
1049                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1050                             sizeof(struct btrfs_item) + ins_len) {
1051                                 int sret = split_leaf(trans, root, key,
1052                                                       p, ins_len);
1053                                 BUG_ON(sret > 0);
1054                                 if (sret)
1055                                         return sret;
1056                         }
1057                         return ret;
1058                 }
1059         }
1060         return 1;
1061 }
1062
1063 /*
1064  * adjust the pointers going up the tree, starting at level
1065  * making sure the right key of each node is points to 'key'.
1066  * This is used after shifting pointers to the left, so it stops
1067  * fixing up pointers when a given leaf/node is not in slot 0 of the
1068  * higher levels
1069  *
1070  * If this fails to write a tree block, it returns -1, but continues
1071  * fixing up the blocks in ram so the tree is consistent.
1072  */
1073 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1074                           struct btrfs_root *root, struct btrfs_path *path,
1075                           struct btrfs_disk_key *key, int level)
1076 {
1077         int i;
1078         int ret = 0;
1079         struct extent_buffer *t;
1080
1081         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1082                 int tslot = path->slots[i];
1083                 if (!path->nodes[i])
1084                         break;
1085                 t = path->nodes[i];
1086                 btrfs_set_node_key(t, key, tslot);
1087                 btrfs_mark_buffer_dirty(path->nodes[i]);
1088                 if (tslot != 0)
1089                         break;
1090         }
1091         return ret;
1092 }
1093
1094 /*
1095  * try to push data from one node into the next node left in the
1096  * tree.
1097  *
1098  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1099  * error, and > 0 if there was no room in the left hand block.
1100  */
1101 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
1102                           *root, struct extent_buffer *dst,
1103                           struct extent_buffer *src)
1104 {
1105         int push_items = 0;
1106         int src_nritems;
1107         int dst_nritems;
1108         int ret = 0;
1109
1110         src_nritems = btrfs_header_nritems(src);
1111         dst_nritems = btrfs_header_nritems(dst);
1112         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1113
1114         if (push_items <= 0) {
1115                 return 1;
1116         }
1117
1118         if (src_nritems < push_items)
1119                 push_items = src_nritems;
1120
1121         copy_extent_buffer(dst, src,
1122                            btrfs_node_key_ptr_offset(dst_nritems),
1123                            btrfs_node_key_ptr_offset(0),
1124                            push_items * sizeof(struct btrfs_key_ptr));
1125
1126         if (push_items < src_nritems) {
1127                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1128                                       btrfs_node_key_ptr_offset(push_items),
1129                                       (src_nritems - push_items) *
1130                                       sizeof(struct btrfs_key_ptr));
1131         }
1132         btrfs_set_header_nritems(src, src_nritems - push_items);
1133         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1134         btrfs_mark_buffer_dirty(src);
1135         btrfs_mark_buffer_dirty(dst);
1136         return ret;
1137 }
1138
1139 /*
1140  * try to push data from one node into the next node right in the
1141  * tree.
1142  *
1143  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1144  * error, and > 0 if there was no room in the right hand block.
1145  *
1146  * this will  only push up to 1/2 the contents of the left node over
1147  */
1148 static int balance_node_right(struct btrfs_trans_handle *trans,
1149                               struct btrfs_root *root,
1150                               struct extent_buffer *dst,
1151                               struct extent_buffer *src)
1152 {
1153         int push_items = 0;
1154         int max_push;
1155         int src_nritems;
1156         int dst_nritems;
1157         int ret = 0;
1158
1159         src_nritems = btrfs_header_nritems(src);
1160         dst_nritems = btrfs_header_nritems(dst);
1161         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1162         if (push_items <= 0)
1163                 return 1;
1164
1165         max_push = src_nritems / 2 + 1;
1166         /* don't try to empty the node */
1167         if (max_push >= src_nritems)
1168                 return 1;
1169
1170         if (max_push < push_items)
1171                 push_items = max_push;
1172
1173         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1174                                       btrfs_node_key_ptr_offset(0),
1175                                       (dst_nritems) *
1176                                       sizeof(struct btrfs_key_ptr));
1177
1178         copy_extent_buffer(dst, src,
1179                            btrfs_node_key_ptr_offset(0),
1180                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1181                            push_items * sizeof(struct btrfs_key_ptr));
1182
1183         btrfs_set_header_nritems(src, src_nritems - push_items);
1184         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1185
1186         btrfs_mark_buffer_dirty(src);
1187         btrfs_mark_buffer_dirty(dst);
1188         return ret;
1189 }
1190
1191 /*
1192  * helper function to insert a new root level in the tree.
1193  * A new node is allocated, and a single item is inserted to
1194  * point to the existing root
1195  *
1196  * returns zero on success or < 0 on failure.
1197  */
1198 static int insert_new_root(struct btrfs_trans_handle *trans,
1199                            struct btrfs_root *root,
1200                            struct btrfs_path *path, int level)
1201 {
1202         struct extent_buffer *lower;
1203         struct extent_buffer *c;
1204         struct btrfs_disk_key lower_key;
1205
1206         BUG_ON(path->nodes[level]);
1207         BUG_ON(path->nodes[level-1] != root->node);
1208
1209         c = btrfs_alloc_free_block(trans, root, root->nodesize,
1210                                    root->node->start, 0);
1211         if (IS_ERR(c))
1212                 return PTR_ERR(c);
1213         memset_extent_buffer(c, 0, 0, root->nodesize);
1214         btrfs_set_header_nritems(c, 1);
1215         btrfs_set_header_level(c, level);
1216         btrfs_set_header_bytenr(c, c->start);
1217         btrfs_set_header_generation(c, trans->transid);
1218         btrfs_set_header_owner(c, root->root_key.objectid);
1219         lower = path->nodes[level-1];
1220
1221         write_extent_buffer(c, root->fs_info->fsid,
1222                             (unsigned long)btrfs_header_fsid(c),
1223                             BTRFS_FSID_SIZE);
1224         if (level == 1)
1225                 btrfs_item_key(lower, &lower_key, 0);
1226         else
1227                 btrfs_node_key(lower, &lower_key, 0);
1228         btrfs_set_node_key(c, &lower_key, 0);
1229         btrfs_set_node_blockptr(c, 0, lower->start);
1230
1231         btrfs_mark_buffer_dirty(c);
1232
1233         /* the super has an extra ref to root->node */
1234         free_extent_buffer(root->node);
1235         root->node = c;
1236         extent_buffer_get(c);
1237         path->nodes[level] = c;
1238         path->slots[level] = 0;
1239         return 0;
1240 }
1241
1242 /*
1243  * worker function to insert a single pointer in a node.
1244  * the node should have enough room for the pointer already
1245  *
1246  * slot and level indicate where you want the key to go, and
1247  * blocknr is the block the key points to.
1248  *
1249  * returns zero on success and < 0 on any error
1250  */
1251 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1252                       *root, struct btrfs_path *path, struct btrfs_disk_key
1253                       *key, u64 bytenr, int slot, int level)
1254 {
1255         struct extent_buffer *lower;
1256         int nritems;
1257
1258         BUG_ON(!path->nodes[level]);
1259         lower = path->nodes[level];
1260         nritems = btrfs_header_nritems(lower);
1261         if (slot > nritems)
1262                 BUG();
1263         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1264                 BUG();
1265         if (slot != nritems) {
1266                 memmove_extent_buffer(lower,
1267                               btrfs_node_key_ptr_offset(slot + 1),
1268                               btrfs_node_key_ptr_offset(slot),
1269                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1270         }
1271         btrfs_set_node_key(lower, key, slot);
1272         btrfs_set_node_blockptr(lower, slot, bytenr);
1273         btrfs_set_header_nritems(lower, nritems + 1);
1274         btrfs_mark_buffer_dirty(lower);
1275         return 0;
1276 }
1277
1278 /*
1279  * split the node at the specified level in path in two.
1280  * The path is corrected to point to the appropriate node after the split
1281  *
1282  * Before splitting this tries to make some room in the node by pushing
1283  * left and right, if either one works, it returns right away.
1284  *
1285  * returns 0 on success and < 0 on failure
1286  */
1287 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1288                       *root, struct btrfs_path *path, int level)
1289 {
1290         struct extent_buffer *c;
1291         struct extent_buffer *split;
1292         struct btrfs_disk_key disk_key;
1293         int mid;
1294         int ret;
1295         int wret;
1296         u32 c_nritems;
1297
1298         c = path->nodes[level];
1299         if (c == root->node) {
1300                 /* trying to split the root, lets make a new one */
1301                 ret = insert_new_root(trans, root, path, level + 1);
1302                 if (ret)
1303                         return ret;
1304         } else {
1305                 ret = push_nodes_for_insert(trans, root, path, level);
1306                 c = path->nodes[level];
1307                 if (!ret && btrfs_header_nritems(c) <
1308                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1309                         return 0;
1310                 if (ret < 0)
1311                         return ret;
1312         }
1313
1314         c_nritems = btrfs_header_nritems(c);
1315         split = btrfs_alloc_free_block(trans, root, root->nodesize,
1316                                        c->start, 0);
1317         if (IS_ERR(split))
1318                 return PTR_ERR(split);
1319
1320         btrfs_set_header_flags(split, btrfs_header_flags(c));
1321         btrfs_set_header_level(split, btrfs_header_level(c));
1322         btrfs_set_header_bytenr(split, split->start);
1323         btrfs_set_header_generation(split, trans->transid);
1324         btrfs_set_header_owner(split, root->root_key.objectid);
1325         write_extent_buffer(split, root->fs_info->fsid,
1326                             (unsigned long)btrfs_header_fsid(split),
1327                             BTRFS_FSID_SIZE);
1328
1329         mid = (c_nritems + 1) / 2;
1330
1331         copy_extent_buffer(split, c,
1332                            btrfs_node_key_ptr_offset(0),
1333                            btrfs_node_key_ptr_offset(mid),
1334                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1335         btrfs_set_header_nritems(split, c_nritems - mid);
1336         btrfs_set_header_nritems(c, mid);
1337         ret = 0;
1338
1339         btrfs_mark_buffer_dirty(c);
1340         btrfs_mark_buffer_dirty(split);
1341
1342         btrfs_node_key(split, &disk_key, 0);
1343         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1344                           path->slots[level + 1] + 1,
1345                           level + 1);
1346         if (wret)
1347                 ret = wret;
1348
1349         if (path->slots[level] >= mid) {
1350                 path->slots[level] -= mid;
1351                 free_extent_buffer(c);
1352                 path->nodes[level] = split;
1353                 path->slots[level + 1] += 1;
1354         } else {
1355                 free_extent_buffer(split);
1356         }
1357         return ret;
1358 }
1359
1360 /*
1361  * how many bytes are required to store the items in a leaf.  start
1362  * and nr indicate which items in the leaf to check.  This totals up the
1363  * space used both by the item structs and the item data
1364  */
1365 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1366 {
1367         int data_len;
1368         int nritems = btrfs_header_nritems(l);
1369         int end = min(nritems, start + nr) - 1;
1370
1371         if (!nr)
1372                 return 0;
1373         data_len = btrfs_item_end_nr(l, start);
1374         data_len = data_len - btrfs_item_offset_nr(l, end);
1375         data_len += sizeof(struct btrfs_item) * nr;
1376         WARN_ON(data_len < 0);
1377         return data_len;
1378 }
1379
1380 /*
1381  * The space between the end of the leaf items and
1382  * the start of the leaf data.  IOW, how much room
1383  * the leaf has left for both items and data
1384  */
1385 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1386 {
1387         int nritems = btrfs_header_nritems(leaf);
1388         int ret;
1389         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1390         if (ret < 0) {
1391                 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1392                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1393                        leaf_space_used(leaf, 0, nritems), nritems);
1394         }
1395         return ret;
1396 }
1397
1398 /*
1399  * push some data in the path leaf to the right, trying to free up at
1400  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1401  *
1402  * returns 1 if the push failed because the other node didn't have enough
1403  * room, 0 if everything worked out and < 0 if there were major errors.
1404  */
1405 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1406                            *root, struct btrfs_path *path, int data_size)
1407 {
1408         struct extent_buffer *left = path->nodes[0];
1409         struct extent_buffer *right;
1410         struct extent_buffer *upper;
1411         struct btrfs_disk_key disk_key;
1412         int slot;
1413         int i;
1414         int free_space;
1415         int push_space = 0;
1416         int push_items = 0;
1417         struct btrfs_item *item;
1418         u32 left_nritems;
1419         u32 right_nritems;
1420         u32 data_end;
1421         u32 this_item_size;
1422         int ret;
1423
1424         slot = path->slots[1];
1425         if (!path->nodes[1]) {
1426                 return 1;
1427         }
1428         upper = path->nodes[1];
1429         if (slot >= btrfs_header_nritems(upper) - 1)
1430                 return 1;
1431
1432         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1433                                 root->leafsize);
1434         free_space = btrfs_leaf_free_space(root, right);
1435         if (free_space < data_size + sizeof(struct btrfs_item)) {
1436                 free_extent_buffer(right);
1437                 return 1;
1438         }
1439
1440         /* cow and double check */
1441         ret = btrfs_cow_block(trans, root, right, upper,
1442                               slot + 1, &right);
1443         if (ret) {
1444                 free_extent_buffer(right);
1445                 return 1;
1446         }
1447         free_space = btrfs_leaf_free_space(root, right);
1448         if (free_space < data_size + sizeof(struct btrfs_item)) {
1449                 free_extent_buffer(right);
1450                 return 1;
1451         }
1452
1453         left_nritems = btrfs_header_nritems(left);
1454         if (left_nritems == 0) {
1455                 free_extent_buffer(right);
1456                 return 1;
1457         }
1458
1459         for (i = left_nritems - 1; i >= 1; i--) {
1460                 item = btrfs_item_nr(left, i);
1461
1462                 if (path->slots[0] == i)
1463                         push_space += data_size + sizeof(*item);
1464
1465                 if (!left->map_token) {
1466                         map_extent_buffer(left, (unsigned long)item,
1467                                         sizeof(struct btrfs_item),
1468                                         &left->map_token, &left->kaddr,
1469                                         &left->map_start, &left->map_len,
1470                                         KM_USER1);
1471                 }
1472
1473                 this_item_size = btrfs_item_size(left, item);
1474                 if (this_item_size + sizeof(*item) + push_space > free_space)
1475                         break;
1476                 push_items++;
1477                 push_space += this_item_size + sizeof(*item);
1478         }
1479         if (left->map_token) {
1480                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1481                 left->map_token = NULL;
1482         }
1483
1484         if (push_items == 0) {
1485                 free_extent_buffer(right);
1486                 return 1;
1487         }
1488
1489         if (push_items == left_nritems)
1490                 WARN_ON(1);
1491
1492         /* push left to right */
1493         right_nritems = btrfs_header_nritems(right);
1494         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1495         push_space -= leaf_data_end(root, left);
1496
1497         /* make room in the right data area */
1498         data_end = leaf_data_end(root, right);
1499         memmove_extent_buffer(right,
1500                               btrfs_leaf_data(right) + data_end - push_space,
1501                               btrfs_leaf_data(right) + data_end,
1502                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1503
1504         /* copy from the left data area */
1505         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1506                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1507                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1508                      push_space);
1509
1510         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1511                               btrfs_item_nr_offset(0),
1512                               right_nritems * sizeof(struct btrfs_item));
1513
1514         /* copy the items from left to right */
1515         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1516                    btrfs_item_nr_offset(left_nritems - push_items),
1517                    push_items * sizeof(struct btrfs_item));
1518
1519         /* update the item pointers */
1520         right_nritems += push_items;
1521         btrfs_set_header_nritems(right, right_nritems);
1522         push_space = BTRFS_LEAF_DATA_SIZE(root);
1523
1524         for (i = 0; i < right_nritems; i++) {
1525                 item = btrfs_item_nr(right, i);
1526                 if (!right->map_token) {
1527                         map_extent_buffer(right, (unsigned long)item,
1528                                         sizeof(struct btrfs_item),
1529                                         &right->map_token, &right->kaddr,
1530                                         &right->map_start, &right->map_len,
1531                                         KM_USER1);
1532                 }
1533                 push_space -= btrfs_item_size(right, item);
1534                 btrfs_set_item_offset(right, item, push_space);
1535         }
1536
1537         if (right->map_token) {
1538                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1539                 right->map_token = NULL;
1540         }
1541         left_nritems -= push_items;
1542         btrfs_set_header_nritems(left, left_nritems);
1543
1544         btrfs_mark_buffer_dirty(left);
1545         btrfs_mark_buffer_dirty(right);
1546
1547         btrfs_item_key(right, &disk_key, 0);
1548         btrfs_set_node_key(upper, &disk_key, slot + 1);
1549         btrfs_mark_buffer_dirty(upper);
1550
1551         /* then fixup the leaf pointer in the path */
1552         if (path->slots[0] >= left_nritems) {
1553                 path->slots[0] -= left_nritems;
1554                 free_extent_buffer(path->nodes[0]);
1555                 path->nodes[0] = right;
1556                 path->slots[1] += 1;
1557         } else {
1558                 free_extent_buffer(right);
1559         }
1560         return 0;
1561 }
1562 /*
1563  * push some data in the path leaf to the left, trying to free up at
1564  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1565  */
1566 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1567                           *root, struct btrfs_path *path, int data_size)
1568 {
1569         struct btrfs_disk_key disk_key;
1570         struct extent_buffer *right = path->nodes[0];
1571         struct extent_buffer *left;
1572         int slot;
1573         int i;
1574         int free_space;
1575         int push_space = 0;
1576         int push_items = 0;
1577         struct btrfs_item *item;
1578         u32 old_left_nritems;
1579         u32 right_nritems;
1580         int ret = 0;
1581         int wret;
1582         u32 this_item_size;
1583         u32 old_left_item_size;
1584
1585         slot = path->slots[1];
1586         if (slot == 0)
1587                 return 1;
1588         if (!path->nodes[1])
1589                 return 1;
1590
1591         right_nritems = btrfs_header_nritems(right);
1592         if (right_nritems == 0) {
1593                 return 1;
1594         }
1595
1596         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1597                                slot - 1), root->leafsize);
1598         free_space = btrfs_leaf_free_space(root, left);
1599         if (free_space < data_size + sizeof(struct btrfs_item)) {
1600                 free_extent_buffer(left);
1601                 return 1;
1602         }
1603
1604         /* cow and double check */
1605         ret = btrfs_cow_block(trans, root, left,
1606                               path->nodes[1], slot - 1, &left);
1607         if (ret) {
1608                 /* we hit -ENOSPC, but it isn't fatal here */
1609                 free_extent_buffer(left);
1610                 return 1;
1611         }
1612
1613         free_space = btrfs_leaf_free_space(root, left);
1614         if (free_space < data_size + sizeof(struct btrfs_item)) {
1615                 free_extent_buffer(left);
1616                 return 1;
1617         }
1618
1619         for (i = 0; i < right_nritems - 1; i++) {
1620                 item = btrfs_item_nr(right, i);
1621                 if (!right->map_token) {
1622                         map_extent_buffer(right, (unsigned long)item,
1623                                         sizeof(struct btrfs_item),
1624                                         &right->map_token, &right->kaddr,
1625                                         &right->map_start, &right->map_len,
1626                                         KM_USER1);
1627                 }
1628
1629                 if (path->slots[0] == i)
1630                         push_space += data_size + sizeof(*item);
1631
1632                 this_item_size = btrfs_item_size(right, item);
1633                 if (this_item_size + sizeof(*item) + push_space > free_space)
1634                         break;
1635
1636                 push_items++;
1637                 push_space += this_item_size + sizeof(*item);
1638         }
1639
1640         if (right->map_token) {
1641                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1642                 right->map_token = NULL;
1643         }
1644
1645         if (push_items == 0) {
1646                 free_extent_buffer(left);
1647                 return 1;
1648         }
1649         if (push_items == btrfs_header_nritems(right))
1650                 WARN_ON(1);
1651
1652         /* push data from right to left */
1653         copy_extent_buffer(left, right,
1654                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1655                            btrfs_item_nr_offset(0),
1656                            push_items * sizeof(struct btrfs_item));
1657
1658         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1659                      btrfs_item_offset_nr(right, push_items -1);
1660
1661         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1662                      leaf_data_end(root, left) - push_space,
1663                      btrfs_leaf_data(right) +
1664                      btrfs_item_offset_nr(right, push_items - 1),
1665                      push_space);
1666         old_left_nritems = btrfs_header_nritems(left);
1667         BUG_ON(old_left_nritems < 0);
1668
1669         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1670         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1671                 u32 ioff;
1672
1673                 item = btrfs_item_nr(left, i);
1674                 if (!left->map_token) {
1675                         map_extent_buffer(left, (unsigned long)item,
1676                                         sizeof(struct btrfs_item),
1677                                         &left->map_token, &left->kaddr,
1678                                         &left->map_start, &left->map_len,
1679                                         KM_USER1);
1680                 }
1681
1682                 ioff = btrfs_item_offset(left, item);
1683                 btrfs_set_item_offset(left, item,
1684                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1685         }
1686         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1687         if (left->map_token) {
1688                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1689                 left->map_token = NULL;
1690         }
1691
1692         /* fixup right node */
1693         push_space = btrfs_item_offset_nr(right, push_items - 1) -
1694                                           leaf_data_end(root, right);
1695         memmove_extent_buffer(right, btrfs_leaf_data(right) +
1696                               BTRFS_LEAF_DATA_SIZE(root) - push_space,
1697                               btrfs_leaf_data(right) +
1698                               leaf_data_end(root, right), push_space);
1699
1700         memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1701                               btrfs_item_nr_offset(push_items),
1702                              (btrfs_header_nritems(right) - push_items) *
1703                              sizeof(struct btrfs_item));
1704
1705         right_nritems = btrfs_header_nritems(right) - push_items;
1706         btrfs_set_header_nritems(right, right_nritems);
1707         push_space = BTRFS_LEAF_DATA_SIZE(root);
1708
1709         for (i = 0; i < right_nritems; i++) {
1710                 item = btrfs_item_nr(right, i);
1711
1712                 if (!right->map_token) {
1713                         map_extent_buffer(right, (unsigned long)item,
1714                                         sizeof(struct btrfs_item),
1715                                         &right->map_token, &right->kaddr,
1716                                         &right->map_start, &right->map_len,
1717                                         KM_USER1);
1718                 }
1719
1720                 push_space = push_space - btrfs_item_size(right, item);
1721                 btrfs_set_item_offset(right, item, push_space);
1722         }
1723         if (right->map_token) {
1724                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1725                 right->map_token = NULL;
1726         }
1727
1728         btrfs_mark_buffer_dirty(left);
1729         btrfs_mark_buffer_dirty(right);
1730
1731         btrfs_item_key(right, &disk_key, 0);
1732         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1733         if (wret)
1734                 ret = wret;
1735
1736         /* then fixup the leaf pointer in the path */
1737         if (path->slots[0] < push_items) {
1738                 path->slots[0] += old_left_nritems;
1739                 free_extent_buffer(path->nodes[0]);
1740                 path->nodes[0] = left;
1741                 path->slots[1] -= 1;
1742         } else {
1743                 free_extent_buffer(left);
1744                 path->slots[0] -= push_items;
1745         }
1746         BUG_ON(path->slots[0] < 0);
1747         return ret;
1748 }
1749
1750 /*
1751  * split the path's leaf in two, making sure there is at least data_size
1752  * available for the resulting leaf level of the path.
1753  *
1754  * returns 0 if all went well and < 0 on failure.
1755  */
1756 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1757                       *root, struct btrfs_key *ins_key,
1758                       struct btrfs_path *path, int data_size)
1759 {
1760         struct extent_buffer *l;
1761         u32 nritems;
1762         int mid;
1763         int slot;
1764         struct extent_buffer *right;
1765         int space_needed = data_size + sizeof(struct btrfs_item);
1766         int data_copy_size;
1767         int rt_data_off;
1768         int i;
1769         int ret = 0;
1770         int wret;
1771         int double_split = 0;
1772         struct btrfs_disk_key disk_key;
1773
1774         /* first try to make some room by pushing left and right */
1775         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1776                 wret = push_leaf_right(trans, root, path, data_size);
1777                 if (wret < 0) {
1778                         return wret;
1779                 }
1780                 if (wret) {
1781                         wret = push_leaf_left(trans, root, path, data_size);
1782                         if (wret < 0)
1783                                 return wret;
1784                 }
1785                 l = path->nodes[0];
1786
1787                 /* did the pushes work? */
1788                 if (btrfs_leaf_free_space(root, l) >=
1789                     sizeof(struct btrfs_item) + data_size) {
1790                         return 0;
1791                 }
1792         } else {
1793                 l = path->nodes[0];
1794         }
1795
1796         if (!path->nodes[1]) {
1797                 ret = insert_new_root(trans, root, path, 1);
1798                 if (ret)
1799                         return ret;
1800         }
1801         slot = path->slots[0];
1802         nritems = btrfs_header_nritems(l);
1803         mid = (nritems + 1)/ 2;
1804
1805         right = btrfs_alloc_free_block(trans, root, root->leafsize,
1806                                        l->start, 0);
1807         if (IS_ERR(right))
1808                 return PTR_ERR(right);
1809
1810         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1811         btrfs_set_header_bytenr(right, right->start);
1812         btrfs_set_header_generation(right, trans->transid);
1813         btrfs_set_header_owner(right, root->root_key.objectid);
1814         btrfs_set_header_level(right, 0);
1815         write_extent_buffer(right, root->fs_info->fsid,
1816                             (unsigned long)btrfs_header_fsid(right),
1817                             BTRFS_FSID_SIZE);
1818
1819         if (mid <= slot) {
1820                 if (nritems == 1 ||
1821                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1822                         BTRFS_LEAF_DATA_SIZE(root)) {
1823                         if (slot >= nritems) {
1824                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1825                                 btrfs_set_header_nritems(right, 0);
1826                                 wret = insert_ptr(trans, root, path,
1827                                                   &disk_key, right->start,
1828                                                   path->slots[1] + 1, 1);
1829                                 if (wret)
1830                                         ret = wret;
1831                                 free_extent_buffer(path->nodes[0]);
1832                                 path->nodes[0] = right;
1833                                 path->slots[0] = 0;
1834                                 path->slots[1] += 1;
1835                                 return ret;
1836                         }
1837                         mid = slot;
1838                         if (mid != nritems &&
1839                             leaf_space_used(l, mid, nritems - mid) +
1840                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1841                                 double_split = 1;
1842                         }
1843                 }
1844         } else {
1845                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1846                         BTRFS_LEAF_DATA_SIZE(root)) {
1847                         if (slot == 0) {
1848                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1849                                 btrfs_set_header_nritems(right, 0);
1850                                 wret = insert_ptr(trans, root, path,
1851                                                   &disk_key,
1852                                                   right->start,
1853                                                   path->slots[1], 1);
1854                                 if (wret)
1855                                         ret = wret;
1856                                 free_extent_buffer(path->nodes[0]);
1857                                 path->nodes[0] = right;
1858                                 path->slots[0] = 0;
1859                                 if (path->slots[1] == 0) {
1860                                         wret = fixup_low_keys(trans, root,
1861                                                    path, &disk_key, 1);
1862                                         if (wret)
1863                                                 ret = wret;
1864                                 }
1865                                 return ret;
1866                         }
1867                         mid = slot;
1868                         if (mid != nritems &&
1869                             leaf_space_used(l, mid, nritems - mid) +
1870                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1871                                 double_split = 1;
1872                         }
1873                 }
1874         }
1875         nritems = nritems - mid;
1876         btrfs_set_header_nritems(right, nritems);
1877         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
1878
1879         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1880                            btrfs_item_nr_offset(mid),
1881                            nritems * sizeof(struct btrfs_item));
1882
1883         copy_extent_buffer(right, l,
1884                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1885                      data_copy_size, btrfs_leaf_data(l) +
1886                      leaf_data_end(root, l), data_copy_size);
1887
1888         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1889                       btrfs_item_end_nr(l, mid);
1890
1891         for (i = 0; i < nritems; i++) {
1892                 struct btrfs_item *item = btrfs_item_nr(right, i);
1893                 u32 ioff;
1894
1895                 if (!right->map_token) {
1896                         map_extent_buffer(right, (unsigned long)item,
1897                                         sizeof(struct btrfs_item),
1898                                         &right->map_token, &right->kaddr,
1899                                         &right->map_start, &right->map_len,
1900                                         KM_USER1);
1901                 }
1902
1903                 ioff = btrfs_item_offset(right, item);
1904                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
1905         }
1906
1907         if (right->map_token) {
1908                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1909                 right->map_token = NULL;
1910         }
1911
1912         btrfs_set_header_nritems(l, mid);
1913         ret = 0;
1914         btrfs_item_key(right, &disk_key, 0);
1915         wret = insert_ptr(trans, root, path, &disk_key, right->start,
1916                           path->slots[1] + 1, 1);
1917         if (wret)
1918                 ret = wret;
1919
1920         btrfs_mark_buffer_dirty(right);
1921         btrfs_mark_buffer_dirty(l);
1922         BUG_ON(path->slots[0] != slot);
1923
1924         if (mid <= slot) {
1925                 free_extent_buffer(path->nodes[0]);
1926                 path->nodes[0] = right;
1927                 path->slots[0] -= mid;
1928                 path->slots[1] += 1;
1929         } else
1930                 free_extent_buffer(right);
1931
1932         BUG_ON(path->slots[0] < 0);
1933
1934         if (!double_split) {
1935                 return ret;
1936         }
1937
1938         right = btrfs_alloc_free_block(trans, root, root->leafsize,
1939                                        l->start, 0);
1940         if (IS_ERR(right))
1941                 return PTR_ERR(right);
1942
1943         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1944         btrfs_set_header_bytenr(right, right->start);
1945         btrfs_set_header_generation(right, trans->transid);
1946         btrfs_set_header_owner(right, root->root_key.objectid);
1947         btrfs_set_header_level(right, 0);
1948         write_extent_buffer(right, root->fs_info->fsid,
1949                             (unsigned long)btrfs_header_fsid(right),
1950                             BTRFS_FSID_SIZE);
1951
1952         btrfs_cpu_key_to_disk(&disk_key, ins_key);
1953         btrfs_set_header_nritems(right, 0);
1954         wret = insert_ptr(trans, root, path,
1955                           &disk_key, right->start,
1956                           path->slots[1], 1);
1957         if (wret)
1958                 ret = wret;
1959         if (path->slots[1] == 0) {
1960                 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1961                 if (wret)
1962                         ret = wret;
1963         }
1964         free_extent_buffer(path->nodes[0]);
1965         path->nodes[0] = right;
1966         path->slots[0] = 0;
1967         return ret;
1968 }
1969
1970 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1971                         struct btrfs_root *root,
1972                         struct btrfs_path *path,
1973                         u32 new_size)
1974 {
1975         int ret = 0;
1976         int slot;
1977         int slot_orig;
1978         struct extent_buffer *leaf;
1979         struct btrfs_item *item;
1980         u32 nritems;
1981         unsigned int data_end;
1982         unsigned int old_data_start;
1983         unsigned int old_size;
1984         unsigned int size_diff;
1985         int i;
1986
1987         slot_orig = path->slots[0];
1988         leaf = path->nodes[0];
1989
1990         nritems = btrfs_header_nritems(leaf);
1991         data_end = leaf_data_end(root, leaf);
1992
1993         slot = path->slots[0];
1994         old_data_start = btrfs_item_offset_nr(leaf, slot);
1995         old_size = btrfs_item_size_nr(leaf, slot);
1996         BUG_ON(old_size <= new_size);
1997         size_diff = old_size - new_size;
1998
1999         BUG_ON(slot < 0);
2000         BUG_ON(slot >= nritems);
2001
2002         /*
2003          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2004          */
2005         /* first correct the data pointers */
2006         for (i = slot; i < nritems; i++) {
2007                 u32 ioff;
2008                 item = btrfs_item_nr(leaf, i);
2009
2010                 if (!leaf->map_token) {
2011                         map_extent_buffer(leaf, (unsigned long)item,
2012                                         sizeof(struct btrfs_item),
2013                                         &leaf->map_token, &leaf->kaddr,
2014                                         &leaf->map_start, &leaf->map_len,
2015                                         KM_USER1);
2016                 }
2017
2018                 ioff = btrfs_item_offset(leaf, item);
2019                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2020         }
2021
2022         if (leaf->map_token) {
2023                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2024                 leaf->map_token = NULL;
2025         }
2026
2027         /* shift the data */
2028         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2029                       data_end + size_diff, btrfs_leaf_data(leaf) +
2030                       data_end, old_data_start + new_size - data_end);
2031
2032         item = btrfs_item_nr(leaf, slot);
2033         btrfs_set_item_size(leaf, item, new_size);
2034         btrfs_mark_buffer_dirty(leaf);
2035
2036         ret = 0;
2037         if (btrfs_leaf_free_space(root, leaf) < 0) {
2038                 btrfs_print_leaf(root, leaf);
2039                 BUG();
2040         }
2041         return ret;
2042 }
2043
2044 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2045                       struct btrfs_root *root, struct btrfs_path *path,
2046                       u32 data_size)
2047 {
2048         int ret = 0;
2049         int slot;
2050         int slot_orig;
2051         struct extent_buffer *leaf;
2052         struct btrfs_item *item;
2053         u32 nritems;
2054         unsigned int data_end;
2055         unsigned int old_data;
2056         unsigned int old_size;
2057         int i;
2058
2059         slot_orig = path->slots[0];
2060         leaf = path->nodes[0];
2061
2062         nritems = btrfs_header_nritems(leaf);
2063         data_end = leaf_data_end(root, leaf);
2064
2065         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2066                 btrfs_print_leaf(root, leaf);
2067                 BUG();
2068         }
2069         slot = path->slots[0];
2070         old_data = btrfs_item_end_nr(leaf, slot);
2071
2072         BUG_ON(slot < 0);
2073         if (slot >= nritems) {
2074                 btrfs_print_leaf(root, leaf);
2075                 printk("slot %d too large, nritems %d\n", slot, nritems);
2076                 BUG_ON(1);
2077         }
2078
2079         /*
2080          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2081          */
2082         /* first correct the data pointers */
2083         for (i = slot; i < nritems; i++) {
2084                 u32 ioff;
2085                 item = btrfs_item_nr(leaf, i);
2086
2087                 if (!leaf->map_token) {
2088                         map_extent_buffer(leaf, (unsigned long)item,
2089                                         sizeof(struct btrfs_item),
2090                                         &leaf->map_token, &leaf->kaddr,
2091                                         &leaf->map_start, &leaf->map_len,
2092                                         KM_USER1);
2093                 }
2094                 ioff = btrfs_item_offset(leaf, item);
2095                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2096         }
2097
2098         if (leaf->map_token) {
2099                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2100                 leaf->map_token = NULL;
2101         }
2102
2103         /* shift the data */
2104         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2105                       data_end - data_size, btrfs_leaf_data(leaf) +
2106                       data_end, old_data - data_end);
2107
2108         data_end = old_data;
2109         old_size = btrfs_item_size_nr(leaf, slot);
2110         item = btrfs_item_nr(leaf, slot);
2111         btrfs_set_item_size(leaf, item, old_size + data_size);
2112         btrfs_mark_buffer_dirty(leaf);
2113
2114         ret = 0;
2115         if (btrfs_leaf_free_space(root, leaf) < 0) {
2116                 btrfs_print_leaf(root, leaf);
2117                 BUG();
2118         }
2119         return ret;
2120 }
2121
2122 /*
2123  * Given a key and some data, insert an item into the tree.
2124  * This does all the path init required, making room in the tree if needed.
2125  */
2126 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2127                             struct btrfs_root *root,
2128                             struct btrfs_path *path,
2129                             struct btrfs_key *cpu_key, u32 data_size)
2130 {
2131         struct extent_buffer *leaf;
2132         struct btrfs_item *item;
2133         int ret = 0;
2134         int slot;
2135         int slot_orig;
2136         u32 nritems;
2137         unsigned int data_end;
2138         struct btrfs_disk_key disk_key;
2139
2140         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2141
2142         /* create a root if there isn't one */
2143         if (!root->node)
2144                 BUG();
2145
2146         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2147         if (ret == 0) {
2148                 return -EEXIST;
2149         }
2150         if (ret < 0)
2151                 goto out;
2152
2153         slot_orig = path->slots[0];
2154         leaf = path->nodes[0];
2155
2156         nritems = btrfs_header_nritems(leaf);
2157         data_end = leaf_data_end(root, leaf);
2158
2159         if (btrfs_leaf_free_space(root, leaf) <
2160             sizeof(struct btrfs_item) + data_size) {
2161                 btrfs_print_leaf(root, leaf);
2162                 printk("not enough freespace need %u have %d\n",
2163                        data_size, btrfs_leaf_free_space(root, leaf));
2164                 BUG();
2165         }
2166
2167         slot = path->slots[0];
2168         BUG_ON(slot < 0);
2169
2170         if (slot != nritems) {
2171                 int i;
2172                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2173
2174                 if (old_data < data_end) {
2175                         btrfs_print_leaf(root, leaf);
2176                         printk("slot %d old_data %d data_end %d\n",
2177                                slot, old_data, data_end);
2178                         BUG_ON(1);
2179                 }
2180                 /*
2181                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2182                  */
2183                 /* first correct the data pointers */
2184                 WARN_ON(leaf->map_token);
2185                 for (i = slot; i < nritems; i++) {
2186                         u32 ioff;
2187
2188                         item = btrfs_item_nr(leaf, i);
2189                         if (!leaf->map_token) {
2190                                 map_extent_buffer(leaf, (unsigned long)item,
2191                                         sizeof(struct btrfs_item),
2192                                         &leaf->map_token, &leaf->kaddr,
2193                                         &leaf->map_start, &leaf->map_len,
2194                                         KM_USER1);
2195                         }
2196
2197                         ioff = btrfs_item_offset(leaf, item);
2198                         btrfs_set_item_offset(leaf, item, ioff - data_size);
2199                 }
2200                 if (leaf->map_token) {
2201                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2202                         leaf->map_token = NULL;
2203                 }
2204
2205                 /* shift the items */
2206                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2207                               btrfs_item_nr_offset(slot),
2208                               (nritems - slot) * sizeof(struct btrfs_item));
2209
2210                 /* shift the data */
2211                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2212                               data_end - data_size, btrfs_leaf_data(leaf) +
2213                               data_end, old_data - data_end);
2214                 data_end = old_data;
2215         }
2216
2217         /* setup the item for the new data */
2218         btrfs_set_item_key(leaf, &disk_key, slot);
2219         item = btrfs_item_nr(leaf, slot);
2220         btrfs_set_item_offset(leaf, item, data_end - data_size);
2221         btrfs_set_item_size(leaf, item, data_size);
2222         btrfs_set_header_nritems(leaf, nritems + 1);
2223         btrfs_mark_buffer_dirty(leaf);
2224
2225         ret = 0;
2226         if (slot == 0)
2227                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2228
2229         if (btrfs_leaf_free_space(root, leaf) < 0) {
2230                 btrfs_print_leaf(root, leaf);
2231                 BUG();
2232         }
2233 out:
2234         return ret;
2235 }
2236
2237 /*
2238  * Given a key and some data, insert an item into the tree.
2239  * This does all the path init required, making room in the tree if needed.
2240  */
2241 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2242                       *root, struct btrfs_key *cpu_key, void *data, u32
2243                       data_size)
2244 {
2245         int ret = 0;
2246         struct btrfs_path *path;
2247         struct extent_buffer *leaf;
2248         unsigned long ptr;
2249
2250         path = btrfs_alloc_path();
2251         BUG_ON(!path);
2252         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2253         if (!ret) {
2254                 leaf = path->nodes[0];
2255                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2256                 write_extent_buffer(leaf, data, ptr, data_size);
2257                 btrfs_mark_buffer_dirty(leaf);
2258         }
2259         btrfs_free_path(path);
2260         return ret;
2261 }
2262
2263 /*
2264  * delete the pointer from a given node.
2265  *
2266  * If the delete empties a node, the node is removed from the tree,
2267  * continuing all the way the root if required.  The root is converted into
2268  * a leaf if all the nodes are emptied.
2269  */
2270 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2271                    struct btrfs_path *path, int level, int slot)
2272 {
2273         struct extent_buffer *parent = path->nodes[level];
2274         u32 nritems;
2275         int ret = 0;
2276         int wret;
2277
2278         nritems = btrfs_header_nritems(parent);
2279         if (slot != nritems -1) {
2280                 memmove_extent_buffer(parent,
2281                               btrfs_node_key_ptr_offset(slot),
2282                               btrfs_node_key_ptr_offset(slot + 1),
2283                               sizeof(struct btrfs_key_ptr) *
2284                               (nritems - slot - 1));
2285         }
2286         nritems--;
2287         btrfs_set_header_nritems(parent, nritems);
2288         if (nritems == 0 && parent == root->node) {
2289                 BUG_ON(btrfs_header_level(root->node) != 1);
2290                 /* just turn the root into a leaf and break */
2291                 btrfs_set_header_level(root->node, 0);
2292         } else if (slot == 0) {
2293                 struct btrfs_disk_key disk_key;
2294
2295                 btrfs_node_key(parent, &disk_key, 0);
2296                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2297                 if (wret)
2298                         ret = wret;
2299         }
2300         btrfs_mark_buffer_dirty(parent);
2301         return ret;
2302 }
2303
2304 /*
2305  * delete the item at the leaf level in path.  If that empties
2306  * the leaf, remove it from the tree
2307  */
2308 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2309                    struct btrfs_path *path)
2310 {
2311         int slot;
2312         struct extent_buffer *leaf;
2313         struct btrfs_item *item;
2314         int doff;
2315         int dsize;
2316         int ret = 0;
2317         int wret;
2318         u32 nritems;
2319
2320         leaf = path->nodes[0];
2321         slot = path->slots[0];
2322         doff = btrfs_item_offset_nr(leaf, slot);
2323         dsize = btrfs_item_size_nr(leaf, slot);
2324         nritems = btrfs_header_nritems(leaf);
2325
2326         if (slot != nritems - 1) {
2327                 int i;
2328                 int data_end = leaf_data_end(root, leaf);
2329
2330                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2331                               data_end + dsize,
2332                               btrfs_leaf_data(leaf) + data_end,
2333                               doff - data_end);
2334
2335                 for (i = slot + 1; i < nritems; i++) {
2336                         u32 ioff;
2337
2338                         item = btrfs_item_nr(leaf, i);
2339                         if (!leaf->map_token) {
2340                                 map_extent_buffer(leaf, (unsigned long)item,
2341                                         sizeof(struct btrfs_item),
2342                                         &leaf->map_token, &leaf->kaddr,
2343                                         &leaf->map_start, &leaf->map_len,
2344                                         KM_USER1);
2345                         }
2346                         ioff = btrfs_item_offset(leaf, item);
2347                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2348                 }
2349
2350                 if (leaf->map_token) {
2351                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2352                         leaf->map_token = NULL;
2353                 }
2354
2355                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2356                               btrfs_item_nr_offset(slot + 1),
2357                               sizeof(struct btrfs_item) *
2358                               (nritems - slot - 1));
2359         }
2360         btrfs_set_header_nritems(leaf, nritems - 1);
2361         nritems--;
2362
2363         /* delete the leaf if we've emptied it */
2364         if (nritems == 0) {
2365                 if (leaf == root->node) {
2366                         btrfs_set_header_level(leaf, 0);
2367                 } else {
2368                         clean_tree_block(trans, root, leaf);
2369                         wait_on_tree_block_writeback(root, leaf);
2370                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2371                         if (wret)
2372                                 ret = wret;
2373                         wret = btrfs_free_extent(trans, root,
2374                                                  leaf->start, leaf->len, 1);
2375                         if (wret)
2376                                 ret = wret;
2377                 }
2378         } else {
2379                 int used = leaf_space_used(leaf, 0, nritems);
2380                 if (slot == 0) {
2381                         struct btrfs_disk_key disk_key;
2382
2383                         btrfs_item_key(leaf, &disk_key, 0);
2384                         wret = fixup_low_keys(trans, root, path,
2385                                               &disk_key, 1);
2386                         if (wret)
2387                                 ret = wret;
2388                 }
2389
2390                 /* delete the leaf if it is mostly empty */
2391                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2392                         /* push_leaf_left fixes the path.
2393                          * make sure the path still points to our leaf
2394                          * for possible call to del_ptr below
2395                          */
2396                         slot = path->slots[1];
2397                         extent_buffer_get(leaf);
2398
2399                         wret = push_leaf_right(trans, root, path, 1);
2400                         if (wret < 0 && wret != -ENOSPC)
2401                                 ret = wret;
2402
2403                         if (path->nodes[0] == leaf &&
2404                             btrfs_header_nritems(leaf)) {
2405                                 wret = push_leaf_left(trans, root, path, 1);
2406                                 if (wret < 0 && wret != -ENOSPC)
2407                                         ret = wret;
2408                         }
2409
2410                         if (btrfs_header_nritems(leaf) == 0) {
2411                                 u64 bytenr = leaf->start;
2412                                 u32 blocksize = leaf->len;
2413
2414                                 clean_tree_block(trans, root, leaf);
2415                                 wait_on_tree_block_writeback(root, leaf);
2416
2417                                 wret = del_ptr(trans, root, path, 1, slot);
2418                                 if (wret)
2419                                         ret = wret;
2420
2421                                 free_extent_buffer(leaf);
2422                                 wret = btrfs_free_extent(trans, root, bytenr,
2423                                                          blocksize, 1);
2424                                 if (wret)
2425                                         ret = wret;
2426                         } else {
2427                                 btrfs_mark_buffer_dirty(leaf);
2428                                 free_extent_buffer(leaf);
2429                         }
2430                 } else {
2431                         btrfs_mark_buffer_dirty(leaf);
2432                 }
2433         }
2434         return ret;
2435 }
2436
2437 /*
2438  * walk up the tree as far as required to find the next leaf.
2439  * returns 0 if it found something or 1 if there are no greater leaves.
2440  * returns < 0 on io errors.
2441  */
2442 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2443 {
2444         int slot;
2445         int level = 1;
2446         u64 bytenr;
2447         struct extent_buffer *c;
2448         struct extent_buffer *next = NULL;
2449
2450         while(level < BTRFS_MAX_LEVEL) {
2451                 if (!path->nodes[level])
2452                         return 1;
2453
2454                 slot = path->slots[level] + 1;
2455                 c = path->nodes[level];
2456                 if (slot >= btrfs_header_nritems(c)) {
2457                         level++;
2458                         continue;
2459                 }
2460
2461                 bytenr = btrfs_node_blockptr(c, slot);
2462                 if (next)
2463                         free_extent_buffer(next);
2464
2465                 if (path->reada)
2466                         reada_for_search(root, path, level, slot);
2467
2468                 next = read_tree_block(root, bytenr,
2469                                        btrfs_level_size(root, level -1));
2470                 break;
2471         }
2472         path->slots[level] = slot;
2473         while(1) {
2474                 level--;
2475                 c = path->nodes[level];
2476                 free_extent_buffer(c);
2477                 path->nodes[level] = next;
2478                 path->slots[level] = 0;
2479                 if (!level)
2480                         break;
2481                 if (path->reada)
2482                         reada_for_search(root, path, level, 0);
2483                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2484                                        btrfs_level_size(root, level - 1));
2485         }
2486         return 0;
2487 }