ocfs2: Improve ocfs2_read_xattr_bucket().
[pandora-kernel.git] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38
39 #define MLOG_MASK_PREFIX ML_XATTR
40 #include <cluster/masklog.h>
41
42 #include "ocfs2.h"
43 #include "alloc.h"
44 #include "dlmglue.h"
45 #include "file.h"
46 #include "symlink.h"
47 #include "sysfile.h"
48 #include "inode.h"
49 #include "journal.h"
50 #include "ocfs2_fs.h"
51 #include "suballoc.h"
52 #include "uptodate.h"
53 #include "buffer_head_io.h"
54 #include "super.h"
55 #include "xattr.h"
56
57
58 struct ocfs2_xattr_def_value_root {
59         struct ocfs2_xattr_value_root   xv;
60         struct ocfs2_extent_rec         er;
61 };
62
63 struct ocfs2_xattr_bucket {
64         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
65 };
66
/* Size in bytes of a value root together with its one trailing record. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/* NOTE(review): presumably the largest value stored inline - confirm. */
#define OCFS2_XATTR_INLINE_SIZE	80
69
70 static struct ocfs2_xattr_def_value_root def_xv = {
71         .xv.xr_list.l_count = cpu_to_le16(1),
72 };
73
74 struct xattr_handler *ocfs2_xattr_handlers[] = {
75         &ocfs2_xattr_user_handler,
76         &ocfs2_xattr_trusted_handler,
77         NULL
78 };
79
80 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
81         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
82         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
83 };
84
85 struct ocfs2_xattr_info {
86         int name_index;
87         const char *name;
88         const void *value;
89         size_t value_len;
90 };
91
92 struct ocfs2_xattr_search {
93         struct buffer_head *inode_bh;
94         /*
95          * xattr_bh point to the block buffer head which has extended attribute
96          * when extended attribute in inode, xattr_bh is equal to inode_bh.
97          */
98         struct buffer_head *xattr_bh;
99         struct ocfs2_xattr_header *header;
100         struct ocfs2_xattr_bucket bucket;
101         void *base;
102         void *end;
103         struct ocfs2_xattr_entry *here;
104         int not_found;
105 };
106
107 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
108                                              struct ocfs2_xattr_header *xh,
109                                              int index,
110                                              int *block_off,
111                                              int *new_offset);
112
113 static int ocfs2_xattr_block_find(struct inode *inode,
114                                   int name_index,
115                                   const char *name,
116                                   struct ocfs2_xattr_search *xs);
117 static int ocfs2_xattr_index_block_find(struct inode *inode,
118                                         struct buffer_head *root_bh,
119                                         int name_index,
120                                         const char *name,
121                                         struct ocfs2_xattr_search *xs);
122
123 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
124                                         struct ocfs2_xattr_tree_root *xt,
125                                         char *buffer,
126                                         size_t buffer_size);
127
128 static int ocfs2_xattr_create_index_block(struct inode *inode,
129                                           struct ocfs2_xattr_search *xs);
130
131 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
132                                              struct ocfs2_xattr_info *xi,
133                                              struct ocfs2_xattr_search *xs);
134
135 static int ocfs2_delete_xattr_index_block(struct inode *inode,
136                                           struct buffer_head *xb_bh);
137
138 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
139 {
140         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
141 }
142
143 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
144 {
145         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
146 }
147
148 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
149 {
150         u16 len = sb->s_blocksize -
151                  offsetof(struct ocfs2_xattr_header, xh_entries);
152
153         return len / sizeof(struct ocfs2_xattr_entry);
154 }
155
/* Block number of the first block of bucket @bkt. */
#define bucket_blkno(bkt) ((bkt)->bu_bhs[0]->b_blocknr)
/* Data of block number @idx within bucket @bkt. */
#define bucket_block(bkt, idx) ((bkt)->bu_bhs[(idx)]->b_data)
/* The first block of bucket @bkt, viewed as an xattr header. */
#define bucket_xh(bkt) ((struct ocfs2_xattr_header *)bucket_block((bkt), 0))
159
160 static void ocfs2_xattr_bucket_relse(struct inode *inode,
161                                      struct ocfs2_xattr_bucket *bucket)
162 {
163         int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
164
165         for (i = 0; i < blks; i++) {
166                 brelse(bucket->bu_bhs[i]);
167                 bucket->bu_bhs[i] = NULL;
168         }
169 }
170
171 /*
172  * A bucket that has never been written to disk doesn't need to be
173  * read.  We just need the buffer_heads.  Don't call this for
174  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
175  * them fully.
176  */
177 static int ocfs2_init_xattr_bucket(struct inode *inode,
178                                    struct ocfs2_xattr_bucket *bucket,
179                                    u64 xb_blkno)
180 {
181         int i, rc = 0;
182         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
183
184         for (i = 0; i < blks; i++) {
185                 bucket->bu_bhs[i] = sb_getblk(inode->i_sb, xb_blkno + i);
186                 if (!bucket->bu_bhs[i]) {
187                         rc = -EIO;
188                         mlog_errno(rc);
189                         break;
190                 }
191
192                 ocfs2_set_new_buffer_uptodate(inode, bucket->bu_bhs[i]);
193         }
194
195         if (rc)
196                 ocfs2_xattr_bucket_relse(inode, bucket);
197         return rc;
198 }
199
200 /* Read the xattr bucket at xb_blkno */
201 static int ocfs2_read_xattr_bucket(struct inode *inode,
202                                    struct ocfs2_xattr_bucket *bucket,
203                                    u64 xb_blkno)
204 {
205         int rc, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
206
207         rc = ocfs2_read_blocks(inode, xb_blkno, blks, bucket->bu_bhs, 0);
208         if (rc)
209                 ocfs2_xattr_bucket_relse(inode, bucket);
210         return rc;
211 }
212
213 static inline const char *ocfs2_xattr_prefix(int name_index)
214 {
215         struct xattr_handler *handler = NULL;
216
217         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
218                 handler = ocfs2_xattr_handler_map[name_index];
219
220         return handler ? handler->prefix : NULL;
221 }
222
223 static u32 ocfs2_xattr_name_hash(struct inode *inode,
224                                  const char *name,
225                                  int name_len)
226 {
227         /* Get hash value of uuid from super block */
228         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
229         int i;
230
231         /* hash extended attribute name */
232         for (i = 0; i < name_len; i++) {
233                 hash = (hash << OCFS2_HASH_SHIFT) ^
234                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
235                        *name++;
236         }
237
238         return hash;
239 }
240
241 /*
242  * ocfs2_xattr_hash_entry()
243  *
244  * Compute the hash of an extended attribute.
245  */
246 static void ocfs2_xattr_hash_entry(struct inode *inode,
247                                    struct ocfs2_xattr_header *header,
248                                    struct ocfs2_xattr_entry *entry)
249 {
250         u32 hash = 0;
251         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
252
253         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
254         entry->xe_name_hash = cpu_to_le32(hash);
255
256         return;
257 }
258
259 static int ocfs2_xattr_extend_allocation(struct inode *inode,
260                                          u32 clusters_to_add,
261                                          struct buffer_head *xattr_bh,
262                                          struct ocfs2_xattr_value_root *xv)
263 {
264         int status = 0;
265         int restart_func = 0;
266         int credits = 0;
267         handle_t *handle = NULL;
268         struct ocfs2_alloc_context *data_ac = NULL;
269         struct ocfs2_alloc_context *meta_ac = NULL;
270         enum ocfs2_alloc_restarted why;
271         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
272         u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
273         struct ocfs2_extent_tree et;
274
275         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
276
277         ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
278
279 restart_all:
280
281         status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
282                                        &data_ac, &meta_ac);
283         if (status) {
284                 mlog_errno(status);
285                 goto leave;
286         }
287
288         credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
289                                             clusters_to_add);
290         handle = ocfs2_start_trans(osb, credits);
291         if (IS_ERR(handle)) {
292                 status = PTR_ERR(handle);
293                 handle = NULL;
294                 mlog_errno(status);
295                 goto leave;
296         }
297
298 restarted_transaction:
299         status = ocfs2_journal_access(handle, inode, xattr_bh,
300                                       OCFS2_JOURNAL_ACCESS_WRITE);
301         if (status < 0) {
302                 mlog_errno(status);
303                 goto leave;
304         }
305
306         prev_clusters = le32_to_cpu(xv->xr_clusters);
307         status = ocfs2_add_clusters_in_btree(osb,
308                                              inode,
309                                              &logical_start,
310                                              clusters_to_add,
311                                              0,
312                                              &et,
313                                              handle,
314                                              data_ac,
315                                              meta_ac,
316                                              &why);
317         if ((status < 0) && (status != -EAGAIN)) {
318                 if (status != -ENOSPC)
319                         mlog_errno(status);
320                 goto leave;
321         }
322
323         status = ocfs2_journal_dirty(handle, xattr_bh);
324         if (status < 0) {
325                 mlog_errno(status);
326                 goto leave;
327         }
328
329         clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
330
331         if (why != RESTART_NONE && clusters_to_add) {
332                 if (why == RESTART_META) {
333                         mlog(0, "restarting function.\n");
334                         restart_func = 1;
335                 } else {
336                         BUG_ON(why != RESTART_TRANS);
337
338                         mlog(0, "restarting transaction.\n");
339                         /* TODO: This can be more intelligent. */
340                         credits = ocfs2_calc_extend_credits(osb->sb,
341                                                             et.et_root_el,
342                                                             clusters_to_add);
343                         status = ocfs2_extend_trans(handle, credits);
344                         if (status < 0) {
345                                 /* handle still has to be committed at
346                                  * this point. */
347                                 status = -ENOMEM;
348                                 mlog_errno(status);
349                                 goto leave;
350                         }
351                         goto restarted_transaction;
352                 }
353         }
354
355 leave:
356         if (handle) {
357                 ocfs2_commit_trans(osb, handle);
358                 handle = NULL;
359         }
360         if (data_ac) {
361                 ocfs2_free_alloc_context(data_ac);
362                 data_ac = NULL;
363         }
364         if (meta_ac) {
365                 ocfs2_free_alloc_context(meta_ac);
366                 meta_ac = NULL;
367         }
368         if ((!status) && restart_func) {
369                 restart_func = 0;
370                 goto restart_all;
371         }
372
373         return status;
374 }
375
376 static int __ocfs2_remove_xattr_range(struct inode *inode,
377                                       struct buffer_head *root_bh,
378                                       struct ocfs2_xattr_value_root *xv,
379                                       u32 cpos, u32 phys_cpos, u32 len,
380                                       struct ocfs2_cached_dealloc_ctxt *dealloc)
381 {
382         int ret;
383         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
384         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
385         struct inode *tl_inode = osb->osb_tl_inode;
386         handle_t *handle;
387         struct ocfs2_alloc_context *meta_ac = NULL;
388         struct ocfs2_extent_tree et;
389
390         ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
391
392         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
393         if (ret) {
394                 mlog_errno(ret);
395                 return ret;
396         }
397
398         mutex_lock(&tl_inode->i_mutex);
399
400         if (ocfs2_truncate_log_needs_flush(osb)) {
401                 ret = __ocfs2_flush_truncate_log(osb);
402                 if (ret < 0) {
403                         mlog_errno(ret);
404                         goto out;
405                 }
406         }
407
408         handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
409         if (IS_ERR(handle)) {
410                 ret = PTR_ERR(handle);
411                 mlog_errno(ret);
412                 goto out;
413         }
414
415         ret = ocfs2_journal_access(handle, inode, root_bh,
416                                    OCFS2_JOURNAL_ACCESS_WRITE);
417         if (ret) {
418                 mlog_errno(ret);
419                 goto out_commit;
420         }
421
422         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
423                                   dealloc);
424         if (ret) {
425                 mlog_errno(ret);
426                 goto out_commit;
427         }
428
429         le32_add_cpu(&xv->xr_clusters, -len);
430
431         ret = ocfs2_journal_dirty(handle, root_bh);
432         if (ret) {
433                 mlog_errno(ret);
434                 goto out_commit;
435         }
436
437         ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
438         if (ret)
439                 mlog_errno(ret);
440
441 out_commit:
442         ocfs2_commit_trans(osb, handle);
443 out:
444         mutex_unlock(&tl_inode->i_mutex);
445
446         if (meta_ac)
447                 ocfs2_free_alloc_context(meta_ac);
448
449         return ret;
450 }
451
452 static int ocfs2_xattr_shrink_size(struct inode *inode,
453                                    u32 old_clusters,
454                                    u32 new_clusters,
455                                    struct buffer_head *root_bh,
456                                    struct ocfs2_xattr_value_root *xv)
457 {
458         int ret = 0;
459         u32 trunc_len, cpos, phys_cpos, alloc_size;
460         u64 block;
461         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
462         struct ocfs2_cached_dealloc_ctxt dealloc;
463
464         ocfs2_init_dealloc_ctxt(&dealloc);
465
466         if (old_clusters <= new_clusters)
467                 return 0;
468
469         cpos = new_clusters;
470         trunc_len = old_clusters - new_clusters;
471         while (trunc_len) {
472                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
473                                                &alloc_size, &xv->xr_list);
474                 if (ret) {
475                         mlog_errno(ret);
476                         goto out;
477                 }
478
479                 if (alloc_size > trunc_len)
480                         alloc_size = trunc_len;
481
482                 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
483                                                  phys_cpos, alloc_size,
484                                                  &dealloc);
485                 if (ret) {
486                         mlog_errno(ret);
487                         goto out;
488                 }
489
490                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
491                 ocfs2_remove_xattr_clusters_from_cache(inode, block,
492                                                        alloc_size);
493                 cpos += alloc_size;
494                 trunc_len -= alloc_size;
495         }
496
497 out:
498         ocfs2_schedule_truncate_log_flush(osb, 1);
499         ocfs2_run_deallocs(osb, &dealloc);
500
501         return ret;
502 }
503
504 static int ocfs2_xattr_value_truncate(struct inode *inode,
505                                       struct buffer_head *root_bh,
506                                       struct ocfs2_xattr_value_root *xv,
507                                       int len)
508 {
509         int ret;
510         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
511         u32 old_clusters = le32_to_cpu(xv->xr_clusters);
512
513         if (new_clusters == old_clusters)
514                 return 0;
515
516         if (new_clusters > old_clusters)
517                 ret = ocfs2_xattr_extend_allocation(inode,
518                                                     new_clusters - old_clusters,
519                                                     root_bh, xv);
520         else
521                 ret = ocfs2_xattr_shrink_size(inode,
522                                               old_clusters, new_clusters,
523                                               root_bh, xv);
524
525         return ret;
526 }
527
/*
 * Append one attribute name, "<prefix><name>" plus a terminating NUL, to
 * the name list being built in @buffer.
 *
 * @buffer:   start of the list buffer; not touched when @size is 0
 * @size:     capacity of @buffer, or 0 to only compute the space needed
 * @result:   number of list bytes accounted for so far; always advanced
 *            by the length of this entry, even when -ERANGE is returned
 * @prefix:   NUL-terminated name prefix
 * @name:     attribute name, not necessarily NUL-terminated
 * @name_len: length of @name in bytes
 *
 * Returns 0 on success, or -ERANGE if the entry does not fit.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	size_t prefix_len = strlen(prefix);
	char *p;

	*result = start + prefix_len + name_len + 1;

	/* we are just looking for how big our buffer needs to be */
	if (!size)
		return 0;

	if (*result > size)
		return -ERANGE;

	/*
	 * Only form the write pointer once we know we will write: for a
	 * pure size query @buffer may be NULL, and pointer arithmetic on
	 * NULL is undefined.
	 */
	p = buffer + start;
	memcpy(p, prefix, prefix_len);
	memcpy(p + prefix_len, name, name_len);
	p[prefix_len + name_len] = '\0';

	return 0;
}
551
552 static int ocfs2_xattr_list_entries(struct inode *inode,
553                                     struct ocfs2_xattr_header *header,
554                                     char *buffer, size_t buffer_size)
555 {
556         size_t result = 0;
557         int i, type, ret;
558         const char *prefix, *name;
559
560         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
561                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
562                 type = ocfs2_xattr_get_type(entry);
563                 prefix = ocfs2_xattr_prefix(type);
564
565                 if (prefix) {
566                         name = (const char *)header +
567                                 le16_to_cpu(entry->xe_name_offset);
568
569                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
570                                                      &result, prefix, name,
571                                                      entry->xe_name_len);
572                         if (ret)
573                                 return ret;
574                 }
575         }
576
577         return result;
578 }
579
580 static int ocfs2_xattr_ibody_list(struct inode *inode,
581                                   struct ocfs2_dinode *di,
582                                   char *buffer,
583                                   size_t buffer_size)
584 {
585         struct ocfs2_xattr_header *header = NULL;
586         struct ocfs2_inode_info *oi = OCFS2_I(inode);
587         int ret = 0;
588
589         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
590                 return ret;
591
592         header = (struct ocfs2_xattr_header *)
593                  ((void *)di + inode->i_sb->s_blocksize -
594                  le16_to_cpu(di->i_xattr_inline_size));
595
596         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
597
598         return ret;
599 }
600
601 static int ocfs2_xattr_block_list(struct inode *inode,
602                                   struct ocfs2_dinode *di,
603                                   char *buffer,
604                                   size_t buffer_size)
605 {
606         struct buffer_head *blk_bh = NULL;
607         struct ocfs2_xattr_block *xb;
608         int ret = 0;
609
610         if (!di->i_xattr_loc)
611                 return ret;
612
613         ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
614         if (ret < 0) {
615                 mlog_errno(ret);
616                 return ret;
617         }
618
619         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
620         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
621                 ret = -EIO;
622                 goto cleanup;
623         }
624
625         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
626                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
627                 ret = ocfs2_xattr_list_entries(inode, header,
628                                                buffer, buffer_size);
629         } else {
630                 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
631                 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
632                                                    buffer, buffer_size);
633         }
634 cleanup:
635         brelse(blk_bh);
636
637         return ret;
638 }
639
640 ssize_t ocfs2_listxattr(struct dentry *dentry,
641                         char *buffer,
642                         size_t size)
643 {
644         int ret = 0, i_ret = 0, b_ret = 0;
645         struct buffer_head *di_bh = NULL;
646         struct ocfs2_dinode *di = NULL;
647         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
648
649         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
650                 return -EOPNOTSUPP;
651
652         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
653                 return ret;
654
655         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
656         if (ret < 0) {
657                 mlog_errno(ret);
658                 return ret;
659         }
660
661         di = (struct ocfs2_dinode *)di_bh->b_data;
662
663         down_read(&oi->ip_xattr_sem);
664         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
665         if (i_ret < 0)
666                 b_ret = 0;
667         else {
668                 if (buffer) {
669                         buffer += i_ret;
670                         size -= i_ret;
671                 }
672                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
673                                                buffer, size);
674                 if (b_ret < 0)
675                         i_ret = 0;
676         }
677         up_read(&oi->ip_xattr_sem);
678         ocfs2_inode_unlock(dentry->d_inode, 0);
679
680         brelse(di_bh);
681
682         return i_ret + b_ret;
683 }
684
685 static int ocfs2_xattr_find_entry(int name_index,
686                                   const char *name,
687                                   struct ocfs2_xattr_search *xs)
688 {
689         struct ocfs2_xattr_entry *entry;
690         size_t name_len;
691         int i, cmp = 1;
692
693         if (name == NULL)
694                 return -EINVAL;
695
696         name_len = strlen(name);
697         entry = xs->here;
698         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
699                 cmp = name_index - ocfs2_xattr_get_type(entry);
700                 if (!cmp)
701                         cmp = name_len - entry->xe_name_len;
702                 if (!cmp)
703                         cmp = memcmp(name, (xs->base +
704                                      le16_to_cpu(entry->xe_name_offset)),
705                                      name_len);
706                 if (cmp == 0)
707                         break;
708                 entry += 1;
709         }
710         xs->here = entry;
711
712         return cmp ? -ENODATA : 0;
713 }
714
715 static int ocfs2_xattr_get_value_outside(struct inode *inode,
716                                          struct ocfs2_xattr_value_root *xv,
717                                          void *buffer,
718                                          size_t len)
719 {
720         u32 cpos, p_cluster, num_clusters, bpc, clusters;
721         u64 blkno;
722         int i, ret = 0;
723         size_t cplen, blocksize;
724         struct buffer_head *bh = NULL;
725         struct ocfs2_extent_list *el;
726
727         el = &xv->xr_list;
728         clusters = le32_to_cpu(xv->xr_clusters);
729         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
730         blocksize = inode->i_sb->s_blocksize;
731
732         cpos = 0;
733         while (cpos < clusters) {
734                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
735                                                &num_clusters, el);
736                 if (ret) {
737                         mlog_errno(ret);
738                         goto out;
739                 }
740
741                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
742                 /* Copy ocfs2_xattr_value */
743                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
744                         ret = ocfs2_read_block(inode, blkno, &bh);
745                         if (ret) {
746                                 mlog_errno(ret);
747                                 goto out;
748                         }
749
750                         cplen = len >= blocksize ? blocksize : len;
751                         memcpy(buffer, bh->b_data, cplen);
752                         len -= cplen;
753                         buffer += cplen;
754
755                         brelse(bh);
756                         bh = NULL;
757                         if (len == 0)
758                                 break;
759                 }
760                 cpos += num_clusters;
761         }
762 out:
763         return ret;
764 }
765
766 static int ocfs2_xattr_ibody_get(struct inode *inode,
767                                  int name_index,
768                                  const char *name,
769                                  void *buffer,
770                                  size_t buffer_size,
771                                  struct ocfs2_xattr_search *xs)
772 {
773         struct ocfs2_inode_info *oi = OCFS2_I(inode);
774         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
775         struct ocfs2_xattr_value_root *xv;
776         size_t size;
777         int ret = 0;
778
779         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
780                 return -ENODATA;
781
782         xs->end = (void *)di + inode->i_sb->s_blocksize;
783         xs->header = (struct ocfs2_xattr_header *)
784                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
785         xs->base = (void *)xs->header;
786         xs->here = xs->header->xh_entries;
787
788         ret = ocfs2_xattr_find_entry(name_index, name, xs);
789         if (ret)
790                 return ret;
791         size = le64_to_cpu(xs->here->xe_value_size);
792         if (buffer) {
793                 if (size > buffer_size)
794                         return -ERANGE;
795                 if (ocfs2_xattr_is_local(xs->here)) {
796                         memcpy(buffer, (void *)xs->base +
797                                le16_to_cpu(xs->here->xe_name_offset) +
798                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
799                 } else {
800                         xv = (struct ocfs2_xattr_value_root *)
801                                 (xs->base + le16_to_cpu(
802                                  xs->here->xe_name_offset) +
803                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
804                         ret = ocfs2_xattr_get_value_outside(inode, xv,
805                                                             buffer, size);
806                         if (ret < 0) {
807                                 mlog_errno(ret);
808                                 return ret;
809                         }
810                 }
811         }
812
813         return size;
814 }
815
816 static int ocfs2_xattr_block_get(struct inode *inode,
817                                  int name_index,
818                                  const char *name,
819                                  void *buffer,
820                                  size_t buffer_size,
821                                  struct ocfs2_xattr_search *xs)
822 {
823         struct ocfs2_xattr_block *xb;
824         struct ocfs2_xattr_value_root *xv;
825         size_t size;
826         int ret = -ENODATA, name_offset, name_len, block_off, i;
827
828         memset(&xs->bucket, 0, sizeof(xs->bucket));
829
830         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
831         if (ret) {
832                 mlog_errno(ret);
833                 goto cleanup;
834         }
835
836         if (xs->not_found) {
837                 ret = -ENODATA;
838                 goto cleanup;
839         }
840
841         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
842         size = le64_to_cpu(xs->here->xe_value_size);
843         if (buffer) {
844                 ret = -ERANGE;
845                 if (size > buffer_size)
846                         goto cleanup;
847
848                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
849                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
850                 i = xs->here - xs->header->xh_entries;
851
852                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
853                         ret = ocfs2_xattr_bucket_get_name_value(inode,
854                                                                 bucket_xh(&xs->bucket),
855                                                                 i,
856                                                                 &block_off,
857                                                                 &name_offset);
858                         xs->base = bucket_block(&xs->bucket, block_off);
859                 }
860                 if (ocfs2_xattr_is_local(xs->here)) {
861                         memcpy(buffer, (void *)xs->base +
862                                name_offset + name_len, size);
863                 } else {
864                         xv = (struct ocfs2_xattr_value_root *)
865                                 (xs->base + name_offset + name_len);
866                         ret = ocfs2_xattr_get_value_outside(inode, xv,
867                                                             buffer, size);
868                         if (ret < 0) {
869                                 mlog_errno(ret);
870                                 goto cleanup;
871                         }
872                 }
873         }
874         ret = size;
875 cleanup:
876         ocfs2_xattr_bucket_relse(inode, &xs->bucket);
877         memset(&xs->bucket, 0, sizeof(xs->bucket));
878
879         brelse(xs->xattr_bh);
880         xs->xattr_bh = NULL;
881         return ret;
882 }
883
884 /* ocfs2_xattr_get()
885  *
886  * Copy an extended attribute into the buffer provided.
887  * Buffer is NULL to compute the size of buffer required.
888  */
889 static int ocfs2_xattr_get(struct inode *inode,
890                            int name_index,
891                            const char *name,
892                            void *buffer,
893                            size_t buffer_size)
894 {
895         int ret;
896         struct ocfs2_dinode *di = NULL;
897         struct buffer_head *di_bh = NULL;
898         struct ocfs2_inode_info *oi = OCFS2_I(inode);
899         struct ocfs2_xattr_search xis = {
900                 .not_found = -ENODATA,
901         };
902         struct ocfs2_xattr_search xbs = {
903                 .not_found = -ENODATA,
904         };
905
906         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
907                 return -EOPNOTSUPP;
908
909         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
910                 ret = -ENODATA;
911
912         ret = ocfs2_inode_lock(inode, &di_bh, 0);
913         if (ret < 0) {
914                 mlog_errno(ret);
915                 return ret;
916         }
917         xis.inode_bh = xbs.inode_bh = di_bh;
918         di = (struct ocfs2_dinode *)di_bh->b_data;
919
920         down_read(&oi->ip_xattr_sem);
921         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
922                                     buffer_size, &xis);
923         if (ret == -ENODATA && di->i_xattr_loc)
924                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
925                                             buffer_size, &xbs);
926         up_read(&oi->ip_xattr_sem);
927         ocfs2_inode_unlock(inode, 0);
928
929         brelse(di_bh);
930
931         return ret;
932 }
933
934 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
935                                            struct ocfs2_xattr_value_root *xv,
936                                            const void *value,
937                                            int value_len)
938 {
939         int ret = 0, i, cp_len, credits;
940         u16 blocksize = inode->i_sb->s_blocksize;
941         u32 p_cluster, num_clusters;
942         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
943         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
944         u64 blkno;
945         struct buffer_head *bh = NULL;
946         handle_t *handle;
947
948         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
949
950         credits = clusters * bpc;
951         handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
952         if (IS_ERR(handle)) {
953                 ret = PTR_ERR(handle);
954                 mlog_errno(ret);
955                 goto out;
956         }
957
958         while (cpos < clusters) {
959                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
960                                                &num_clusters, &xv->xr_list);
961                 if (ret) {
962                         mlog_errno(ret);
963                         goto out_commit;
964                 }
965
966                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
967
968                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
969                         ret = ocfs2_read_block(inode, blkno, &bh);
970                         if (ret) {
971                                 mlog_errno(ret);
972                                 goto out_commit;
973                         }
974
975                         ret = ocfs2_journal_access(handle,
976                                                    inode,
977                                                    bh,
978                                                    OCFS2_JOURNAL_ACCESS_WRITE);
979                         if (ret < 0) {
980                                 mlog_errno(ret);
981                                 goto out_commit;
982                         }
983
984                         cp_len = value_len > blocksize ? blocksize : value_len;
985                         memcpy(bh->b_data, value, cp_len);
986                         value_len -= cp_len;
987                         value += cp_len;
988                         if (cp_len < blocksize)
989                                 memset(bh->b_data + cp_len, 0,
990                                        blocksize - cp_len);
991
992                         ret = ocfs2_journal_dirty(handle, bh);
993                         if (ret < 0) {
994                                 mlog_errno(ret);
995                                 goto out_commit;
996                         }
997                         brelse(bh);
998                         bh = NULL;
999
1000                         /*
1001                          * XXX: do we need to empty all the following
1002                          * blocks in this cluster?
1003                          */
1004                         if (!value_len)
1005                                 break;
1006                 }
1007                 cpos += num_clusters;
1008         }
1009 out_commit:
1010         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1011 out:
1012         brelse(bh);
1013
1014         return ret;
1015 }
1016
1017 static int ocfs2_xattr_cleanup(struct inode *inode,
1018                                struct ocfs2_xattr_info *xi,
1019                                struct ocfs2_xattr_search *xs,
1020                                size_t offs)
1021 {
1022         handle_t *handle = NULL;
1023         int ret = 0;
1024         size_t name_len = strlen(xi->name);
1025         void *val = xs->base + offs;
1026         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1027
1028         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1029                                    OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1030         if (IS_ERR(handle)) {
1031                 ret = PTR_ERR(handle);
1032                 mlog_errno(ret);
1033                 goto out;
1034         }
1035         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1036                                    OCFS2_JOURNAL_ACCESS_WRITE);
1037         if (ret) {
1038                 mlog_errno(ret);
1039                 goto out_commit;
1040         }
1041         /* Decrease xattr count */
1042         le16_add_cpu(&xs->header->xh_count, -1);
1043         /* Remove the xattr entry and tree root which has already be set*/
1044         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1045         memset(val, 0, size);
1046
1047         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1048         if (ret < 0)
1049                 mlog_errno(ret);
1050 out_commit:
1051         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1052 out:
1053         return ret;
1054 }
1055
1056 static int ocfs2_xattr_update_entry(struct inode *inode,
1057                                     struct ocfs2_xattr_info *xi,
1058                                     struct ocfs2_xattr_search *xs,
1059                                     size_t offs)
1060 {
1061         handle_t *handle = NULL;
1062         int ret = 0;
1063
1064         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1065                                    OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1066         if (IS_ERR(handle)) {
1067                 ret = PTR_ERR(handle);
1068                 mlog_errno(ret);
1069                 goto out;
1070         }
1071         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1072                                    OCFS2_JOURNAL_ACCESS_WRITE);
1073         if (ret) {
1074                 mlog_errno(ret);
1075                 goto out_commit;
1076         }
1077
1078         xs->here->xe_name_offset = cpu_to_le16(offs);
1079         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1080         if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1081                 ocfs2_xattr_set_local(xs->here, 1);
1082         else
1083                 ocfs2_xattr_set_local(xs->here, 0);
1084         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1085
1086         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1087         if (ret < 0)
1088                 mlog_errno(ret);
1089 out_commit:
1090         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1091 out:
1092         return ret;
1093 }
1094
1095 /*
1096  * ocfs2_xattr_set_value_outside()
1097  *
1098  * Set large size value in B tree.
1099  */
1100 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1101                                          struct ocfs2_xattr_info *xi,
1102                                          struct ocfs2_xattr_search *xs,
1103                                          size_t offs)
1104 {
1105         size_t name_len = strlen(xi->name);
1106         void *val = xs->base + offs;
1107         struct ocfs2_xattr_value_root *xv = NULL;
1108         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1109         int ret = 0;
1110
1111         memset(val, 0, size);
1112         memcpy(val, xi->name, name_len);
1113         xv = (struct ocfs2_xattr_value_root *)
1114                 (val + OCFS2_XATTR_SIZE(name_len));
1115         xv->xr_clusters = 0;
1116         xv->xr_last_eb_blk = 0;
1117         xv->xr_list.l_tree_depth = 0;
1118         xv->xr_list.l_count = cpu_to_le16(1);
1119         xv->xr_list.l_next_free_rec = 0;
1120
1121         ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1122                                          xi->value_len);
1123         if (ret < 0) {
1124                 mlog_errno(ret);
1125                 return ret;
1126         }
1127         ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
1128                                               xi->value_len);
1129         if (ret < 0) {
1130                 mlog_errno(ret);
1131                 return ret;
1132         }
1133         ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
1134         if (ret < 0)
1135                 mlog_errno(ret);
1136
1137         return ret;
1138 }
1139
1140 /*
1141  * ocfs2_xattr_set_entry_local()
1142  *
1143  * Set, replace or remove extended attribute in local.
      *
      * @xi:       name/value to store.  A NULL xi->value removes the entry
      *            at xs->here.
      * @xs:       search state: xs->base is the address name offsets are
      *            relative to, xs->header holds the entry array and
      *            xs->here is the entry being worked on.
      * @last:     entry slot whose type, local flag and name length are
      *            filled in when a new entry is inserted.
      * @min_offs: offset (from xs->base) below which the new name+value
      *            is placed.
      *
      * Only memory is modified here; this function makes no journal calls.
1144  */
1145 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1146                                         struct ocfs2_xattr_info *xi,
1147                                         struct ocfs2_xattr_search *xs,
1148                                         struct ocfs2_xattr_entry *last,
1149                                         size_t min_offs)
1150 {
1151         size_t name_len = strlen(xi->name);
1152         int i;
1153
1154         if (xi->value && xs->not_found) {
1155                 /* Insert the new xattr entry. */
1156                 le16_add_cpu(&xs->header->xh_count, 1);
1157                 ocfs2_xattr_set_type(last, xi->name_index);
1158                 ocfs2_xattr_set_local(last, 1);
1159                 last->xe_name_len = name_len;
1160         } else {
1161                 void *first_val;
1162                 void *val;
1163                 size_t offs, size;
1164
1165                 first_val = xs->base + min_offs;
1166                 offs = le16_to_cpu(xs->here->xe_name_offset);
1167                 val = xs->base + offs;
1168
                     /*
                      * Space taken by the existing name+value: a value
                      * larger than OCFS2_XATTR_INLINE_SIZE only occupies a
                      * tree root here.
                      */
1169                 if (le64_to_cpu(xs->here->xe_value_size) >
1170                     OCFS2_XATTR_INLINE_SIZE)
1171                         size = OCFS2_XATTR_SIZE(name_len) +
1172                                 OCFS2_XATTR_ROOT_SIZE;
1173                 else
1174                         size = OCFS2_XATTR_SIZE(name_len) +
1175                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1176
1177                 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1178                                 OCFS2_XATTR_SIZE(xi->value_len)) {
1179                         /* The old and the new value have the
1180                            same size. Just replace the value. */
1181                         ocfs2_xattr_set_local(xs->here, 1);
1182                         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1183                         /* Clear value bytes. */
1184                         memset(val + OCFS2_XATTR_SIZE(name_len),
1185                                0,
1186                                OCFS2_XATTR_SIZE(xi->value_len));
1187                         memcpy(val + OCFS2_XATTR_SIZE(name_len),
1188                                xi->value,
1189                                xi->value_len);
1190                         return;
1191                 }
1192                 /* Remove the old name+value. */
                     /*
                      * Everything stored between first_val and val is moved
                      * up by size bytes, overwriting the old name+value;
                      * the size bytes freed at first_val are then zeroed.
                      */
1193                 memmove(first_val + size, first_val, val - first_val);
1194                 memset(first_val, 0, size);
1195                 xs->here->xe_name_hash = 0;
1196                 xs->here->xe_name_offset = 0;
1197                 ocfs2_xattr_set_local(xs->here, 1);
1198                 xs->here->xe_value_size = 0;
1199
1200                 min_offs += size;
1201
1202                 /* Adjust all value offsets. */
                     /* Only entries stored below the removed one moved. */
1203                 last = xs->header->xh_entries;
1204                 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1205                         size_t o = le16_to_cpu(last->xe_name_offset);
1206
1207                         if (o < offs)
1208                                 last->xe_name_offset = cpu_to_le16(o + size);
1209                         last += 1;
1210                 }
1211
1212                 if (!xi->value) {
1213                         /* Remove the old entry. */
                             /*
                              * The loop left last one past the final entry.
                              * Step back onto it, shift the entries after
                              * xs->here down by one slot and clear the slot
                              * that became unused.
                              */
1214                         last -= 1;
1215                         memmove(xs->here, xs->here + 1,
1216                                 (void *)last - (void *)xs->here);
1217                         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1218                         le16_add_cpu(&xs->header->xh_count, -1);
1219                 }
1220         }
1221         if (xi->value) {
1222                 /* Insert the new name+value. */
                     /*
                      * NOTE(review): when a new entry is inserted, the fields
                      * above were written through `last` while the ones here
                      * are written through xs->here; this presumably relies
                      * on the caller passing last == xs->here - confirm.
                      */
1223                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1224                                 OCFS2_XATTR_SIZE(xi->value_len);
1225                 void *val = xs->base + min_offs - size;
1226
1227                 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1228                 memset(val, 0, size);
1229                 memcpy(val, xi->name, name_len);
1230                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1231                        xi->value,
1232                        xi->value_len);
1233                 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1234                 ocfs2_xattr_set_local(xs->here, 1);
1235                 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1236         }
1237
1238         return;
1239 }
1240
1241 /*
1242  * ocfs2_xattr_set_entry()
1243  *
1244  * Set extended attribute entry into inode or block.
1245  *
1246  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1247  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1248  * then set value in B tree with set_value_outside().
      *
      * @xi:   name/value to set.  With a NULL xi->value the free-space checks
      *        are skipped and set_entry_local() removes the entry.
      * @xs:   search state describing the storage area (base, end, header)
      *        and the matching entry (here, not_found).
      * @flag: dyn-feature bits OR-ed into the inode afterwards.  When
      *        OCFS2_INLINE_XATTR_FL is not part of it, xs->xattr_bh is
      *        journalled in addition to xs->inode_bh.
      *
      * Returns -EIO when the computed free space is negative, -ENOSPC when
      * the new entry does not fit, or the result of the last helper called.
1249  */
1250 static int ocfs2_xattr_set_entry(struct inode *inode,
1251                                  struct ocfs2_xattr_info *xi,
1252                                  struct ocfs2_xattr_search *xs,
1253                                  int flag)
1254 {
1255         struct ocfs2_xattr_entry *last;
1256         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1257         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1258         size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1259         size_t size_l = 0;
1260         handle_t *handle = NULL;
1261         int free, i, ret;
             /*
              * Copy of @xi handed to set_entry_local(); for a large value its
              * value/value_len are replaced below by a tree root.
              */
1262         struct ocfs2_xattr_info xi_l = {
1263                 .name_index = xi->name_index,
1264                 .name = xi->name,
1265                 .value = xi->value,
1266                 .value_len = xi->value_len,
1267         };
1268
1269         /* Compute min_offs, last and free space. */
1270         last = xs->header->xh_entries;
1271
1272         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1273                 size_t offs = le16_to_cpu(last->xe_name_offset);
1274                 if (offs < min_offs)
1275                         min_offs = offs;
1276                 last += 1;
1277         }
1278
             /*
              * Gap between the end of the entry array (last) and the lowest
              * name+value (min_offs), less sizeof(__u32).
              */
1279         free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1280         if (free < 0)
1281                 return -EIO;
1282
             /*
              * An existing entry is going to be replaced, so its entry slot
              * and its name+value (or name+tree root) count as free as well.
              */
1283         if (!xs->not_found) {
1284                 size_t size = 0;
1285                 if (ocfs2_xattr_is_local(xs->here))
1286                         size = OCFS2_XATTR_SIZE(name_len) +
1287                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1288                 else
1289                         size = OCFS2_XATTR_SIZE(name_len) +
1290                                 OCFS2_XATTR_ROOT_SIZE;
1291                 free += (size + sizeof(struct ocfs2_xattr_entry));
1292         }
1293         /* Check free space in inode or block */
1294         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1295                 if (free < sizeof(struct ocfs2_xattr_entry) +
1296                            OCFS2_XATTR_SIZE(name_len) +
1297                            OCFS2_XATTR_ROOT_SIZE) {
1298                         ret = -ENOSPC;
1299                         goto out;
1300                 }
                     /*
                      * Large value: only a tree root is stored locally.
                      * def_xv is declared outside this function and is used
                      * as the placeholder root.
                      */
1301                 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1302                 xi_l.value = (void *)&def_xv;
1303                 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1304         } else if (xi->value) {
1305                 if (free < sizeof(struct ocfs2_xattr_entry) +
1306                            OCFS2_XATTR_SIZE(name_len) +
1307                            OCFS2_XATTR_SIZE(xi->value_len)) {
1308                         ret = -ENOSPC;
1309                         goto out;
1310                 }
1311         }
1312
1313         if (!xs->not_found) {
1314                 /* For existing extended attribute */
1315                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1316                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1317                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1318                 void *val = xs->base + offs;
1319
                     /*
                      * size_l is only non-zero when the new value is large, so
                      * this branch is taken when the old local name+value has
                      * exactly the size of name+tree root.
                      */
1320                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1321                         /* Replace existing local xattr with tree root */
1322                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1323                                                             offs);
1324                         if (ret < 0)
1325                                 mlog_errno(ret);
1326                         goto out;
1327                 } else if (!ocfs2_xattr_is_local(xs->here)) {
1328                         /* For existing xattr which has value outside */
1329                         struct ocfs2_xattr_value_root *xv = NULL;
1330                         xv = (struct ocfs2_xattr_value_root *)(val +
1331                                 OCFS2_XATTR_SIZE(name_len));
1332
1333                         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1334                                 /*
1335                                  * If new value need set outside also,
1336                                  * first truncate old value to new value,
1337                                  * then set new value with set_value_outside().
1338                                  */
1339                                 ret = ocfs2_xattr_value_truncate(inode,
1340                                                                  xs->xattr_bh,
1341                                                                  xv,
1342                                                                  xi->value_len);
1343                                 if (ret < 0) {
1344                                         mlog_errno(ret);
1345                                         goto out;
1346                                 }
1347
1348                                 ret = __ocfs2_xattr_set_value_outside(inode,
1349                                                                 xv,
1350                                                                 xi->value,
1351                                                                 xi->value_len);
1352                                 if (ret < 0) {
1353                                         mlog_errno(ret);
1354                                         goto out;
1355                                 }
1356
1357                                 ret = ocfs2_xattr_update_entry(inode,
1358                                                                xi,
1359                                                                xs,
1360                                                                offs);
1361                                 if (ret < 0)
1362                                         mlog_errno(ret);
1363                                 goto out;
1364                         } else {
1365                                 /*
1366                                  * If new value need set in local,
1367                                  * just truncate old value to zero.
1368                                  */
                                     /*
                                      * NOTE(review): a failure here is only
                                      * logged; execution continues and ret is
                                      * overwritten below - confirm that this
                                      * is intended.
                                      */
1369                                  ret = ocfs2_xattr_value_truncate(inode,
1370                                                                  xs->xattr_bh,
1371                                                                  xv,
1372                                                                  0);
1373                                 if (ret < 0)
1374                                         mlog_errno(ret);
1375                         }
1376                 }
1377         }
1378
1379         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1380                                    OCFS2_INODE_UPDATE_CREDITS);
1381         if (IS_ERR(handle)) {
1382                 ret = PTR_ERR(handle);
1383                 mlog_errno(ret);
1384                 goto out;
1385         }
1386
1387         ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1388                                    OCFS2_JOURNAL_ACCESS_WRITE);
1389         if (ret) {
1390                 mlog_errno(ret);
1391                 goto out_commit;
1392         }
1393
1394         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1395                 /* set extended attribute in external block. */
1396                 ret = ocfs2_extend_trans(handle,
1397                                          OCFS2_INODE_UPDATE_CREDITS +
1398                                          OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1399                 if (ret) {
1400                         mlog_errno(ret);
1401                         goto out_commit;
1402                 }
1403                 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1404                                            OCFS2_JOURNAL_ACCESS_WRITE);
1405                 if (ret) {
1406                         mlog_errno(ret);
1407                         goto out_commit;
1408                 }
1409         }
1410
1411         /*
1412          * Set value in local, include set tree root in local.
1413          * This is the first step for value size >INLINE_SIZE.
1414          */
1415         ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1416
1417         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1418                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1419                 if (ret < 0) {
1420                         mlog_errno(ret);
1421                         goto out_commit;
1422                 }
1423         }
1424
             /*
              * The inode does not have OCFS2_INLINE_XATTR_FL yet but this
              * call adds it: carve the inline xattr area out of the inode.
              */
1425         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1426             (flag & OCFS2_INLINE_XATTR_FL)) {
1427                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1428                 unsigned int xattrsize = osb->s_xattr_inline_size;
1429
1430                 /*
1431                  * Adjust extent record count or inline data size
1432                  * to reserve space for extended attribute.
1433                  */
1434                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1435                         struct ocfs2_inline_data *idata = &di->id2.i_data;
1436                         le16_add_cpu(&idata->id_count, -xattrsize);
1437                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1438                         struct ocfs2_extent_list *el = &di->id2.i_list;
1439                         le16_add_cpu(&el->l_count, -(xattrsize /
1440                                         sizeof(struct ocfs2_extent_rec)));
1441                 }
1442                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1443         }
1444         /* Update xattr flag */
1445         spin_lock(&oi->ip_lock);
1446         oi->ip_dyn_features |= flag;
1447         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1448         spin_unlock(&oi->ip_lock);
1449         /* Update inode ctime */
1450         inode->i_ctime = CURRENT_TIME;
1451         di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1452         di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1453
1454         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1455         if (ret < 0)
1456                 mlog_errno(ret);
1457
1458 out_commit:
1459         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1460
             /*
              * NOTE(review): this tests xi->value_len only, not xi->value;
              * presumably callers pass value_len == 0 when removing - confirm.
              */
1461         if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1462                 /*
1463                  * Set value outside in B tree.
1464                  * This is the second step for value size > INLINE_SIZE.
1465                  */
1466                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1467                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
1468                 if (ret < 0) {
1469                         int ret2;
1470
1471                         mlog_errno(ret);
1472                         /*
1473                          * If set value outside failed, we have to clean
1474                          * the junk tree root we have already set in local.
1475                          */
1476                         ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
1477                         if (ret2 < 0)
1478                                 mlog_errno(ret2);
1479                 }
1480         }
1481 out:
1482         return ret;
1483
1484 }
1485
1486 static int ocfs2_remove_value_outside(struct inode*inode,
1487                                       struct buffer_head *bh,
1488                                       struct ocfs2_xattr_header *header)
1489 {
1490         int ret = 0, i;
1491
1492         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1493                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1494
1495                 if (!ocfs2_xattr_is_local(entry)) {
1496                         struct ocfs2_xattr_value_root *xv;
1497                         void *val;
1498
1499                         val = (void *)header +
1500                                 le16_to_cpu(entry->xe_name_offset);
1501                         xv = (struct ocfs2_xattr_value_root *)
1502                                 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1503                         ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
1504                         if (ret < 0) {
1505                                 mlog_errno(ret);
1506                                 return ret;
1507                         }
1508                 }
1509         }
1510
1511         return ret;
1512 }
1513
1514 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1515                                     struct buffer_head *di_bh)
1516 {
1517
1518         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1519         struct ocfs2_xattr_header *header;
1520         int ret;
1521
1522         header = (struct ocfs2_xattr_header *)
1523                  ((void *)di + inode->i_sb->s_blocksize -
1524                  le16_to_cpu(di->i_xattr_inline_size));
1525
1526         ret = ocfs2_remove_value_outside(inode, di_bh, header);
1527
1528         return ret;
1529 }
1530
1531 static int ocfs2_xattr_block_remove(struct inode *inode,
1532                                     struct buffer_head *blk_bh)
1533 {
1534         struct ocfs2_xattr_block *xb;
1535         int ret = 0;
1536
1537         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1538         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1539                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1540                 ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1541         } else
1542                 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1543
1544         return ret;
1545 }
1546
1547 static int ocfs2_xattr_free_block(struct inode *inode,
1548                                   u64 block)
1549 {
1550         struct inode *xb_alloc_inode;
1551         struct buffer_head *xb_alloc_bh = NULL;
1552         struct buffer_head *blk_bh = NULL;
1553         struct ocfs2_xattr_block *xb;
1554         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1555         handle_t *handle;
1556         int ret = 0;
1557         u64 blk, bg_blkno;
1558         u16 bit;
1559
1560         ret = ocfs2_read_block(inode, block, &blk_bh);
1561         if (ret < 0) {
1562                 mlog_errno(ret);
1563                 goto out;
1564         }
1565
1566         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1567         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1568                 ret = -EIO;
1569                 goto out;
1570         }
1571
1572         ret = ocfs2_xattr_block_remove(inode, blk_bh);
1573         if (ret < 0) {
1574                 mlog_errno(ret);
1575                 goto out;
1576         }
1577
1578         blk = le64_to_cpu(xb->xb_blkno);
1579         bit = le16_to_cpu(xb->xb_suballoc_bit);
1580         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1581
1582         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1583                                 EXTENT_ALLOC_SYSTEM_INODE,
1584                                 le16_to_cpu(xb->xb_suballoc_slot));
1585         if (!xb_alloc_inode) {
1586                 ret = -ENOMEM;
1587                 mlog_errno(ret);
1588                 goto out;
1589         }
1590         mutex_lock(&xb_alloc_inode->i_mutex);
1591
1592         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1593         if (ret < 0) {
1594                 mlog_errno(ret);
1595                 goto out_mutex;
1596         }
1597
1598         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1599         if (IS_ERR(handle)) {
1600                 ret = PTR_ERR(handle);
1601                 mlog_errno(ret);
1602                 goto out_unlock;
1603         }
1604
1605         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1606                                        bit, bg_blkno, 1);
1607         if (ret < 0)
1608                 mlog_errno(ret);
1609
1610         ocfs2_commit_trans(osb, handle);
1611 out_unlock:
1612         ocfs2_inode_unlock(xb_alloc_inode, 1);
1613         brelse(xb_alloc_bh);
1614 out_mutex:
1615         mutex_unlock(&xb_alloc_inode->i_mutex);
1616         iput(xb_alloc_inode);
1617 out:
1618         brelse(blk_bh);
1619         return ret;
1620 }
1621
1622 /*
1623  * ocfs2_xattr_remove()
1624  *
1625  * Free extended attribute resources associated with this inode.
1626  */
1627 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1628 {
1629         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1630         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1631         handle_t *handle;
1632         int ret;
1633
1634         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1635                 return 0;
1636
1637         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1638                 return 0;
1639
1640         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1641                 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1642                 if (ret < 0) {
1643                         mlog_errno(ret);
1644                         goto out;
1645                 }
1646         }
1647
1648         if (di->i_xattr_loc) {
1649                 ret = ocfs2_xattr_free_block(inode,
1650                                              le64_to_cpu(di->i_xattr_loc));
1651                 if (ret < 0) {
1652                         mlog_errno(ret);
1653                         goto out;
1654                 }
1655         }
1656
1657         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1658                                    OCFS2_INODE_UPDATE_CREDITS);
1659         if (IS_ERR(handle)) {
1660                 ret = PTR_ERR(handle);
1661                 mlog_errno(ret);
1662                 goto out;
1663         }
1664         ret = ocfs2_journal_access(handle, inode, di_bh,
1665                                    OCFS2_JOURNAL_ACCESS_WRITE);
1666         if (ret) {
1667                 mlog_errno(ret);
1668                 goto out_commit;
1669         }
1670
1671         di->i_xattr_loc = 0;
1672
1673         spin_lock(&oi->ip_lock);
1674         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1675         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1676         spin_unlock(&oi->ip_lock);
1677
1678         ret = ocfs2_journal_dirty(handle, di_bh);
1679         if (ret < 0)
1680                 mlog_errno(ret);
1681 out_commit:
1682         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1683 out:
1684         return ret;
1685 }
1686
1687 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1688                                         struct ocfs2_dinode *di)
1689 {
1690         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1691         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1692         int free;
1693
1694         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1695                 return 0;
1696
1697         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1698                 struct ocfs2_inline_data *idata = &di->id2.i_data;
1699                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1700         } else if (ocfs2_inode_is_fast_symlink(inode)) {
1701                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1702                         le64_to_cpu(di->i_size);
1703         } else {
1704                 struct ocfs2_extent_list *el = &di->id2.i_list;
1705                 free = (le16_to_cpu(el->l_count) -
1706                         le16_to_cpu(el->l_next_free_rec)) *
1707                         sizeof(struct ocfs2_extent_rec);
1708         }
1709         if (free >= xattrsize)
1710                 return 1;
1711
1712         return 0;
1713 }
1714
1715 /*
1716  * ocfs2_xattr_ibody_find()
1717  *
1718  * Find extended attribute in inode block and
1719  * fill search info into struct ocfs2_xattr_search.
1720  */
1721 static int ocfs2_xattr_ibody_find(struct inode *inode,
1722                                   int name_index,
1723                                   const char *name,
1724                                   struct ocfs2_xattr_search *xs)
1725 {
1726         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1727         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1728         int ret;
1729         int has_space = 0;
1730
1731         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1732                 return 0;
1733
1734         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1735                 down_read(&oi->ip_alloc_sem);
1736                 has_space = ocfs2_xattr_has_space_inline(inode, di);
1737                 up_read(&oi->ip_alloc_sem);
1738                 if (!has_space)
1739                         return 0;
1740         }
1741
1742         xs->xattr_bh = xs->inode_bh;
1743         xs->end = (void *)di + inode->i_sb->s_blocksize;
1744         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1745                 xs->header = (struct ocfs2_xattr_header *)
1746                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1747         else
1748                 xs->header = (struct ocfs2_xattr_header *)
1749                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1750         xs->base = (void *)xs->header;
1751         xs->here = xs->header->xh_entries;
1752
1753         /* Find the named attribute. */
1754         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1755                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1756                 if (ret && ret != -ENODATA)
1757                         return ret;
1758                 xs->not_found = ret;
1759         }
1760
1761         return 0;
1762 }
1763
1764 /*
1765  * ocfs2_xattr_ibody_set()
1766  *
1767  * Set, replace or remove an extended attribute into inode block.
1768  *
1769  */
1770 static int ocfs2_xattr_ibody_set(struct inode *inode,
1771                                  struct ocfs2_xattr_info *xi,
1772                                  struct ocfs2_xattr_search *xs)
1773 {
1774         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1775         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1776         int ret;
1777
1778         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1779                 return -ENOSPC;
1780
1781         down_write(&oi->ip_alloc_sem);
1782         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1783                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
1784                         ret = -ENOSPC;
1785                         goto out;
1786                 }
1787         }
1788
1789         ret = ocfs2_xattr_set_entry(inode, xi, xs,
1790                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1791 out:
1792         up_write(&oi->ip_alloc_sem);
1793
1794         return ret;
1795 }
1796
1797 /*
1798  * ocfs2_xattr_block_find()
1799  *
1800  * Find extended attribute in external block and
1801  * fill search info into struct ocfs2_xattr_search.
1802  */
1803 static int ocfs2_xattr_block_find(struct inode *inode,
1804                                   int name_index,
1805                                   const char *name,
1806                                   struct ocfs2_xattr_search *xs)
1807 {
1808         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1809         struct buffer_head *blk_bh = NULL;
1810         struct ocfs2_xattr_block *xb;
1811         int ret = 0;
1812
1813         if (!di->i_xattr_loc)
1814                 return ret;
1815
1816         ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
1817         if (ret < 0) {
1818                 mlog_errno(ret);
1819                 return ret;
1820         }
1821
1822         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1823         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1824                 ret = -EIO;
1825                 goto cleanup;
1826         }
1827
1828         xs->xattr_bh = blk_bh;
1829
1830         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1831                 xs->header = &xb->xb_attrs.xb_header;
1832                 xs->base = (void *)xs->header;
1833                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1834                 xs->here = xs->header->xh_entries;
1835
1836                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1837         } else
1838                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1839                                                    name_index,
1840                                                    name, xs);
1841
1842         if (ret && ret != -ENODATA) {
1843                 xs->xattr_bh = NULL;
1844                 goto cleanup;
1845         }
1846         xs->not_found = ret;
1847         return 0;
1848 cleanup:
1849         brelse(blk_bh);
1850
1851         return ret;
1852 }
1853
1854 /*
1855  * ocfs2_xattr_block_set()
1856  *
1857  * Set, replace or remove an extended attribute into external block.
1858  *
1859  */
1860 static int ocfs2_xattr_block_set(struct inode *inode,
1861                                  struct ocfs2_xattr_info *xi,
1862                                  struct ocfs2_xattr_search *xs)
1863 {
1864         struct buffer_head *new_bh = NULL;
1865         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1866         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
1867         struct ocfs2_alloc_context *meta_ac = NULL;
1868         handle_t *handle = NULL;
1869         struct ocfs2_xattr_block *xblk = NULL;
1870         u16 suballoc_bit_start;
1871         u32 num_got;
1872         u64 first_blkno;
1873         int ret;
1874
1875         if (!xs->xattr_bh) {
1876                 /*
1877                  * Alloc one external block for extended attribute
1878                  * outside of inode.
1879                  */
1880                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
1881                 if (ret < 0) {
1882                         mlog_errno(ret);
1883                         goto out;
1884                 }
1885                 handle = ocfs2_start_trans(osb,
1886                                            OCFS2_XATTR_BLOCK_CREATE_CREDITS);
1887                 if (IS_ERR(handle)) {
1888                         ret = PTR_ERR(handle);
1889                         mlog_errno(ret);
1890                         goto out;
1891                 }
1892                 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1893                                            OCFS2_JOURNAL_ACCESS_CREATE);
1894                 if (ret < 0) {
1895                         mlog_errno(ret);
1896                         goto out_commit;
1897                 }
1898
1899                 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
1900                                            &suballoc_bit_start, &num_got,
1901                                            &first_blkno);
1902                 if (ret < 0) {
1903                         mlog_errno(ret);
1904                         goto out_commit;
1905                 }
1906
1907                 new_bh = sb_getblk(inode->i_sb, first_blkno);
1908                 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1909
1910                 ret = ocfs2_journal_access(handle, inode, new_bh,
1911                                            OCFS2_JOURNAL_ACCESS_CREATE);
1912                 if (ret < 0) {
1913                         mlog_errno(ret);
1914                         goto out_commit;
1915                 }
1916
1917                 /* Initialize ocfs2_xattr_block */
1918                 xs->xattr_bh = new_bh;
1919                 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
1920                 memset(xblk, 0, inode->i_sb->s_blocksize);
1921                 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
1922                 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
1923                 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1924                 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
1925                 xblk->xb_blkno = cpu_to_le64(first_blkno);
1926
1927                 xs->header = &xblk->xb_attrs.xb_header;
1928                 xs->base = (void *)xs->header;
1929                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1930                 xs->here = xs->header->xh_entries;
1931
1932
1933                 ret = ocfs2_journal_dirty(handle, new_bh);
1934                 if (ret < 0) {
1935                         mlog_errno(ret);
1936                         goto out_commit;
1937                 }
1938                 di->i_xattr_loc = cpu_to_le64(first_blkno);
1939                 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1940                 if (ret < 0)
1941                         mlog_errno(ret);
1942 out_commit:
1943                 ocfs2_commit_trans(osb, handle);
1944 out:
1945                 if (meta_ac)
1946                         ocfs2_free_alloc_context(meta_ac);
1947                 if (ret < 0)
1948                         return ret;
1949         } else
1950                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1951
1952         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1953                 /* Set extended attribute into external block */
1954                 ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
1955                 if (!ret || ret != -ENOSPC)
1956                         goto end;
1957
1958                 ret = ocfs2_xattr_create_index_block(inode, xs);
1959                 if (ret)
1960                         goto end;
1961         }
1962
1963         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
1964
1965 end:
1966
1967         return ret;
1968 }
1969
1970 /*
1971  * ocfs2_xattr_set()
1972  *
1973  * Set, replace or remove an extended attribute for this inode.
1974  * value is NULL to remove an existing extended attribute, else either
1975  * create or replace an extended attribute.
1976  */
1977 int ocfs2_xattr_set(struct inode *inode,
1978                     int name_index,
1979                     const char *name,
1980                     const void *value,
1981                     size_t value_len,
1982                     int flags)
1983 {
1984         struct buffer_head *di_bh = NULL;
1985         struct ocfs2_dinode *di;
1986         int ret;
1987
1988         struct ocfs2_xattr_info xi = {
1989                 .name_index = name_index,
1990                 .name = name,
1991                 .value = value,
1992                 .value_len = value_len,
1993         };
1994
1995         struct ocfs2_xattr_search xis = {
1996                 .not_found = -ENODATA,
1997         };
1998
1999         struct ocfs2_xattr_search xbs = {
2000                 .not_found = -ENODATA,
2001         };
2002
2003         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2004                 return -EOPNOTSUPP;
2005
2006         ret = ocfs2_inode_lock(inode, &di_bh, 1);
2007         if (ret < 0) {
2008                 mlog_errno(ret);
2009                 return ret;
2010         }
2011         xis.inode_bh = xbs.inode_bh = di_bh;
2012         di = (struct ocfs2_dinode *)di_bh->b_data;
2013
2014         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2015         /*
2016          * Scan inode and external block to find the same name
2017          * extended attribute and collect search infomation.
2018          */
2019         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2020         if (ret)
2021                 goto cleanup;
2022         if (xis.not_found) {
2023                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2024                 if (ret)
2025                         goto cleanup;
2026         }
2027
2028         if (xis.not_found && xbs.not_found) {
2029                 ret = -ENODATA;
2030                 if (flags & XATTR_REPLACE)
2031                         goto cleanup;
2032                 ret = 0;
2033                 if (!value)
2034                         goto cleanup;
2035         } else {
2036                 ret = -EEXIST;
2037                 if (flags & XATTR_CREATE)
2038                         goto cleanup;
2039         }
2040
2041         if (!value) {
2042                 /* Remove existing extended attribute */
2043                 if (!xis.not_found)
2044                         ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2045                 else if (!xbs.not_found)
2046                         ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2047         } else {
2048                 /* We always try to set extended attribute into inode first*/
2049                 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2050                 if (!ret && !xbs.not_found) {
2051                         /*
2052                          * If succeed and that extended attribute existing in
2053                          * external block, then we will remove it.
2054                          */
2055                         xi.value = NULL;
2056                         xi.value_len = 0;
2057                         ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2058                 } else if (ret == -ENOSPC) {
2059                         if (di->i_xattr_loc && !xbs.xattr_bh) {
2060                                 ret = ocfs2_xattr_block_find(inode, name_index,
2061                                                              name, &xbs);
2062                                 if (ret)
2063                                         goto cleanup;
2064                         }
2065                         /*
2066                          * If no space in inode, we will set extended attribute
2067                          * into external block.
2068                          */
2069                         ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2070                         if (ret)
2071                                 goto cleanup;
2072                         if (!xis.not_found) {
2073                                 /*
2074                                  * If succeed and that extended attribute
2075                                  * existing in inode, we will remove it.
2076                                  */
2077                                 xi.value = NULL;
2078                                 xi.value_len = 0;
2079                                 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2080                         }
2081                 }
2082         }
2083 cleanup:
2084         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2085         ocfs2_inode_unlock(inode, 1);
2086         brelse(di_bh);
2087         brelse(xbs.xattr_bh);
2088         ocfs2_xattr_bucket_relse(inode, &xbs.bucket);
2089
2090         return ret;
2091 }
2092
2093 /*
2094  * Find the xattr extent rec which may contains name_hash.
2095  * e_cpos will be the first name hash of the xattr rec.
2096  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2097  */
2098 static int ocfs2_xattr_get_rec(struct inode *inode,
2099                                u32 name_hash,
2100                                u64 *p_blkno,
2101                                u32 *e_cpos,
2102                                u32 *num_clusters,
2103                                struct ocfs2_extent_list *el)
2104 {
2105         int ret = 0, i;
2106         struct buffer_head *eb_bh = NULL;
2107         struct ocfs2_extent_block *eb;
2108         struct ocfs2_extent_rec *rec = NULL;
2109         u64 e_blkno = 0;
2110
2111         if (el->l_tree_depth) {
2112                 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2113                 if (ret) {
2114                         mlog_errno(ret);
2115                         goto out;
2116                 }
2117
2118                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2119                 el = &eb->h_list;
2120
2121                 if (el->l_tree_depth) {
2122                         ocfs2_error(inode->i_sb,
2123                                     "Inode %lu has non zero tree depth in "
2124                                     "xattr tree block %llu\n", inode->i_ino,
2125                                     (unsigned long long)eb_bh->b_blocknr);
2126                         ret = -EROFS;
2127                         goto out;
2128                 }
2129         }
2130
2131         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2132                 rec = &el->l_recs[i];
2133
2134                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2135                         e_blkno = le64_to_cpu(rec->e_blkno);
2136                         break;
2137                 }
2138         }
2139
2140         if (!e_blkno) {
2141                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2142                             "record (%u, %u, 0) in xattr", inode->i_ino,
2143                             le32_to_cpu(rec->e_cpos),
2144                             ocfs2_rec_clusters(el, rec));
2145                 ret = -EROFS;
2146                 goto out;
2147         }
2148
2149         *p_blkno = le64_to_cpu(rec->e_blkno);
2150         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2151         if (e_cpos)
2152                 *e_cpos = le32_to_cpu(rec->e_cpos);
2153 out:
2154         brelse(eb_bh);
2155         return ret;
2156 }
2157
/*
 * Function type for per-bucket work: it receives the inode, one xattr
 * bucket and an opaque argument, and returns an int status.
 */
typedef int xattr_bucket_func(struct inode *inode,
			      struct ocfs2_xattr_bucket *bucket,
			      void *para);
2161
2162 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2163                                    struct buffer_head *header_bh,
2164                                    int name_index,
2165                                    const char *name,
2166                                    u32 name_hash,
2167                                    u16 *xe_index,
2168                                    int *found)
2169 {
2170         int i, ret = 0, cmp = 1, block_off, new_offset;
2171         struct ocfs2_xattr_header *xh =
2172                         (struct ocfs2_xattr_header *)header_bh->b_data;
2173         size_t name_len = strlen(name);
2174         struct ocfs2_xattr_entry *xe = NULL;
2175         struct buffer_head *name_bh = NULL;
2176         char *xe_name;
2177
2178         /*
2179          * We don't use binary search in the bucket because there
2180          * may be multiple entries with the same name hash.
2181          */
2182         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2183                 xe = &xh->xh_entries[i];
2184
2185                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2186                         continue;
2187                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2188                         break;
2189
2190                 cmp = name_index - ocfs2_xattr_get_type(xe);
2191                 if (!cmp)
2192                         cmp = name_len - xe->xe_name_len;
2193                 if (cmp)
2194                         continue;
2195
2196                 ret = ocfs2_xattr_bucket_get_name_value(inode,
2197                                                         xh,
2198                                                         i,
2199                                                         &block_off,
2200                                                         &new_offset);
2201                 if (ret) {
2202                         mlog_errno(ret);
2203                         break;
2204                 }
2205
2206                 ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
2207                                        &name_bh);
2208                 if (ret) {
2209                         mlog_errno(ret);
2210                         break;
2211                 }
2212                 xe_name = name_bh->b_data + new_offset;
2213
2214                 cmp = memcmp(name, xe_name, name_len);
2215                 brelse(name_bh);
2216                 name_bh = NULL;
2217
2218                 if (cmp == 0) {
2219                         *xe_index = i;
2220                         *found = 1;
2221                         ret = 0;
2222                         break;
2223                 }
2224         }
2225
2226         return ret;
2227 }
2228
2229 /*
2230  * Find the specified xattr entry in a series of buckets.
2231  * This series start from p_blkno and last for num_clusters.
2232  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2233  * the num of the valid buckets.
2234  *
2235  * Return the buffer_head this xattr should reside in. And if the xattr's
2236  * hash is in the gap of 2 buckets, return the lower bucket.
2237  */
2238 static int ocfs2_xattr_bucket_find(struct inode *inode,
2239                                    int name_index,
2240                                    const char *name,
2241                                    u32 name_hash,
2242                                    u64 p_blkno,
2243                                    u32 first_hash,
2244                                    u32 num_clusters,
2245                                    struct ocfs2_xattr_search *xs)
2246 {
2247         int ret, found = 0;
2248         struct buffer_head *bh = NULL;
2249         struct buffer_head *lower_bh = NULL;
2250         struct ocfs2_xattr_header *xh = NULL;
2251         struct ocfs2_xattr_entry *xe = NULL;
2252         u16 index = 0;
2253         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2254         int low_bucket = 0, bucket, high_bucket;
2255         u32 last_hash;
2256         u64 blkno;
2257
2258         ret = ocfs2_read_block(inode, p_blkno, &bh);
2259         if (ret) {
2260                 mlog_errno(ret);
2261                 goto out;
2262         }
2263
2264         xh = (struct ocfs2_xattr_header *)bh->b_data;
2265         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2266
2267         while (low_bucket <= high_bucket) {
2268                 brelse(bh);
2269                 bh = NULL;
2270                 bucket = (low_bucket + high_bucket) / 2;
2271
2272                 blkno = p_blkno + bucket * blk_per_bucket;
2273
2274                 ret = ocfs2_read_block(inode, blkno, &bh);
2275                 if (ret) {
2276                         mlog_errno(ret);
2277                         goto out;
2278                 }
2279
2280                 xh = (struct ocfs2_xattr_header *)bh->b_data;
2281                 xe = &xh->xh_entries[0];
2282                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2283                         high_bucket = bucket - 1;
2284                         continue;
2285                 }
2286
2287                 /*
2288                  * Check whether the hash of the last entry in our
2289                  * bucket is larger than the search one. for an empty
2290                  * bucket, the last one is also the first one.
2291                  */
2292                 if (xh->xh_count)
2293                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2294
2295                 last_hash = le32_to_cpu(xe->xe_name_hash);
2296
2297                 /* record lower_bh which may be the insert place. */
2298                 brelse(lower_bh);
2299                 lower_bh = bh;
2300                 bh = NULL;
2301
2302                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2303                         low_bucket = bucket + 1;
2304                         continue;
2305                 }
2306
2307                 /* the searched xattr should reside in this bucket if exists. */
2308                 ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
2309                                               name_index, name, name_hash,
2310                                               &index, &found);
2311                 if (ret) {
2312                         mlog_errno(ret);
2313                         goto out;
2314                 }
2315                 break;
2316         }
2317
2318         /*
2319          * Record the bucket we have found.
2320          * When the xattr's hash value is in the gap of 2 buckets, we will
2321          * always set it to the previous bucket.
2322          */
2323         if (!lower_bh) {
2324                 /*
2325                  * We can't find any bucket whose first name_hash is less
2326                  * than the find name_hash.
2327                  */
2328                 BUG_ON(bh->b_blocknr != p_blkno);
2329                 lower_bh = bh;
2330                 bh = NULL;
2331         }
2332         xs->bucket.bu_bhs[0] = lower_bh;
2333         lower_bh = NULL;
2334
2335         xs->header = bucket_xh(&xs->bucket);
2336         xs->base = bucket_block(&xs->bucket, 0);
2337         xs->end = xs->base + inode->i_sb->s_blocksize;
2338
2339         if (found) {
2340                 /*
2341                  * If we have found the xattr enty, read all the blocks in
2342                  * this bucket.
2343                  */
2344                 ret = ocfs2_read_blocks(inode, bucket_blkno(&xs->bucket) + 1,
2345                                         blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
2346                                         0);
2347                 if (ret) {
2348                         mlog_errno(ret);
2349                         goto out;
2350                 }
2351
2352                 xs->here = &xs->header->xh_entries[index];
2353                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2354                      (unsigned long long)bucket_blkno(&xs->bucket), index);
2355         } else
2356                 ret = -ENODATA;
2357
2358 out:
2359         brelse(bh);
2360         brelse(lower_bh);
2361         return ret;
2362 }
2363
2364 static int ocfs2_xattr_index_block_find(struct inode *inode,
2365                                         struct buffer_head *root_bh,
2366                                         int name_index,
2367                                         const char *name,
2368                                         struct ocfs2_xattr_search *xs)
2369 {
2370         int ret;
2371         struct ocfs2_xattr_block *xb =
2372                         (struct ocfs2_xattr_block *)root_bh->b_data;
2373         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2374         struct ocfs2_extent_list *el = &xb_root->xt_list;
2375         u64 p_blkno = 0;
2376         u32 first_hash, num_clusters = 0;
2377         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
2378
2379         if (le16_to_cpu(el->l_next_free_rec) == 0)
2380                 return -ENODATA;
2381
2382         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2383              name, name_hash, name_index);
2384
2385         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2386                                   &num_clusters, el);
2387         if (ret) {
2388                 mlog_errno(ret);
2389                 goto out;
2390         }
2391
2392         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2393
2394         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2395              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
2396              first_hash);
2397
2398         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2399                                       p_blkno, first_hash, num_clusters, xs);
2400
2401 out:
2402         return ret;
2403 }
2404
2405 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2406                                        u64 blkno,
2407                                        u32 clusters,
2408                                        xattr_bucket_func *func,
2409                                        void *para)
2410 {
2411         int i, ret = 0;
2412         int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2413         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2414         u32 num_buckets = clusters * bpc;
2415         struct ocfs2_xattr_bucket bucket;
2416
2417         memset(&bucket, 0, sizeof(bucket));
2418
2419         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2420              clusters, (unsigned long long)blkno);
2421
2422         for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2423                 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
2424                                         bucket.bu_bhs, 0);
2425                 if (ret) {
2426                         mlog_errno(ret);
2427                         goto out;
2428                 }
2429
2430                 /*
2431                  * The real bucket num in this series of blocks is stored
2432                  * in the 1st bucket.
2433                  */
2434                 if (i == 0)
2435                         num_buckets = le16_to_cpu(bucket_xh(&bucket)->xh_num_buckets);
2436
2437                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
2438                      (unsigned long long)blkno,
2439                      le32_to_cpu(bucket_xh(&bucket)->xh_entries[0].xe_name_hash));
2440                 if (func) {
2441                         ret = func(inode, &bucket, para);
2442                         if (ret) {
2443                                 mlog_errno(ret);
2444                                 break;
2445                         }
2446                 }
2447
2448                 ocfs2_xattr_bucket_relse(inode, &bucket);
2449                 memset(&bucket, 0, sizeof(bucket));
2450         }
2451
2452 out:
2453         ocfs2_xattr_bucket_relse(inode, &bucket);
2454
2455         return ret;
2456 }
2457
2458 struct ocfs2_xattr_tree_list {
2459         char *buffer;
2460         size_t buffer_size;
2461         size_t result;
2462 };
2463
2464 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2465                                              struct ocfs2_xattr_header *xh,
2466                                              int index,
2467                                              int *block_off,
2468                                              int *new_offset)
2469 {
2470         u16 name_offset;
2471
2472         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2473                 return -EINVAL;
2474
2475         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2476
2477         *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2478         *new_offset = name_offset % inode->i_sb->s_blocksize;
2479
2480         return 0;
2481 }
2482
2483 static int ocfs2_list_xattr_bucket(struct inode *inode,
2484                                    struct ocfs2_xattr_bucket *bucket,
2485                                    void *para)
2486 {
2487         int ret = 0, type;
2488         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
2489         int i, block_off, new_offset;
2490         const char *prefix, *name;
2491
2492         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
2493                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
2494                 type = ocfs2_xattr_get_type(entry);
2495                 prefix = ocfs2_xattr_prefix(type);
2496
2497                 if (prefix) {
2498                         ret = ocfs2_xattr_bucket_get_name_value(inode,
2499                                                                 bucket_xh(bucket),
2500                                                                 i,
2501                                                                 &block_off,
2502                                                                 &new_offset);
2503                         if (ret)
2504                                 break;
2505
2506                         name = (const char *)bucket_block(bucket, block_off) +
2507                                 new_offset;
2508                         ret = ocfs2_xattr_list_entry(xl->buffer,
2509                                                      xl->buffer_size,
2510                                                      &xl->result,
2511                                                      prefix, name,
2512                                                      entry->xe_name_len);
2513                         if (ret)
2514                                 break;
2515                 }
2516         }
2517
2518         return ret;
2519 }
2520
2521 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2522                                              struct ocfs2_xattr_tree_root *xt,
2523                                              char *buffer,
2524                                              size_t buffer_size)
2525 {
2526         struct ocfs2_extent_list *el = &xt->xt_list;
2527         int ret = 0;
2528         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
2529         u64 p_blkno = 0;
2530         struct ocfs2_xattr_tree_list xl = {
2531                 .buffer = buffer,
2532                 .buffer_size = buffer_size,
2533                 .result = 0,
2534         };
2535
2536         if (le16_to_cpu(el->l_next_free_rec) == 0)
2537                 return 0;
2538
2539         while (name_hash > 0) {
2540                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
2541                                           &e_cpos, &num_clusters, el);
2542                 if (ret) {
2543                         mlog_errno(ret);
2544                         goto out;
2545                 }
2546
2547                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
2548                                                   ocfs2_list_xattr_bucket,
2549                                                   &xl);
2550                 if (ret) {
2551                         mlog_errno(ret);
2552                         goto out;
2553                 }
2554
2555                 if (e_cpos == 0)
2556                         break;
2557
2558                 name_hash = e_cpos - 1;
2559         }
2560
2561         ret = xl.result;
2562 out:
2563         return ret;
2564 }
2565
2566 static int cmp_xe(const void *a, const void *b)
2567 {
2568         const struct ocfs2_xattr_entry *l = a, *r = b;
2569         u32 l_hash = le32_to_cpu(l->xe_name_hash);
2570         u32 r_hash = le32_to_cpu(r->xe_name_hash);
2571
2572         if (l_hash > r_hash)
2573                 return 1;
2574         if (l_hash < r_hash)
2575                 return -1;
2576         return 0;
2577 }
2578
2579 static void swap_xe(void *a, void *b, int size)
2580 {
2581         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
2582
2583         tmp = *l;
2584         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
2585         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
2586 }
2587
2588 /*
2589  * When the ocfs2_xattr_block is filled up, new bucket will be created
2590  * and all the xattr entries will be moved to the new bucket.
2591  * Note: we need to sort the entries since they are not saved in order
2592  * in the ocfs2_xattr_block.
2593  */
2594 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2595                                            struct buffer_head *xb_bh,
2596                                            struct buffer_head *xh_bh,
2597                                            struct buffer_head *data_bh)
2598 {
2599         int i, blocksize = inode->i_sb->s_blocksize;
2600         u16 offset, size, off_change;
2601         struct ocfs2_xattr_entry *xe;
2602         struct ocfs2_xattr_block *xb =
2603                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
2604         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2605         struct ocfs2_xattr_header *xh =
2606                                 (struct ocfs2_xattr_header *)xh_bh->b_data;
2607         u16 count = le16_to_cpu(xb_xh->xh_count);
2608         char *target = xh_bh->b_data, *src = xb_bh->b_data;
2609
2610         mlog(0, "cp xattr from block %llu to bucket %llu\n",
2611              (unsigned long long)xb_bh->b_blocknr,
2612              (unsigned long long)xh_bh->b_blocknr);
2613
2614         memset(xh_bh->b_data, 0, blocksize);
2615         if (data_bh)
2616                 memset(data_bh->b_data, 0, blocksize);
2617         /*
2618          * Since the xe_name_offset is based on ocfs2_xattr_header,
2619          * there is a offset change corresponding to the change of
2620          * ocfs2_xattr_header's position.
2621          */
2622         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2623         xe = &xb_xh->xh_entries[count - 1];
2624         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
2625         size = blocksize - offset;
2626
2627         /* copy all the names and values. */
2628         if (data_bh)
2629                 target = data_bh->b_data;
2630         memcpy(target + offset, src + offset, size);
2631
2632         /* Init new header now. */
2633         xh->xh_count = xb_xh->xh_count;
2634         xh->xh_num_buckets = cpu_to_le16(1);
2635         xh->xh_name_value_len = cpu_to_le16(size);
2636         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2637
2638         /* copy all the entries. */
2639         target = xh_bh->b_data;
2640         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2641         size = count * sizeof(struct ocfs2_xattr_entry);
2642         memcpy(target + offset, (char *)xb_xh + offset, size);
2643
2644         /* Change the xe offset for all the xe because of the move. */
2645         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
2646                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2647         for (i = 0; i < count; i++)
2648                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
2649
2650         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
2651              offset, size, off_change);
2652
2653         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
2654              cmp_xe, swap_xe);
2655 }
2656
2657 /*
2658  * After we move xattr from block to index btree, we have to
2659  * update ocfs2_xattr_search to the new xe and base.
2660  *
2661  * When the entry is in xattr block, xattr_bh indicates the storage place.
2662  * While if the entry is in index b-tree, "bucket" indicates the
2663  * real place of the xattr.
2664  */
2665 static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2666                                            struct ocfs2_xattr_search *xs,
2667                                            struct buffer_head *old_bh,
2668                                            struct buffer_head *new_bh)
2669 {
2670         int ret = 0;
2671         char *buf = old_bh->b_data;
2672         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2673         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2674         int i, blocksize = inode->i_sb->s_blocksize;
2675         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2676
2677         xs->bucket.bu_bhs[0] = new_bh;
2678         get_bh(new_bh);
2679         xs->header = bucket_xh(&xs->bucket);
2680
2681         xs->base = new_bh->b_data;
2682         xs->end = xs->base + inode->i_sb->s_blocksize;
2683
2684         if (!xs->not_found) {
2685                 if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
2686                         ret = ocfs2_read_blocks(inode,
2687                                         bucket_blkno(&xs->bucket) + 1,
2688                                         blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
2689                                         0);
2690                         if (ret) {
2691                                 mlog_errno(ret);
2692                                 return ret;
2693                         }
2694
2695                 }
2696                 i = xs->here - old_xh->xh_entries;
2697                 xs->here = &xs->header->xh_entries[i];
2698         }
2699
2700         return ret;
2701 }
2702
2703 static int ocfs2_xattr_create_index_block(struct inode *inode,
2704                                           struct ocfs2_xattr_search *xs)
2705 {
2706         int ret, credits = OCFS2_SUBALLOC_ALLOC;
2707         u32 bit_off, len;
2708         u64 blkno;
2709         handle_t *handle;
2710         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2711         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2712         struct ocfs2_alloc_context *data_ac;
2713         struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2714         struct buffer_head *xb_bh = xs->xattr_bh;
2715         struct ocfs2_xattr_block *xb =
2716                         (struct ocfs2_xattr_block *)xb_bh->b_data;
2717         struct ocfs2_xattr_tree_root *xr;
2718         u16 xb_flags = le16_to_cpu(xb->xb_flags);
2719         u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2720
2721         mlog(0, "create xattr index block for %llu\n",
2722              (unsigned long long)xb_bh->b_blocknr);
2723
2724         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2725
2726         ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2727         if (ret) {
2728                 mlog_errno(ret);
2729                 goto out;
2730         }
2731
2732         /*
2733          * XXX:
2734          * We can use this lock for now, and maybe move to a dedicated mutex
2735          * if performance becomes a problem later.
2736          */
2737         down_write(&oi->ip_alloc_sem);
2738
2739         /*
2740          * 3 more credits, one for xattr block update, one for the 1st block
2741          * of the new xattr bucket and one for the value/data.
2742          */
2743         credits += 3;
2744         handle = ocfs2_start_trans(osb, credits);
2745         if (IS_ERR(handle)) {
2746                 ret = PTR_ERR(handle);
2747                 mlog_errno(ret);
2748                 goto out_sem;
2749         }
2750
2751         ret = ocfs2_journal_access(handle, inode, xb_bh,
2752                                    OCFS2_JOURNAL_ACCESS_WRITE);
2753         if (ret) {
2754                 mlog_errno(ret);
2755                 goto out_commit;
2756         }
2757
2758         ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
2759         if (ret) {
2760                 mlog_errno(ret);
2761                 goto out_commit;
2762         }
2763
2764         /*
2765          * The bucket may spread in many blocks, and
2766          * we will only touch the 1st block and the last block
2767          * in the whole bucket(one for entry and one for data).
2768          */
2769         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2770
2771         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
2772              (unsigned long long)blkno);
2773
2774         xh_bh = sb_getblk(inode->i_sb, blkno);
2775         if (!xh_bh) {
2776                 ret = -EIO;
2777                 mlog_errno(ret);
2778                 goto out_commit;
2779         }
2780
2781         ocfs2_set_new_buffer_uptodate(inode, xh_bh);
2782
2783         ret = ocfs2_journal_access(handle, inode, xh_bh,
2784                                    OCFS2_JOURNAL_ACCESS_CREATE);
2785         if (ret) {
2786                 mlog_errno(ret);
2787                 goto out_commit;
2788         }
2789
2790         if (bpb > 1) {
2791                 data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2792                 if (!data_bh) {
2793                         ret = -EIO;
2794                         mlog_errno(ret);
2795                         goto out_commit;
2796                 }
2797
2798                 ocfs2_set_new_buffer_uptodate(inode, data_bh);
2799
2800                 ret = ocfs2_journal_access(handle, inode, data_bh,
2801                                            OCFS2_JOURNAL_ACCESS_CREATE);
2802                 if (ret) {
2803                         mlog_errno(ret);
2804                         goto out_commit;
2805                 }
2806         }
2807
2808         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
2809
2810         ocfs2_journal_dirty(handle, xh_bh);
2811         if (data_bh)
2812                 ocfs2_journal_dirty(handle, data_bh);
2813
2814         ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2815         if (ret) {
2816                 mlog_errno(ret);
2817                 goto out_commit;
2818         }
2819
2820         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2821         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
2822                offsetof(struct ocfs2_xattr_block, xb_attrs));
2823
2824         xr = &xb->xb_attrs.xb_root;
2825         xr->xt_clusters = cpu_to_le32(1);
2826         xr->xt_last_eb_blk = 0;
2827         xr->xt_list.l_tree_depth = 0;
2828         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
2829         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2830
2831         xr->xt_list.l_recs[0].e_cpos = 0;
2832         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
2833         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
2834
2835         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2836
2837         ret = ocfs2_journal_dirty(handle, xb_bh);
2838         if (ret) {
2839                 mlog_errno(ret);
2840                 goto out_commit;
2841         }
2842
2843 out_commit:
2844         ocfs2_commit_trans(osb, handle);
2845
2846 out_sem:
2847         up_write(&oi->ip_alloc_sem);
2848
2849 out:
2850         if (data_ac)
2851                 ocfs2_free_alloc_context(data_ac);
2852
2853         brelse(xh_bh);
2854         brelse(data_bh);
2855
2856         return ret;
2857 }
2858
2859 static int cmp_xe_offset(const void *a, const void *b)
2860 {
2861         const struct ocfs2_xattr_entry *l = a, *r = b;
2862         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
2863         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
2864
2865         if (l_name_offset < r_name_offset)
2866                 return 1;
2867         if (l_name_offset > r_name_offset)
2868                 return -1;
2869         return 0;
2870 }
2871
2872 /*
2873  * defrag a xattr bucket if we find that the bucket has some
2874  * holes beteen name/value pairs.
2875  * We will move all the name/value pairs to the end of the bucket
2876  * so that we can spare some space for insertion.
2877  */
2878 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2879                                      struct ocfs2_xattr_bucket *bucket)
2880 {
2881         int ret, i;
2882         size_t end, offset, len, value_len;
2883         struct ocfs2_xattr_header *xh;
2884         char *entries, *buf, *bucket_buf = NULL;
2885         u64 blkno = bucket_blkno(bucket);
2886         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2887         u16 xh_free_start;
2888         size_t blocksize = inode->i_sb->s_blocksize;
2889         handle_t *handle;
2890         struct buffer_head **bhs;
2891         struct ocfs2_xattr_entry *xe;
2892
2893         bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2894                         GFP_NOFS);
2895         if (!bhs)
2896                 return -ENOMEM;
2897
2898         ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0);
2899         if (ret)
2900                 goto out;
2901
2902         /*
2903          * In order to make the operation more efficient and generic,
2904          * we copy all the blocks into a contiguous memory and do the
2905          * defragment there, so if anything is error, we will not touch
2906          * the real block.
2907          */
2908         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
2909         if (!bucket_buf) {
2910                 ret = -EIO;
2911                 goto out;
2912         }
2913
2914         buf = bucket_buf;
2915         for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
2916                 memcpy(buf, bhs[i]->b_data, blocksize);
2917
2918         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
2919         if (IS_ERR(handle)) {
2920                 ret = PTR_ERR(handle);
2921                 handle = NULL;
2922                 mlog_errno(ret);
2923                 goto out;
2924         }
2925
2926         for (i = 0; i < blk_per_bucket; i++) {
2927                 ret = ocfs2_journal_access(handle, inode, bhs[i],
2928                                            OCFS2_JOURNAL_ACCESS_WRITE);
2929                 if (ret < 0) {
2930                         mlog_errno(ret);
2931                         goto commit;
2932                 }
2933         }
2934
2935         xh = (struct ocfs2_xattr_header *)bucket_buf;
2936         entries = (char *)xh->xh_entries;
2937         xh_free_start = le16_to_cpu(xh->xh_free_start);
2938
2939         mlog(0, "adjust xattr bucket in %llu, count = %u, "
2940              "xh_free_start = %u, xh_name_value_len = %u.\n",
2941              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
2942              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
2943
2944         /*
2945          * sort all the entries by their offset.
2946          * the largest will be the first, so that we can
2947          * move them to the end one by one.
2948          */
2949         sort(entries, le16_to_cpu(xh->xh_count),
2950              sizeof(struct ocfs2_xattr_entry),
2951              cmp_xe_offset, swap_xe);
2952
2953         /* Move all name/values to the end of the bucket. */
2954         xe = xh->xh_entries;
2955         end = OCFS2_XATTR_BUCKET_SIZE;
2956         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
2957                 offset = le16_to_cpu(xe->xe_name_offset);
2958                 if (ocfs2_xattr_is_local(xe))
2959                         value_len = OCFS2_XATTR_SIZE(
2960                                         le64_to_cpu(xe->xe_value_size));
2961                 else
2962                         value_len = OCFS2_XATTR_ROOT_SIZE;
2963                 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
2964
2965                 /*
2966                  * We must make sure that the name/value pair
2967                  * exist in the same block. So adjust end to
2968                  * the previous block end if needed.
2969                  */
2970                 if (((end - len) / blocksize !=
2971                         (end - 1) / blocksize))
2972                         end = end - end % blocksize;
2973
2974                 if (end > offset + len) {
2975                         memmove(bucket_buf + end - len,
2976                                 bucket_buf + offset, len);
2977                         xe->xe_name_offset = cpu_to_le16(end - len);
2978                 }
2979
2980                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
2981                                 "bucket %llu\n", (unsigned long long)blkno);
2982
2983                 end -= len;
2984         }
2985
2986         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
2987                         "bucket %llu\n", (unsigned long long)blkno);
2988
2989         if (xh_free_start == end)
2990                 goto commit;
2991
2992         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2993         xh->xh_free_start = cpu_to_le16(end);
2994
2995         /* sort the entries by their name_hash. */
2996         sort(entries, le16_to_cpu(xh->xh_count),
2997              sizeof(struct ocfs2_xattr_entry),
2998              cmp_xe, swap_xe);
2999
3000         buf = bucket_buf;
3001         for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
3002                 memcpy(bhs[i]->b_data, buf, blocksize);
3003                 ocfs2_journal_dirty(handle, bhs[i]);
3004         }
3005
3006 commit:
3007         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
3008 out:
3009
3010         if (bhs) {
3011                 for (i = 0; i < blk_per_bucket; i++)
3012                         brelse(bhs[i]);
3013         }
3014         kfree(bhs);
3015
3016         kfree(bucket_buf);
3017         return ret;
3018 }
3019
3020 /*
3021  * Move half nums of the xattr bucket in the previous cluster to this new
3022  * cluster. We only touch the last cluster of the previous extend record.
3023  *
3024  * first_bh is the first buffer_head of a series of bucket in the same
3025  * extent rec and header_bh is the header of one bucket in this cluster.
3026  * They will be updated if we move the data header_bh contains to the new
3027  * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
3028  */
3029 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3030                                                handle_t *handle,
3031                                                struct buffer_head **first_bh,
3032                                                struct buffer_head **header_bh,
3033                                                u64 new_blkno,
3034                                                u64 prev_blkno,
3035                                                u32 num_clusters,
3036                                                u32 *first_hash)
3037 {
3038         int i, ret, credits;
3039         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3040         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3041         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3042         int blocksize = inode->i_sb->s_blocksize;
3043         struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3044         struct ocfs2_xattr_header *new_xh;
3045         struct ocfs2_xattr_header *xh =
3046                         (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3047
3048         BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3049         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3050
3051         prev_bh = *first_bh;
3052         get_bh(prev_bh);
3053         xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3054
3055         prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3056
3057         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3058              (unsigned long long)prev_blkno, (unsigned long long)new_blkno);
3059
3060         /*
3061          * We need to update the 1st half of the new cluster and
3062          * 1 more for the update of the 1st bucket of the previous
3063          * extent record.
3064          */
3065         credits = bpc / 2 + 1;
3066         ret = ocfs2_extend_trans(handle, credits);
3067         if (ret) {
3068                 mlog_errno(ret);
3069                 goto out;
3070         }
3071
3072         ret = ocfs2_journal_access(handle, inode, prev_bh,
3073                                    OCFS2_JOURNAL_ACCESS_WRITE);
3074         if (ret) {
3075                 mlog_errno(ret);
3076                 goto out;
3077         }
3078
3079         for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3080                 old_bh = new_bh = NULL;
3081                 new_bh = sb_getblk(inode->i_sb, new_blkno);
3082                 if (!new_bh) {
3083                         ret = -EIO;
3084                         mlog_errno(ret);
3085                         goto out;
3086                 }
3087
3088                 ocfs2_set_new_buffer_uptodate(inode, new_bh);
3089
3090                 ret = ocfs2_journal_access(handle, inode, new_bh,
3091                                            OCFS2_JOURNAL_ACCESS_CREATE);
3092                 if (ret < 0) {
3093                         mlog_errno(ret);
3094                         brelse(new_bh);
3095                         goto out;
3096                 }
3097
3098                 ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
3099                 if (ret < 0) {
3100                         mlog_errno(ret);
3101                         brelse(new_bh);
3102                         goto out;
3103                 }
3104
3105                 memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3106
3107                 if (i == 0) {
3108                         new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3109                         new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3110
3111                         if (first_hash)
3112                                 *first_hash = le32_to_cpu(
3113                                         new_xh->xh_entries[0].xe_name_hash);
3114                         new_first_bh = new_bh;
3115                         get_bh(new_first_bh);
3116                 }
3117
3118                 ocfs2_journal_dirty(handle, new_bh);
3119
3120                 if (*header_bh == old_bh) {
3121                         brelse(*header_bh);
3122                         *header_bh = new_bh;
3123                         get_bh(*header_bh);
3124
3125                         brelse(*first_bh);
3126                         *first_bh = new_first_bh;
3127                         get_bh(*first_bh);
3128                 }
3129                 brelse(new_bh);
3130                 brelse(old_bh);
3131         }
3132
3133         le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3134
3135         ocfs2_journal_dirty(handle, prev_bh);
3136 out:
3137         brelse(prev_bh);
3138         brelse(new_first_bh);
3139         return ret;
3140 }
3141
3142 /*
3143  * Find the suitable pos when we divide a bucket into 2.
3144  * We have to make sure the xattrs with the same hash value exist
3145  * in the same bucket.
3146  *
3147  * If this ocfs2_xattr_header covers more than one hash value, find a
3148  * place where the hash value changes.  Try to find the most even split.
3149  * The most common case is that all entries have different hash values,
3150  * and the first check we make will find a place to split.
3151  */
3152 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3153 {
3154         struct ocfs2_xattr_entry *entries = xh->xh_entries;
3155         int count = le16_to_cpu(xh->xh_count);
3156         int delta, middle = count / 2;
3157
3158         /*
3159          * We start at the middle.  Each step gets farther away in both
3160          * directions.  We therefore hit the change in hash value
3161          * nearest to the middle.  Note that this loop does not execute for
3162          * count < 2.
3163          */
3164         for (delta = 0; delta < middle; delta++) {
3165                 /* Let's check delta earlier than middle */
3166                 if (cmp_xe(&entries[middle - delta - 1],
3167                            &entries[middle - delta]))
3168                         return middle - delta;
3169
3170                 /* For even counts, don't walk off the end */
3171                 if ((middle + delta + 1) == count)
3172                         continue;
3173
3174                 /* Now try delta past middle */
3175                 if (cmp_xe(&entries[middle + delta],
3176                            &entries[middle + delta + 1]))
3177                         return middle + delta + 1;
3178         }
3179
3180         /* Every entry had the same hash */
3181         return count;
3182 }
3183
3184 /*
3185  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3186  * first_hash will record the 1st hash of the new bucket.
3187  *
3188  * Normally half of the xattrs will be moved.  But we have to make
3189  * sure that the xattrs with the same hash value are stored in the
3190  * same bucket. If all the xattrs in this bucket have the same hash
3191  * value, the new bucket will be initialized as an empty one and the
3192  * first_hash will be initialized as (hash_value+1).
3193  */
3194 static int ocfs2_divide_xattr_bucket(struct inode *inode,
3195                                     handle_t *handle,
3196                                     u64 blk,
3197                                     u64 new_blk,
3198                                     u32 *first_hash,
3199                                     int new_bucket_head)
3200 {
3201         int ret, i;
3202         int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3203         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3204         struct ocfs2_xattr_bucket s_bucket, t_bucket;
3205         struct ocfs2_xattr_header *xh;
3206         struct ocfs2_xattr_entry *xe;
3207         int blocksize = inode->i_sb->s_blocksize;
3208
3209         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3210              (unsigned long long)blk, (unsigned long long)new_blk);
3211
3212         memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
3213         memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
3214
3215         ret = ocfs2_read_xattr_bucket(inode, &s_bucket, blk);
3216         if (ret) {
3217                 mlog_errno(ret);
3218                 goto out;
3219         }
3220
3221         ret = ocfs2_journal_access(handle, inode, s_bucket.bu_bhs[0],
3222                                    OCFS2_JOURNAL_ACCESS_WRITE);
3223         if (ret) {
3224                 mlog_errno(ret);
3225                 goto out;
3226         }
3227
3228         /*
3229          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
3230          * there's no need to read it.
3231          */
3232         ret = ocfs2_init_xattr_bucket(inode, &t_bucket, new_blk);
3233         if (ret) {
3234                 mlog_errno(ret);
3235                 goto out;
3236         }
3237
3238         for (i = 0; i < blk_per_bucket; i++) {
3239                 ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i],
3240                                            new_bucket_head ?
3241                                            OCFS2_JOURNAL_ACCESS_CREATE :
3242                                            OCFS2_JOURNAL_ACCESS_WRITE);
3243                 if (ret) {
3244                         mlog_errno(ret);
3245                         goto out;
3246                 }
3247         }
3248
3249         xh = bucket_xh(&s_bucket);
3250         count = le16_to_cpu(xh->xh_count);
3251         start = ocfs2_xattr_find_divide_pos(xh);
3252
3253         if (start == count) {
3254                 xe = &xh->xh_entries[start-1];
3255
3256                 /*
3257                  * initialized a new empty bucket here.
3258                  * The hash value is set as one larger than
3259                  * that of the last entry in the previous bucket.
3260                  */
3261                 for (i = 0; i < blk_per_bucket; i++)
3262                         memset(bucket_block(&t_bucket, i), 0, blocksize);
3263
3264                 xh = bucket_xh(&t_bucket);
3265                 xh->xh_free_start = cpu_to_le16(blocksize);
3266                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3267                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3268
3269                 goto set_num_buckets;
3270         }
3271
3272         /* copy the whole bucket to the new first. */
3273         for (i = 0; i < blk_per_bucket; i++)
3274                 memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i),
3275                        blocksize);
3276
3277         /* update the new bucket. */
3278         xh = bucket_xh(&t_bucket);
3279
3280         /*
3281          * Calculate the total name/value len and xh_free_start for
3282          * the old bucket first.
3283          */
3284         name_offset = OCFS2_XATTR_BUCKET_SIZE;
3285         name_value_len = 0;
3286         for (i = 0; i < start; i++) {
3287                 xe = &xh->xh_entries[i];
3288                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3289                 if (ocfs2_xattr_is_local(xe))
3290                         xe_len +=
3291                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3292                 else
3293                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3294                 name_value_len += xe_len;
3295                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3296                         name_offset = le16_to_cpu(xe->xe_name_offset);
3297         }
3298
3299         /*
3300          * Now begin the modification to the new bucket.
3301          *
3302          * In the new bucket, We just move the xattr entry to the beginning
3303          * and don't touch the name/value. So there will be some holes in the
3304          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3305          * called.
3306          */
3307         xe = &xh->xh_entries[start];
3308         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3309         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3310              (int)((char *)xe - (char *)xh),
3311              (int)((char *)xh->xh_entries - (char *)xh));
3312         memmove((char *)xh->xh_entries, (char *)xe, len);
3313         xe = &xh->xh_entries[count - start];
3314         len = sizeof(struct ocfs2_xattr_entry) * start;
3315         memset((char *)xe, 0, len);
3316
3317         le16_add_cpu(&xh->xh_count, -start);
3318         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3319
3320         /* Calculate xh_free_start for the new bucket. */
3321         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3322         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3323                 xe = &xh->xh_entries[i];
3324                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3325                 if (ocfs2_xattr_is_local(xe))
3326                         xe_len +=
3327                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3328                 else
3329                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3330                 if (le16_to_cpu(xe->xe_name_offset) <
3331                     le16_to_cpu(xh->xh_free_start))
3332                         xh->xh_free_start = xe->xe_name_offset;
3333         }
3334
3335 set_num_buckets:
3336         /* set xh->xh_num_buckets for the new xh. */
3337         if (new_bucket_head)
3338                 xh->xh_num_buckets = cpu_to_le16(1);
3339         else
3340                 xh->xh_num_buckets = 0;
3341
3342         for (i = 0; i < blk_per_bucket; i++) {
3343                 ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]);
3344                 if (ret)
3345                         mlog_errno(ret);
3346         }
3347
3348         /* store the first_hash of the new bucket. */
3349         if (first_hash)
3350                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3351
3352         /*
3353          * Now only update the 1st block of the old bucket.  If we
3354          * just added a new empty bucket, there is no need to modify
3355          * it.
3356          */
3357         if (start == count)
3358                 goto out;
3359
3360         xh = bucket_xh(&s_bucket);
3361         memset(&xh->xh_entries[start], 0,
3362                sizeof(struct ocfs2_xattr_entry) * (count - start));
3363         xh->xh_count = cpu_to_le16(start);
3364         xh->xh_free_start = cpu_to_le16(name_offset);
3365         xh->xh_name_value_len = cpu_to_le16(name_value_len);
3366
3367         ocfs2_journal_dirty(handle, s_bucket.bu_bhs[0]);
3368         if (ret)
3369                 mlog_errno(ret);
3370
3371 out:
3372         ocfs2_xattr_bucket_relse(inode, &s_bucket);
3373         ocfs2_xattr_bucket_relse(inode, &t_bucket);
3374
3375         return ret;
3376 }
3377
3378 /*
3379  * Copy xattr from one bucket to another bucket.
3380  *
3381  * The caller must make sure that the journal transaction
3382  * has enough space for journaling.
3383  */
3384 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3385                                  handle_t *handle,
3386                                  u64 s_blkno,
3387                                  u64 t_blkno,
3388                                  int t_is_new)
3389 {
3390         int ret, i;
3391         int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3392         int blocksize = inode->i_sb->s_blocksize;
3393         struct ocfs2_xattr_bucket s_bucket, t_bucket;
3394
3395         BUG_ON(s_blkno == t_blkno);
3396
3397         mlog(0, "cp bucket %llu to %llu, target is %d\n",
3398              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3399              t_is_new);
3400
3401         memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
3402         memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
3403
3404         ret = ocfs2_read_xattr_bucket(inode, &s_bucket, s_blkno);
3405         if (ret)
3406                 goto out;
3407
3408         /*
3409          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
3410          * there's no need to read it.
3411          */
3412         ret = ocfs2_init_xattr_bucket(inode, &t_bucket, t_blkno);
3413         if (ret)
3414                 goto out;
3415
3416         for (i = 0; i < blk_per_bucket; i++) {
3417                 ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i],
3418                                            t_is_new ?
3419                                            OCFS2_JOURNAL_ACCESS_CREATE :
3420                                            OCFS2_JOURNAL_ACCESS_WRITE);
3421                 if (ret)
3422                         goto out;
3423         }
3424
3425         for (i = 0; i < blk_per_bucket; i++) {
3426                 memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i),
3427                        blocksize);
3428                 ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]);
3429         }
3430
3431 out:
3432         ocfs2_xattr_bucket_relse(inode, &s_bucket);
3433         ocfs2_xattr_bucket_relse(inode, &t_bucket);
3434
3435         return ret;
3436 }
3437
3438 /*
3439  * Copy one xattr cluster from src_blk to to_blk.
3440  * The to_blk will become the first bucket header of the cluster, so its
3441  * xh_num_buckets will be initialized as the bucket num in the cluster.
3442  */
3443 static int ocfs2_cp_xattr_cluster(struct inode *inode,
3444                                   handle_t *handle,
3445                                   struct buffer_head *first_bh,
3446                                   u64 src_blk,
3447                                   u64 to_blk,
3448                                   u32 *first_hash)
3449 {
3450         int i, ret, credits;
3451         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3452         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3453         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3454         struct buffer_head *bh = NULL;
3455         struct ocfs2_xattr_header *xh;
3456         u64 to_blk_start = to_blk;
3457
3458         mlog(0, "cp xattrs from cluster %llu to %llu\n",
3459              (unsigned long long)src_blk, (unsigned long long)to_blk);
3460
3461         /*
3462          * We need to update the new cluster and 1 more for the update of
3463          * the 1st bucket of the previous extent rec.
3464          */
3465         credits = bpc + 1;
3466         ret = ocfs2_extend_trans(handle, credits);
3467         if (ret) {
3468                 mlog_errno(ret);
3469                 goto out;
3470         }
3471
3472         ret = ocfs2_journal_access(handle, inode, first_bh,
3473                                    OCFS2_JOURNAL_ACCESS_WRITE);
3474         if (ret) {
3475                 mlog_errno(ret);
3476                 goto out;
3477         }
3478
3479         for (i = 0; i < num_buckets; i++) {
3480                 ret = ocfs2_cp_xattr_bucket(inode, handle,
3481                                             src_blk, to_blk, 1);
3482                 if (ret) {
3483                         mlog_errno(ret);
3484                         goto out;
3485                 }
3486
3487                 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3488                 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3489         }
3490
3491         /* update the old bucket header. */
3492         xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3493         le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3494
3495         ocfs2_journal_dirty(handle, first_bh);
3496
3497         /* update the new bucket header. */
3498         ret = ocfs2_read_block(inode, to_blk_start, &bh);
3499         if (ret < 0) {
3500                 mlog_errno(ret);
3501                 goto out;
3502         }
3503
3504         ret = ocfs2_journal_access(handle, inode, bh,
3505                                    OCFS2_JOURNAL_ACCESS_WRITE);
3506         if (ret) {
3507                 mlog_errno(ret);
3508                 goto out;
3509         }
3510
3511         xh = (struct ocfs2_xattr_header *)bh->b_data;
3512         xh->xh_num_buckets = cpu_to_le16(num_buckets);
3513
3514         ocfs2_journal_dirty(handle, bh);
3515
3516         if (first_hash)
3517                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3518 out:
3519         brelse(bh);
3520         return ret;
3521 }
3522
3523 /*
3524  * Move some xattrs in this cluster to the new cluster.
3525  * This function should only be called when bucket size == cluster size.
3526  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3527  */
3528 static int ocfs2_divide_xattr_cluster(struct inode *inode,
3529                                       handle_t *handle,
3530                                       u64 prev_blk,
3531                                       u64 new_blk,
3532                                       u32 *first_hash)
3533 {
3534         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3535         int ret, credits = 2 * blk_per_bucket;
3536
3537         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3538
3539         ret = ocfs2_extend_trans(handle, credits);
3540         if (ret) {
3541                 mlog_errno(ret);
3542                 return ret;
3543         }
3544
3545         /* Move half of the xattr in start_blk to the next bucket. */
3546         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
3547                                           new_blk, first_hash, 1);
3548 }
3549
3550 /*
3551  * Move some xattrs from the old cluster to the new one since they are not
3552  * contiguous in ocfs2 xattr tree.
3553  *
3554  * new_blk starts a new separate cluster, and we will move some xattrs from
3555  * prev_blk to it. v_start will be set as the first name hash value in this
3556  * new cluster so that it can be used as e_cpos during tree insertion and
3557  * don't collide with our original b-tree operations. first_bh and header_bh
3558  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3559  * to extend the insert bucket.
3560  *
3561  * The problem is how much xattr should we move to the new one and when should
3562  * we update first_bh and header_bh?
3563  * 1. If cluster size > bucket size, that means the previous cluster has more
3564  *    than 1 bucket, so just move half nums of bucket into the new cluster and
3565  *    update the first_bh and header_bh if the insert bucket has been moved
3566  *    to the new cluster.
3567  * 2. If cluster_size == bucket_size:
3568  *    a) If the previous extent rec has more than one cluster and the insert
3569  *       place isn't in the last cluster, copy the entire last cluster to the
3570  *       new one. This time, we don't need to upate the first_bh and header_bh
3571  *       since they will not be moved into the new cluster.
3572  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
3573  *       the new one. And we set the extend flag to zero if the insert place is
3574  *       moved into the new allocated cluster since no extend is needed.
3575  */
3576 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3577                                             handle_t *handle,
3578                                             struct buffer_head **first_bh,
3579                                             struct buffer_head **header_bh,
3580                                             u64 new_blk,
3581                                             u64 prev_blk,
3582                                             u32 prev_clusters,
3583                                             u32 *v_start,
3584                                             int *extend)
3585 {
3586         int ret = 0;
3587         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3588
3589         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3590              (unsigned long long)prev_blk, prev_clusters,
3591              (unsigned long long)new_blk);
3592
3593         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3594                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3595                                                           handle,
3596                                                           first_bh,
3597                                                           header_bh,
3598                                                           new_blk,
3599                                                           prev_blk,
3600                                                           prev_clusters,
3601                                                           v_start);
3602         else {
3603                 u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3604
3605                 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3606                         ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3607                                                      last_blk, new_blk,
3608                                                      v_start);
3609                 else {
3610                         ret = ocfs2_divide_xattr_cluster(inode, handle,
3611                                                          last_blk, new_blk,
3612                                                          v_start);
3613
3614                         if ((*header_bh)->b_blocknr == last_blk && extend)
3615                                 *extend = 0;
3616                 }
3617         }
3618
3619         return ret;
3620 }
3621
3622 /*
3623  * Add a new cluster for xattr storage.
3624  *
3625  * If the new cluster is contiguous with the previous one, it will be
3626  * appended to the same extent record, and num_clusters will be updated.
3627  * If not, we will insert a new extent for it and move some xattrs in
3628  * the last cluster into the new allocated one.
3629  * We also need to limit the maximum size of a btree leaf, otherwise we'll
3630  * lose the benefits of hashing because we'll have to search large leaves.
3631  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
3632  * if it's bigger).
3633  *
3634  * first_bh is the first block of the previous extent rec and header_bh
3635  * indicates the bucket we will insert the new xattrs. They will be updated
3636  * when the header_bh is moved into the new cluster.
3637  */
3638 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3639                                        struct buffer_head *root_bh,
3640                                        struct buffer_head **first_bh,
3641                                        struct buffer_head **header_bh,
3642                                        u32 *num_clusters,
3643                                        u32 prev_cpos,
3644                                        u64 prev_blkno,
3645                                        int *extend)
3646 {
3647         int ret, credits;
3648         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3649         u32 prev_clusters = *num_clusters;
3650         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3651         u64 block;
3652         handle_t *handle = NULL;
3653         struct ocfs2_alloc_context *data_ac = NULL;
3654         struct ocfs2_alloc_context *meta_ac = NULL;
3655         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3656         struct ocfs2_extent_tree et;
3657
3658         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3659              "previous xattr blkno = %llu\n",
3660              (unsigned long long)OCFS2_I(inode)->ip_blkno,
3661              prev_cpos, (unsigned long long)prev_blkno);
3662
3663         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
3664
3665         ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
3666                                     &data_ac, &meta_ac);
3667         if (ret) {
3668                 mlog_errno(ret);
3669                 goto leave;
3670         }
3671
3672         credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
3673                                             clusters_to_add);
3674         handle = ocfs2_start_trans(osb, credits);
3675         if (IS_ERR(handle)) {
3676                 ret = PTR_ERR(handle);
3677                 handle = NULL;
3678                 mlog_errno(ret);
3679                 goto leave;
3680         }
3681
3682         ret = ocfs2_journal_access(handle, inode, root_bh,
3683                                    OCFS2_JOURNAL_ACCESS_WRITE);
3684         if (ret < 0) {
3685                 mlog_errno(ret);
3686                 goto leave;
3687         }
3688
3689         ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
3690                                      clusters_to_add, &bit_off, &num_bits);
3691         if (ret < 0) {
3692                 if (ret != -ENOSPC)
3693                         mlog_errno(ret);
3694                 goto leave;
3695         }
3696
3697         BUG_ON(num_bits > clusters_to_add);
3698
3699         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
3700         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
3701              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3702
3703         if (prev_blkno + prev_clusters * bpc == block &&
3704             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
3705              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
3706                 /*
3707                  * If this cluster is contiguous with the old one and
3708                  * adding this new cluster, we don't surpass the limit of
3709                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
3710                  * initialized and used like other buckets in the previous
3711                  * cluster.
3712                  * So add it as a contiguous one. The caller will handle
3713                  * its init process.
3714                  */
3715                 v_start = prev_cpos + prev_clusters;
3716                 *num_clusters = prev_clusters + num_bits;
3717                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
3718                      num_bits);
3719         } else {
3720                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
3721                                                        handle,
3722                                                        first_bh,
3723                                                        header_bh,
3724                                                        block,
3725                                                        prev_blkno,
3726                                                        prev_clusters,
3727                                                        &v_start,
3728                                                        extend);
3729                 if (ret) {
3730                         mlog_errno(ret);
3731                         goto leave;
3732                 }
3733         }
3734
3735         if (handle->h_buffer_credits < credits) {
3736                 /*
3737                  * The journal has been restarted before, and don't
3738                  * have enough space for the insertion, so extend it
3739                  * here.
3740                  */
3741                 ret = ocfs2_extend_trans(handle, credits);
3742                 if (ret) {
3743                         mlog_errno(ret);
3744                         goto leave;
3745                 }
3746         }
3747         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3748              num_bits, (unsigned long long)block, v_start);
3749         ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
3750                                   num_bits, 0, meta_ac);
3751         if (ret < 0) {
3752                 mlog_errno(ret);
3753                 goto leave;
3754         }
3755
3756         ret = ocfs2_journal_dirty(handle, root_bh);
3757         if (ret < 0) {
3758                 mlog_errno(ret);
3759                 goto leave;
3760         }
3761
3762 leave:
3763         if (handle)
3764                 ocfs2_commit_trans(osb, handle);
3765         if (data_ac)
3766                 ocfs2_free_alloc_context(data_ac);
3767         if (meta_ac)
3768                 ocfs2_free_alloc_context(meta_ac);
3769
3770         return ret;
3771 }
3772
3773 /*
3774  * Extend a new xattr bucket and move xattrs to the end one by one until
3775  * We meet with start_bh. Only move half of the xattrs to the bucket after it.
3776  */
3777 static int ocfs2_extend_xattr_bucket(struct inode *inode,
3778                                      struct buffer_head *first_bh,
3779                                      struct buffer_head *start_bh,
3780                                      u32 num_clusters)
3781 {
3782         int ret, credits;
3783         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3784         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3785         u64 start_blk = start_bh->b_blocknr, end_blk;
3786         u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
3787         handle_t *handle;
3788         struct ocfs2_xattr_header *first_xh =
3789                                 (struct ocfs2_xattr_header *)first_bh->b_data;
3790         u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3791
3792         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3793              "from %llu, len = %u\n", (unsigned long long)start_blk,
3794              (unsigned long long)first_bh->b_blocknr, num_clusters);
3795
3796         BUG_ON(bucket >= num_buckets);
3797
3798         end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
3799
3800         /*
3801          * We will touch all the buckets after the start_bh(include it).
3802          * Add one more bucket and modify the first_bh.
3803          */
3804         credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
3805         handle = ocfs2_start_trans(osb, credits);
3806         if (IS_ERR(handle)) {
3807                 ret = PTR_ERR(handle);
3808                 handle = NULL;
3809                 mlog_errno(ret);
3810                 goto out;
3811         }
3812
3813         ret = ocfs2_journal_access(handle, inode, first_bh,
3814                                    OCFS2_JOURNAL_ACCESS_WRITE);
3815         if (ret) {
3816                 mlog_errno(ret);
3817                 goto commit;
3818         }
3819
3820         while (end_blk != start_blk) {
3821                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3822                                             end_blk + blk_per_bucket, 0);
3823                 if (ret)
3824                         goto commit;
3825                 end_blk -= blk_per_bucket;
3826         }
3827
3828         /* Move half of the xattr in start_blk to the next bucket. */
3829         ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
3830                                         start_blk + blk_per_bucket, NULL, 0);
3831
3832         le16_add_cpu(&first_xh->xh_num_buckets, 1);
3833         ocfs2_journal_dirty(handle, first_bh);
3834
3835 commit:
3836         ocfs2_commit_trans(osb, handle);
3837 out:
3838         return ret;
3839 }
3840
3841 /*
3842  * Add new xattr bucket in an extent record and adjust the buckets accordingly.
3843  * xb_bh is the ocfs2_xattr_block.
3844  * We will move all the buckets starting from header_bh to the next place. As
3845  * for this one, half num of its xattrs will be moved to the next one.
3846  *
3847  * We will allocate a new cluster if current cluster is full and adjust
3848  * header_bh and first_bh if the insert place is moved to the new cluster.
3849  */
3850 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3851                                       struct buffer_head *xb_bh,
3852                                       struct buffer_head *header_bh)
3853 {
3854         struct ocfs2_xattr_header *first_xh = NULL;
3855         struct buffer_head *first_bh = NULL;
3856         struct ocfs2_xattr_block *xb =
3857                         (struct ocfs2_xattr_block *)xb_bh->b_data;
3858         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3859         struct ocfs2_extent_list *el = &xb_root->xt_list;
3860         struct ocfs2_xattr_header *xh =
3861                         (struct ocfs2_xattr_header *)header_bh->b_data;
3862         u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3863         struct super_block *sb = inode->i_sb;
3864         struct ocfs2_super *osb = OCFS2_SB(sb);
3865         int ret, num_buckets, extend = 1;
3866         u64 p_blkno;
3867         u32 e_cpos, num_clusters;
3868
3869         mlog(0, "Add new xattr bucket starting form %llu\n",
3870              (unsigned long long)header_bh->b_blocknr);
3871
3872         /*
3873          * Add refrence for header_bh here because it may be
3874          * changed in ocfs2_add_new_xattr_cluster and we need
3875          * to free it in the end.
3876          */
3877         get_bh(header_bh);
3878
3879         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3880                                   &num_clusters, el);
3881         if (ret) {
3882                 mlog_errno(ret);
3883                 goto out;
3884         }
3885
3886         ret = ocfs2_read_block(inode, p_blkno, &first_bh);
3887         if (ret) {
3888                 mlog_errno(ret);
3889                 goto out;
3890         }
3891
3892         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3893         first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3894
3895         if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
3896                 ret = ocfs2_add_new_xattr_cluster(inode,
3897                                                   xb_bh,
3898                                                   &first_bh,
3899                                                   &header_bh,
3900                                                   &num_clusters,
3901                                                   e_cpos,
3902                                                   p_blkno,
3903                                                   &extend);
3904                 if (ret) {
3905                         mlog_errno(ret);
3906                         goto out;
3907                 }
3908         }
3909
3910         if (extend)
3911                 ret = ocfs2_extend_xattr_bucket(inode,
3912                                                 first_bh,
3913                                                 header_bh,
3914                                                 num_clusters);
3915         if (ret)
3916                 mlog_errno(ret);
3917 out:
3918         brelse(first_bh);
3919         brelse(header_bh);
3920         return ret;
3921 }
3922
3923 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3924                                         struct ocfs2_xattr_bucket *bucket,
3925                                         int offs)
3926 {
3927         int block_off = offs >> inode->i_sb->s_blocksize_bits;
3928
3929         offs = offs % inode->i_sb->s_blocksize;
3930         return bucket_block(bucket, block_off) + offs;
3931 }
3932
3933 /*
3934  * Handle the normal xattr set, including replace, delete and new.
3935  *
3936  * Note: "local" indicates the real data's locality. So we can't
3937  * just its bucket locality by its length.
3938  */
3939 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
3940                                          struct ocfs2_xattr_info *xi,
3941                                          struct ocfs2_xattr_search *xs,
3942                                          u32 name_hash,
3943                                          int local)
3944 {
3945         struct ocfs2_xattr_entry *last, *xe;
3946         int name_len = strlen(xi->name);
3947         struct ocfs2_xattr_header *xh = xs->header;
3948         u16 count = le16_to_cpu(xh->xh_count), start;
3949         size_t blocksize = inode->i_sb->s_blocksize;
3950         char *val;
3951         size_t offs, size, new_size;
3952
3953         last = &xh->xh_entries[count];
3954         if (!xs->not_found) {
3955                 xe = xs->here;
3956                 offs = le16_to_cpu(xe->xe_name_offset);
3957                 if (ocfs2_xattr_is_local(xe))
3958                         size = OCFS2_XATTR_SIZE(name_len) +
3959                         OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3960                 else
3961                         size = OCFS2_XATTR_SIZE(name_len) +
3962                         OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
3963
3964                 /*
3965                  * If the new value will be stored outside, xi->value has been
3966                  * initalized as an empty ocfs2_xattr_value_root, and the same
3967                  * goes with xi->value_len, so we can set new_size safely here.
3968                  * See ocfs2_xattr_set_in_bucket.
3969                  */
3970                 new_size = OCFS2_XATTR_SIZE(name_len) +
3971                            OCFS2_XATTR_SIZE(xi->value_len);
3972
3973                 le16_add_cpu(&xh->xh_name_value_len, -size);
3974                 if (xi->value) {
3975                         if (new_size > size)
3976                                 goto set_new_name_value;
3977
3978                         /* Now replace the old value with new one. */
3979                         if (local)
3980                                 xe->xe_value_size = cpu_to_le64(xi->value_len);
3981                         else
3982                                 xe->xe_value_size = 0;
3983
3984                         val = ocfs2_xattr_bucket_get_val(inode,
3985                                                          &xs->bucket, offs);
3986                         memset(val + OCFS2_XATTR_SIZE(name_len), 0,
3987                                size - OCFS2_XATTR_SIZE(name_len));
3988                         if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
3989                                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
3990                                        xi->value, xi->value_len);
3991
3992                         le16_add_cpu(&xh->xh_name_value_len, new_size);
3993                         ocfs2_xattr_set_local(xe, local);
3994                         return;
3995                 } else {
3996                         /*
3997                          * Remove the old entry if there is more than one.
3998                          * We don't remove the last entry so that we can
3999                          * use it to indicate the hash value of the empty
4000                          * bucket.
4001                          */
4002                         last -= 1;
4003                         le16_add_cpu(&xh->xh_count, -1);
4004                         if (xh->xh_count) {
4005                                 memmove(xe, xe + 1,
4006                                         (void *)last - (void *)xe);
4007                                 memset(last, 0,
4008                                        sizeof(struct ocfs2_xattr_entry));
4009                         } else
4010                                 xh->xh_free_start =
4011                                         cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4012
4013                         return;
4014                 }
4015         } else {
4016                 /* find a new entry for insert. */
4017                 int low = 0, high = count - 1, tmp;
4018                 struct ocfs2_xattr_entry *tmp_xe;
4019
4020                 while (low <= high && count) {
4021                         tmp = (low + high) / 2;
4022                         tmp_xe = &xh->xh_entries[tmp];
4023
4024                         if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4025                                 low = tmp + 1;
4026                         else if (name_hash <
4027                                  le32_to_cpu(tmp_xe->xe_name_hash))
4028                                 high = tmp - 1;
4029                         else {
4030                                 low = tmp;
4031                                 break;
4032                         }
4033                 }
4034
4035                 xe = &xh->xh_entries[low];
4036                 if (low != count)
4037                         memmove(xe + 1, xe, (void *)last - (void *)xe);
4038
4039                 le16_add_cpu(&xh->xh_count, 1);
4040                 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4041                 xe->xe_name_hash = cpu_to_le32(name_hash);
4042                 xe->xe_name_len = name_len;
4043                 ocfs2_xattr_set_type(xe, xi->name_index);
4044         }
4045
4046 set_new_name_value:
4047         /* Insert the new name+value. */
4048         size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4049
4050         /*
4051          * We must make sure that the name/value pair
4052          * exists in the same block.
4053          */
4054         offs = le16_to_cpu(xh->xh_free_start);
4055         start = offs - size;
4056
4057         if (start >> inode->i_sb->s_blocksize_bits !=
4058             (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4059                 offs = offs - offs % blocksize;
4060                 xh->xh_free_start = cpu_to_le16(offs);
4061         }
4062
4063         val = ocfs2_xattr_bucket_get_val(inode,
4064                                          &xs->bucket, offs - size);
4065         xe->xe_name_offset = cpu_to_le16(offs - size);
4066
4067         memset(val, 0, size);
4068         memcpy(val, xi->name, name_len);
4069         memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4070
4071         xe->xe_value_size = cpu_to_le64(xi->value_len);
4072         ocfs2_xattr_set_local(xe, local);
4073         xs->here = xe;
4074         le16_add_cpu(&xh->xh_free_start, -size);
4075         le16_add_cpu(&xh->xh_name_value_len, size);
4076
4077         return;
4078 }
4079
4080 static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4081                                              handle_t *handle,
4082                                              struct ocfs2_xattr_search *xs,
4083                                              struct buffer_head **bhs,
4084                                              u16 bh_num)
4085 {
4086         int ret = 0, off, block_off;
4087         struct ocfs2_xattr_entry *xe = xs->here;
4088
4089         /*
4090          * First calculate all the blocks we should journal_access
4091          * and journal_dirty. The first block should always be touched.
4092          */
4093         ret = ocfs2_journal_dirty(handle, bhs[0]);
4094         if (ret)
4095                 mlog_errno(ret);
4096
4097         /* calc the data. */
4098         off = le16_to_cpu(xe->xe_name_offset);
4099         block_off = off >> inode->i_sb->s_blocksize_bits;
4100         ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4101         if (ret)
4102                 mlog_errno(ret);
4103
4104         return ret;
4105 }
4106
4107 /*
4108  * Set the xattr entry in the specified bucket.
4109  * The bucket is indicated by xs->bucket and it should have the enough
4110  * space for the xattr insertion.
4111  */
4112 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4113                                            struct ocfs2_xattr_info *xi,
4114                                            struct ocfs2_xattr_search *xs,
4115                                            u32 name_hash,
4116                                            int local)
4117 {
4118         int i, ret;
4119         handle_t *handle = NULL;
4120         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4121         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4122
4123         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4124              (unsigned long)xi->value_len, xi->name_index,
4125              (unsigned long long)bucket_blkno(&xs->bucket));
4126
4127         if (!xs->bucket.bu_bhs[1]) {
4128                 ret = ocfs2_read_blocks(inode,
4129                                         bucket_blkno(&xs->bucket) + 1,
4130                                         blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
4131                                         0);
4132                 if (ret) {
4133                         mlog_errno(ret);
4134                         goto out;
4135                 }
4136         }
4137
4138         handle = ocfs2_start_trans(osb, blk_per_bucket);
4139         if (IS_ERR(handle)) {
4140                 ret = PTR_ERR(handle);
4141                 handle = NULL;
4142                 mlog_errno(ret);
4143                 goto out;
4144         }
4145
4146         for (i = 0; i < blk_per_bucket; i++) {
4147                 ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[i],
4148                                            OCFS2_JOURNAL_ACCESS_WRITE);
4149                 if (ret < 0) {
4150                         mlog_errno(ret);
4151                         goto out;
4152                 }
4153         }
4154
4155         ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4156
4157         /*Only dirty the blocks we have touched in set xattr. */
4158         ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4159                                                 xs->bucket.bu_bhs, blk_per_bucket);
4160         if (ret)
4161                 mlog_errno(ret);
4162 out:
4163         ocfs2_commit_trans(osb, handle);
4164
4165         return ret;
4166 }
4167
4168 static int ocfs2_xattr_value_update_size(struct inode *inode,
4169                                          struct buffer_head *xe_bh,
4170                                          struct ocfs2_xattr_entry *xe,
4171                                          u64 new_size)
4172 {
4173         int ret;
4174         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4175         handle_t *handle = NULL;
4176
4177         handle = ocfs2_start_trans(osb, 1);
4178         if (IS_ERR(handle)) {
4179                 ret = -ENOMEM;
4180                 mlog_errno(ret);
4181                 goto out;
4182         }
4183
4184         ret = ocfs2_journal_access(handle, inode, xe_bh,
4185                                    OCFS2_JOURNAL_ACCESS_WRITE);
4186         if (ret < 0) {
4187                 mlog_errno(ret);
4188                 goto out_commit;
4189         }
4190
4191         xe->xe_value_size = cpu_to_le64(new_size);
4192
4193         ret = ocfs2_journal_dirty(handle, xe_bh);
4194         if (ret < 0)
4195                 mlog_errno(ret);
4196
4197 out_commit:
4198         ocfs2_commit_trans(osb, handle);
4199 out:
4200         return ret;
4201 }
4202
4203 /*
4204  * Truncate the specified xe_off entry in xattr bucket.
4205  * bucket is indicated by header_bh and len is the new length.
4206  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4207  *
4208  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4209  */
4210 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4211                                              struct buffer_head *header_bh,
4212                                              int xe_off,
4213                                              int len)
4214 {
4215         int ret, offset;
4216         u64 value_blk;
4217         struct buffer_head *value_bh = NULL;
4218         struct ocfs2_xattr_value_root *xv;
4219         struct ocfs2_xattr_entry *xe;
4220         struct ocfs2_xattr_header *xh =
4221                         (struct ocfs2_xattr_header *)header_bh->b_data;
4222         size_t blocksize = inode->i_sb->s_blocksize;
4223
4224         xe = &xh->xh_entries[xe_off];
4225
4226         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4227
4228         offset = le16_to_cpu(xe->xe_name_offset) +
4229                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4230
4231         value_blk = offset / blocksize;
4232
4233         /* We don't allow ocfs2_xattr_value to be stored in different block. */
4234         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4235         value_blk += header_bh->b_blocknr;
4236
4237         ret = ocfs2_read_block(inode, value_blk, &value_bh);
4238         if (ret) {
4239                 mlog_errno(ret);
4240                 goto out;
4241         }
4242
4243         xv = (struct ocfs2_xattr_value_root *)
4244                 (value_bh->b_data + offset % blocksize);
4245
4246         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4247              xe_off, (unsigned long long)header_bh->b_blocknr, len);
4248         ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4249         if (ret) {
4250                 mlog_errno(ret);
4251                 goto out;
4252         }
4253
4254         ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
4255         if (ret) {
4256                 mlog_errno(ret);
4257                 goto out;
4258         }
4259
4260 out:
4261         brelse(value_bh);
4262         return ret;
4263 }
4264
4265 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4266                                                 struct ocfs2_xattr_search *xs,
4267                                                 int len)
4268 {
4269         int ret, offset;
4270         struct ocfs2_xattr_entry *xe = xs->here;
4271         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4272
4273         BUG_ON(!xs->bucket.bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4274
4275         offset = xe - xh->xh_entries;
4276         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bu_bhs[0],
4277                                                 offset, len);
4278         if (ret)
4279                 mlog_errno(ret);
4280
4281         return ret;
4282 }
4283
4284 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4285                                                 struct ocfs2_xattr_search *xs,
4286                                                 char *val,
4287                                                 int value_len)
4288 {
4289         int offset;
4290         struct ocfs2_xattr_value_root *xv;
4291         struct ocfs2_xattr_entry *xe = xs->here;
4292
4293         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4294
4295         offset = le16_to_cpu(xe->xe_name_offset) +
4296                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4297
4298         xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4299
4300         return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
4301 }
4302
4303 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4304                                   struct buffer_head *root_bh,
4305                                   u64 blkno,
4306                                   u32 cpos,
4307                                   u32 len)
4308 {
4309         int ret;
4310         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4311         struct inode *tl_inode = osb->osb_tl_inode;
4312         handle_t *handle;
4313         struct ocfs2_xattr_block *xb =
4314                         (struct ocfs2_xattr_block *)root_bh->b_data;
4315         struct ocfs2_alloc_context *meta_ac = NULL;
4316         struct ocfs2_cached_dealloc_ctxt dealloc;
4317         struct ocfs2_extent_tree et;
4318
4319         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4320
4321         ocfs2_init_dealloc_ctxt(&dealloc);
4322
4323         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4324              cpos, len, (unsigned long long)blkno);
4325
4326         ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4327
4328         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4329         if (ret) {
4330                 mlog_errno(ret);
4331                 return ret;
4332         }
4333
4334         mutex_lock(&tl_inode->i_mutex);
4335
4336         if (ocfs2_truncate_log_needs_flush(osb)) {
4337                 ret = __ocfs2_flush_truncate_log(osb);
4338                 if (ret < 0) {
4339                         mlog_errno(ret);
4340                         goto out;
4341                 }
4342         }
4343
4344         handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4345         if (IS_ERR(handle)) {
4346                 ret = -ENOMEM;
4347                 mlog_errno(ret);
4348                 goto out;
4349         }
4350
4351         ret = ocfs2_journal_access(handle, inode, root_bh,
4352                                    OCFS2_JOURNAL_ACCESS_WRITE);
4353         if (ret) {
4354                 mlog_errno(ret);
4355                 goto out_commit;
4356         }
4357
4358         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4359                                   &dealloc);
4360         if (ret) {
4361                 mlog_errno(ret);
4362                 goto out_commit;
4363         }
4364
4365         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4366
4367         ret = ocfs2_journal_dirty(handle, root_bh);
4368         if (ret) {
4369                 mlog_errno(ret);
4370                 goto out_commit;
4371         }
4372
4373         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4374         if (ret)
4375                 mlog_errno(ret);
4376
4377 out_commit:
4378         ocfs2_commit_trans(osb, handle);
4379 out:
4380         ocfs2_schedule_truncate_log_flush(osb, 1);
4381
4382         mutex_unlock(&tl_inode->i_mutex);
4383
4384         if (meta_ac)
4385                 ocfs2_free_alloc_context(meta_ac);
4386
4387         ocfs2_run_deallocs(osb, &dealloc);
4388
4389         return ret;
4390 }
4391
4392 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4393                                          struct ocfs2_xattr_search *xs)
4394 {
4395         handle_t *handle = NULL;
4396         struct ocfs2_xattr_header *xh = bucket_xh(&xs->bucket);
4397         struct ocfs2_xattr_entry *last = &xh->xh_entries[
4398                                                 le16_to_cpu(xh->xh_count) - 1];
4399         int ret = 0;
4400
4401         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
4402         if (IS_ERR(handle)) {
4403                 ret = PTR_ERR(handle);
4404                 mlog_errno(ret);
4405                 return;
4406         }
4407
4408         ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[0],
4409                                    OCFS2_JOURNAL_ACCESS_WRITE);
4410         if (ret) {
4411                 mlog_errno(ret);
4412                 goto out_commit;
4413         }
4414
4415         /* Remove the old entry. */
4416         memmove(xs->here, xs->here + 1,
4417                 (void *)last - (void *)xs->here);
4418         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4419         le16_add_cpu(&xh->xh_count, -1);
4420
4421         ret = ocfs2_journal_dirty(handle, xs->bucket.bu_bhs[0]);
4422         if (ret < 0)
4423                 mlog_errno(ret);
4424 out_commit:
4425         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4426 }
4427
4428 /*
4429  * Set the xattr name/value in the bucket specified in xs.
4430  *
4431  * As the new value in xi may be stored in the bucket or in an outside cluster,
4432  * we divide the whole process into 3 steps:
4433  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4434  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4435  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4436  * 4. If the clusters for the new outside value can't be allocated, we need
4437  *    to free the xattr we allocated in set.
4438  */
4439 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4440                                      struct ocfs2_xattr_info *xi,
4441                                      struct ocfs2_xattr_search *xs)
4442 {
4443         int ret, local = 1;
4444         size_t value_len;
4445         char *val = (char *)xi->value;
4446         struct ocfs2_xattr_entry *xe = xs->here;
4447         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4448                                               strlen(xi->name));
4449
4450         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4451                 /*
4452                  * We need to truncate the xattr storage first.
4453                  *
4454                  * If both the old and new value are stored to
4455                  * outside block, we only need to truncate
4456                  * the storage and then set the value outside.
4457                  *
4458                  * If the new value should be stored within block,
4459                  * we should free all the outside block first and
4460                  * the modification to the xattr block will be done
4461                  * by following steps.
4462                  */
4463                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4464                         value_len = xi->value_len;
4465                 else
4466                         value_len = 0;
4467
4468                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4469                                                            value_len);
4470                 if (ret)
4471                         goto out;
4472
4473                 if (value_len)
4474                         goto set_value_outside;
4475         }
4476
4477         value_len = xi->value_len;
4478         /* So we have to handle the inside block change now. */
4479         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4480                 /*
4481                  * If the new value will be stored outside of block,
4482                  * initalize a new empty value root and insert it first.
4483                  */
4484                 local = 0;
4485                 xi->value = &def_xv;
4486                 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4487         }
4488
4489         ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local);
4490         if (ret) {
4491                 mlog_errno(ret);
4492                 goto out;
4493         }
4494
4495         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
4496                 goto out;
4497
4498         /* allocate the space now for the outside block storage. */
4499         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4500                                                    value_len);
4501         if (ret) {
4502                 mlog_errno(ret);
4503
4504                 if (xs->not_found) {
4505                         /*
4506                          * We can't allocate enough clusters for outside
4507                          * storage and we have allocated xattr already,
4508                          * so need to remove it.
4509                          */
4510                         ocfs2_xattr_bucket_remove_xs(inode, xs);
4511                 }
4512                 goto out;
4513         }
4514
4515 set_value_outside:
4516         ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
4517 out:
4518         return ret;
4519 }
4520
4521 /*
4522  * check whether the xattr bucket is filled up with the same hash value.
4523  * If we want to insert the xattr with the same hash, return -ENOSPC.
4524  * If we want to insert a xattr with different hash value, go ahead
4525  * and ocfs2_divide_xattr_bucket will handle this.
4526  */
4527 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4528                                               struct ocfs2_xattr_bucket *bucket,
4529                                               const char *name)
4530 {
4531         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4532         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4533
4534         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
4535                 return 0;
4536
4537         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4538             xh->xh_entries[0].xe_name_hash) {
4539                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4540                      "hash = %u\n",
4541                      (unsigned long long)bucket_blkno(bucket),
4542                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4543                 return -ENOSPC;
4544         }
4545
4546         return 0;
4547 }
4548
4549 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4550                                              struct ocfs2_xattr_info *xi,
4551                                              struct ocfs2_xattr_search *xs)
4552 {
4553         struct ocfs2_xattr_header *xh;
4554         struct ocfs2_xattr_entry *xe;
4555         u16 count, header_size, xh_free_start;
4556         int free, max_free, need, old;
4557         size_t value_size = 0, name_len = strlen(xi->name);
4558         size_t blocksize = inode->i_sb->s_blocksize;
4559         int ret, allocation = 0;
4560
4561         mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4562
4563 try_again:
4564         xh = xs->header;
4565         count = le16_to_cpu(xh->xh_count);
4566         xh_free_start = le16_to_cpu(xh->xh_free_start);
4567         header_size = sizeof(struct ocfs2_xattr_header) +
4568                         count * sizeof(struct ocfs2_xattr_entry);
4569         max_free = OCFS2_XATTR_BUCKET_SIZE -
4570                 le16_to_cpu(xh->xh_name_value_len) - header_size;
4571
4572         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4573                         "of %u which exceed block size\n",
4574                         (unsigned long long)bucket_blkno(&xs->bucket),
4575                         header_size);
4576
4577         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4578                 value_size = OCFS2_XATTR_ROOT_SIZE;
4579         else if (xi->value)
4580                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
4581
4582         if (xs->not_found)
4583                 need = sizeof(struct ocfs2_xattr_entry) +
4584                         OCFS2_XATTR_SIZE(name_len) + value_size;
4585         else {
4586                 need = value_size + OCFS2_XATTR_SIZE(name_len);
4587
4588                 /*
4589                  * We only replace the old value if the new length is smaller
4590                  * than the old one. Otherwise we will allocate new space in the
4591                  * bucket to store it.
4592                  */
4593                 xe = xs->here;
4594                 if (ocfs2_xattr_is_local(xe))
4595                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4596                 else
4597                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4598
4599                 if (old >= value_size)
4600                         need = 0;
4601         }
4602
4603         free = xh_free_start - header_size;
4604         /*
4605          * We need to make sure the new name/value pair
4606          * can exist in the same block.
4607          */
4608         if (xh_free_start % blocksize < need)
4609                 free -= xh_free_start % blocksize;
4610
4611         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4612              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4613              " %u\n", xs->not_found,
4614              (unsigned long long)bucket_blkno(&xs->bucket),
4615              free, need, max_free, le16_to_cpu(xh->xh_free_start),
4616              le16_to_cpu(xh->xh_name_value_len));
4617
4618         if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4619                 if (need <= max_free &&
4620                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4621                         /*
4622                          * We can create the space by defragment. Since only the
4623                          * name/value will be moved, the xe shouldn't be changed
4624                          * in xs.
4625                          */
4626                         ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
4627                         if (ret) {
4628                                 mlog_errno(ret);
4629                                 goto out;
4630                         }
4631
4632                         xh_free_start = le16_to_cpu(xh->xh_free_start);
4633                         free = xh_free_start - header_size;
4634                         if (xh_free_start % blocksize < need)
4635                                 free -= xh_free_start % blocksize;
4636
4637                         if (free >= need)
4638                                 goto xattr_set;
4639
4640                         mlog(0, "Can't get enough space for xattr insert by "
4641                              "defragment. Need %u bytes, but we have %d, so "
4642                              "allocate new bucket for it.\n", need, free);
4643                 }
4644
4645                 /*
4646                  * We have to add new buckets or clusters and one
4647                  * allocation should leave us enough space for insert.
4648                  */
4649                 BUG_ON(allocation);
4650
4651                 /*
4652                  * We do not allow for overlapping ranges between buckets. And
4653                  * the maximum number of collisions we will allow for then is
4654                  * one bucket's worth, so check it here whether we need to
4655                  * add a new bucket for the insert.
4656                  */
4657                 ret = ocfs2_check_xattr_bucket_collision(inode,
4658                                                          &xs->bucket,
4659                                                          xi->name);
4660                 if (ret) {
4661                         mlog_errno(ret);
4662                         goto out;
4663                 }
4664
4665                 ret = ocfs2_add_new_xattr_bucket(inode,
4666                                                  xs->xattr_bh,
4667                                                  xs->bucket.bu_bhs[0]);
4668                 if (ret) {
4669                         mlog_errno(ret);
4670                         goto out;
4671                 }
4672
4673                 ocfs2_xattr_bucket_relse(inode, &xs->bucket);
4674                 memset(&xs->bucket, 0, sizeof(xs->bucket));
4675
4676                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4677                                                    xi->name_index,
4678                                                    xi->name, xs);
4679                 if (ret && ret != -ENODATA)
4680                         goto out;
4681                 xs->not_found = ret;
4682                 allocation = 1;
4683                 goto try_again;
4684         }
4685
4686 xattr_set:
4687         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
4688 out:
4689         mlog_exit(ret);
4690         return ret;
4691 }
4692
4693 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4694                                         struct ocfs2_xattr_bucket *bucket,
4695                                         void *para)
4696 {
4697         int ret = 0;
4698         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4699         u16 i;
4700         struct ocfs2_xattr_entry *xe;
4701
4702         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4703                 xe = &xh->xh_entries[i];
4704                 if (ocfs2_xattr_is_local(xe))
4705                         continue;
4706
4707                 ret = ocfs2_xattr_bucket_value_truncate(inode,
4708                                                         bucket->bu_bhs[0],
4709                                                         i, 0);
4710                 if (ret) {
4711                         mlog_errno(ret);
4712                         break;
4713                 }
4714         }
4715
4716         return ret;
4717 }
4718
4719 static int ocfs2_delete_xattr_index_block(struct inode *inode,
4720                                           struct buffer_head *xb_bh)
4721 {
4722         struct ocfs2_xattr_block *xb =
4723                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4724         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4725         int ret = 0;
4726         u32 name_hash = UINT_MAX, e_cpos, num_clusters;
4727         u64 p_blkno;
4728
4729         if (le16_to_cpu(el->l_next_free_rec) == 0)
4730                 return 0;
4731
4732         while (name_hash > 0) {
4733                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4734                                           &e_cpos, &num_clusters, el);
4735                 if (ret) {
4736                         mlog_errno(ret);
4737                         goto out;
4738                 }
4739
4740                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
4741                                                   ocfs2_delete_xattr_in_bucket,
4742                                                   NULL);
4743                 if (ret) {
4744                         mlog_errno(ret);
4745                         goto out;
4746                 }
4747
4748                 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
4749                                              p_blkno, e_cpos, num_clusters);
4750                 if (ret) {
4751                         mlog_errno(ret);
4752                         break;
4753                 }
4754
4755                 if (e_cpos == 0)
4756                         break;
4757
4758                 name_hash = e_cpos - 1;
4759         }
4760
4761 out:
4762         return ret;
4763 }
4764
4765 /*
4766  * 'trusted' attributes support
4767  */
4768 static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
4769                                        size_t list_size, const char *name,
4770                                        size_t name_len)
4771 {
4772         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
4773         const size_t total_len = prefix_len + name_len + 1;
4774
4775         if (list && total_len <= list_size) {
4776                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
4777                 memcpy(list + prefix_len, name, name_len);
4778                 list[prefix_len + name_len] = '\0';
4779         }
4780         return total_len;
4781 }
4782
4783 static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
4784                                    void *buffer, size_t size)
4785 {
4786         if (strcmp(name, "") == 0)
4787                 return -EINVAL;
4788         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
4789                                buffer, size);
4790 }
4791
4792 static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
4793                                    const void *value, size_t size, int flags)
4794 {
4795         if (strcmp(name, "") == 0)
4796                 return -EINVAL;
4797
4798         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
4799                                size, flags);
4800 }
4801
4802 struct xattr_handler ocfs2_xattr_trusted_handler = {
4803         .prefix = XATTR_TRUSTED_PREFIX,
4804         .list   = ocfs2_xattr_trusted_list,
4805         .get    = ocfs2_xattr_trusted_get,
4806         .set    = ocfs2_xattr_trusted_set,
4807 };
4808
4809 /*
4810  * 'user' attributes support
4811  */
4812 static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
4813                                     size_t list_size, const char *name,
4814                                     size_t name_len)
4815 {
4816         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
4817         const size_t total_len = prefix_len + name_len + 1;
4818         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4819
4820         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4821                 return 0;
4822
4823         if (list && total_len <= list_size) {
4824                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
4825                 memcpy(list + prefix_len, name, name_len);
4826                 list[prefix_len + name_len] = '\0';
4827         }
4828         return total_len;
4829 }
4830
4831 static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
4832                                 void *buffer, size_t size)
4833 {
4834         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4835
4836         if (strcmp(name, "") == 0)
4837                 return -EINVAL;
4838         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4839                 return -EOPNOTSUPP;
4840         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
4841                                buffer, size);
4842 }
4843
4844 static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
4845                                 const void *value, size_t size, int flags)
4846 {
4847         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4848
4849         if (strcmp(name, "") == 0)
4850                 return -EINVAL;
4851         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4852                 return -EOPNOTSUPP;
4853
4854         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
4855                                size, flags);
4856 }
4857
4858 struct xattr_handler ocfs2_xattr_user_handler = {
4859         .prefix = XATTR_USER_PREFIX,
4860         .list   = ocfs2_xattr_user_list,
4861         .get    = ocfs2_xattr_user_get,
4862         .set    = ocfs2_xattr_user_set,
4863 };